// blob: f035f2325b4ec397d4afaaf6055d65d4da09e75b [file] [log] [blame]
// Copyright 2010 Google Inc. All Rights Reserved.
// Maintainer: mec@google.com (Michael Chastain)
//
// Convert strings to numbers or numbers to strings.
#ifndef STRINGS_NUMBERS_H_
#define STRINGS_NUMBERS_H_
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <functional>
using std::binary_function;
using std::less;
#include <limits>
using std::numeric_limits;
#include <string>
using std::string;
#include <vector>
using std::vector;
#include "gutil/int128.h"
#include "gutil/integral_types.h"
#include "gutil/macros.h"
#include "gutil/port.h"
#include "gutil/stringprintf.h"
// START DOXYGEN NumbersFunctions grouping
/* @defgroup NumbersFunctions
* @{ */
// Convert a fingerprint to 16 hex digits.
string FpToString(Fprint fp);
// Formats a uint128 as a 32-digit hex string.
string Uint128ToHexString(uint128 ui128);
// Convert strings to numeric values, with strict error checking.
// Leading and trailing spaces are allowed.
// Negative inputs are not allowed for unsigned ints (unlike strtoul).
// Numbers must be in base 10; see the _base variants below for other bases.
// Returns false on errors (including overflow/underflow).
bool safe_strto32(const char* str, int32* value);
bool safe_strto64(const char* str, int64* value);
bool safe_strtou32(const char* str, uint32* value);
bool safe_strtou64(const char* str, uint64* value);
// Convert strings to floating point values.
// Leading and trailing spaces are allowed.
// Values may be rounded on over- and underflow.
bool safe_strtof(const char* str, float* value);
bool safe_strtod(const char* str, double* value);
// std::string overloads of the integer and floating point conversions
// declared above. Each takes the input as a string and writes the parsed
// result through value.
// NOTE(review): presumably these follow the same parsing rules and return
// convention as the const char* versions -- confirm against the implementation.
bool safe_strto32(const string& str, int32* value);
bool safe_strto64(const string& str, int64* value);
bool safe_strtou32(const string& str, uint32* value);
bool safe_strtou64(const string& str, uint64* value);
bool safe_strtof(const string& str, float* value);
bool safe_strtod(const string& str, double* value);
// Parses buffer_size many characters from startptr into value.
bool safe_strto32(const char* startptr, int buffer_size, int32* value);
bool safe_strto64(const char* startptr, int buffer_size, int64* value);
// Parses with a fixed base between 2 and 36. For base 16, leading "0x" is ok.
// If base is set to 0, its value is inferred from the beginning of str:
// "0x" means base 16, "0" means base 8, otherwise base 10 is used.
bool safe_strto32_base(const char* str, int32* value, int base);
bool safe_strto64_base(const char* str, int64* value, int base);
bool safe_strtou32_base(const char* str, uint32* value, int base);
bool safe_strtou64_base(const char* str, uint64* value, int base);
// std::string overloads of the fixed-base conversions.
bool safe_strto32_base(const string& str, int32* value, int base);
bool safe_strto64_base(const string& str, int64* value, int base);
bool safe_strtou32_base(const string& str, uint32* value, int base);
bool safe_strtou64_base(const string& str, uint64* value, int base);
// Fixed-base overloads that take the input as startptr plus buffer_size
// instead of a NUL-terminated string.
// NOTE(review): presumably buffer_size characters are parsed starting at
// startptr, as for the base-10 (startptr, buffer_size) overloads -- confirm.
bool safe_strto32_base(const char* startptr, int buffer_size,
int32* value, int base);
bool safe_strto64_base(const char* startptr, int buffer_size,
int64* value, int base);
// u64tostr_base36()
// The inverse of safe_strtou64_base, converts the number argument to
// a string representation in base-36.
// Conversion fails if buffer is too small to hold the string and
// terminating NUL.
// Returns number of bytes written, not including terminating NUL.
// Return value 0 indicates error.
size_t u64tostr_base36(uint64 number, size_t buf_size, char* buffer);
// Similar to atoi(s), except s could be like "16k", "32M", "2G", "4t".
uint64 atoi_kmgt(const char* s);
// std::string convenience overload: hands the NUL-terminated contents of s
// to the const char* version declared just above.
inline uint64 atoi_kmgt(const string& s) {
  const char* const text = s.c_str();
  return atoi_kmgt(text);
}
// ----------------------------------------------------------------------
// FastIntToBuffer()
// FastHexToBuffer()
// FastHex64ToBuffer()
// FastHex32ToBuffer()
// FastTimeToBuffer()
// These are intended for speed. FastIntToBuffer() assumes the
// integer is non-negative. FastHexToBuffer() puts output in
// hex rather than decimal. FastTimeToBuffer() puts the output
// into RFC822 format.
//
// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
// padded to exactly 16 bytes (plus one byte for '\0')
//
// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
// padded to exactly 8 bytes (plus one byte for '\0')
//
// All functions take the output buffer as an arg. FastInt() uses
// at most 22 bytes, FastTime() uses exactly 30 bytes. They all
// return a pointer to the beginning of the output, which for
// FastHex() may not be the beginning of the input buffer. (For
// all others, we guarantee that it is.)
//
// NOTE: In 64-bit land, sizeof(time_t) is 8, so it is possible
// to pass to FastTimeToBuffer() a time whose year cannot be
// represented in 4 digits. In this case, the output buffer
// will contain the string "Invalid:<value>"
// ----------------------------------------------------------------------
// Previously documented minimums -- the buffers provided must be at least this
// long, though these numbers are subject to change:
// Int32, UInt32: 12 bytes
// Int64, UInt64, Hex: 22 bytes
// Time: 30 bytes
// Hex32: 9 bytes
// Hex64: 17 bytes
// Use kFastToBufferSize rather than hardcoding constants.
static const int kFastToBufferSize = 32;
// Each function takes the value to format and the caller-supplied output
// buffer, and returns a pointer to the beginning of the output.
char* FastInt32ToBuffer(int32 i, char* buffer);
char* FastInt64ToBuffer(int64 i, char* buffer);
// The two unsigned variants are defined inline further down in this header,
// in terms of the corresponding *ToBufferLeft() functions.
char* FastUInt32ToBuffer(uint32 i, char* buffer);
char* FastUInt64ToBuffer(uint64 i, char* buffer);
// For FastHexToBuffer() the returned pointer may not be the beginning of
// buffer, so callers have to use the return value rather than buffer itself
// (presumably the reason it is marked MUST_USE_RESULT).
char* FastHexToBuffer(int i, char* buffer) MUST_USE_RESULT;
// Output is in RFC822 format, or "Invalid:<value>" when the year cannot be
// represented in 4 digits.
char* FastTimeToBuffer(time_t t, char* buffer);
// Hex output padded to exactly 16 bytes (plus one byte for '\0').
char* FastHex64ToBuffer(uint64 i, char* buffer);
// Hex output padded to exactly 8 bytes (plus one byte for '\0').
char* FastHex32ToBuffer(uint32 i, char* buffer);
// Formats a plain int by dispatching on the width of int: the 32-bit
// formatter when int is 4 bytes wide, the 64-bit formatter otherwise.
// buffer must be at least 22 bytes long. Returns whatever the selected
// formatter returns.
inline char* FastIntToBuffer(int i, char* buffer) {
  if (sizeof(i) == 4) {
    return FastInt32ToBuffer(i, buffer);
  }
  return FastInt64ToBuffer(i, buffer);
}
// Formats a plain unsigned int by dispatching on its width: the 32-bit
// unsigned formatter when unsigned int is 4 bytes wide, the 64-bit one
// otherwise. Returns whatever the selected formatter returns.
inline char* FastUIntToBuffer(unsigned int i, char* buffer) {
  if (sizeof(i) == 4) {
    return FastUInt32ToBuffer(i, buffer);
  }
  return FastUInt64ToBuffer(i, buffer);
}
// ----------------------------------------------------------------------
// FastInt32ToBufferLeft()
// FastUInt32ToBufferLeft()
// FastInt64ToBufferLeft()
// FastUInt64ToBufferLeft()
//
// Like the Fast*ToBuffer() functions above, these are intended for speed.
// Unlike the Fast*ToBuffer() functions, however, these functions write
// their output to the beginning of the buffer (hence the name, as the
// output is left-aligned). The caller is responsible for ensuring that
// the buffer has enough space to hold the output.
//
// Returns a pointer to the end of the string (i.e. the null character
// terminating the string).
// ----------------------------------------------------------------------
char* FastInt32ToBufferLeft(int32 i, char* buffer); // at least 12 bytes
char* FastUInt32ToBufferLeft(uint32 i, char* buffer); // at least 12 bytes
char* FastInt64ToBufferLeft(int64 i, char* buffer); // at least 22 bytes
char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // at least 22 bytes
// Just define these in terms of the above.
// Writes i left-aligned into buffer via FastUInt32ToBufferLeft() and returns
// the start of the buffer (the end pointer produced by the Left variant is
// intentionally discarded).
inline char* FastUInt32ToBuffer(uint32 i, char* buffer) {
  char* const output_begin = buffer;
  (void)FastUInt32ToBufferLeft(i, output_begin);
  return output_begin;
}
// Writes i left-aligned into buffer via FastUInt64ToBufferLeft() and returns
// the start of the buffer (the end pointer produced by the Left variant is
// intentionally discarded).
inline char* FastUInt64ToBuffer(uint64 i, char* buffer) {
  char* const output_begin = buffer;
  (void)FastUInt64ToBufferLeft(i, output_begin);
  return output_begin;
}
// ----------------------------------------------------------------------
// HexDigitsPrefix()
// returns 1 if buf is prefixed by "num_digits" of hex digits
// returns 0 otherwise.
// The function checks for '\0' for string termination.
// ----------------------------------------------------------------------
int HexDigitsPrefix(const char* buf, int num_digits);
// ----------------------------------------------------------------------
// ConsumeStrayLeadingZeroes
// Eliminates all leading zeroes (unless the string itself is composed
// of nothing but zeroes, in which case one is kept: 0...0 becomes 0).
void ConsumeStrayLeadingZeroes(string* str);
// ----------------------------------------------------------------------
// ParseLeadingInt32Value
// A simple parser for int32 values. Returns the parsed value
// if a valid integer is found; else returns deflt. It does not
// check if str is entirely consumed.
// This cannot handle decimal numbers with leading 0s, since they will be
// treated as octal. If you know it's decimal, use ParseLeadingDec32Value.
// --------------------------------------------------------------------
int32 ParseLeadingInt32Value(const char* str, int32 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline int32 ParseLeadingInt32Value(const string& str, int32 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingInt32Value(text, deflt);
}
// ParseLeadingUInt32Value
// A simple parser for uint32 values. Returns the parsed value
// if a valid integer is found; else returns deflt. It does not
// check if str is entirely consumed.
// This cannot handle decimal numbers with leading 0s, since they will be
// treated as octal. If you know it's decimal, use ParseLeadingUDec32Value.
// --------------------------------------------------------------------
uint32 ParseLeadingUInt32Value(const char* str, uint32 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline uint32 ParseLeadingUInt32Value(const string& str, uint32 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingUInt32Value(text, deflt);
}
// ----------------------------------------------------------------------
// ParseLeadingDec32Value
// A simple parser for decimal int32 values. Returns the parsed value
// if a valid integer is found; else returns deflt. It does not
// check if str is entirely consumed.
// The string passed in is treated as *10 based*.
// This can handle strings with leading 0s.
// See also: ParseLeadingDec64Value
// --------------------------------------------------------------------
int32 ParseLeadingDec32Value(const char* str, int32 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline int32 ParseLeadingDec32Value(const string& str, int32 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingDec32Value(text, deflt);
}
// ParseLeadingUDec32Value
// A simple parser for decimal uint32 values. Returns the parsed value
// if a valid integer is found; else returns deflt. It does not
// check if str is entirely consumed.
// The string passed in is treated as *10 based*.
// This can handle strings with leading 0s.
// See also: ParseLeadingUDec64Value
// --------------------------------------------------------------------
uint32 ParseLeadingUDec32Value(const char* str, uint32 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline uint32 ParseLeadingUDec32Value(const string& str, uint32 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingUDec32Value(text, deflt);
}
// ----------------------------------------------------------------------
// ParseLeadingUInt64Value
// ParseLeadingInt64Value
// ParseLeadingHex64Value
// ParseLeadingDec64Value
// ParseLeadingUDec64Value
// A simple parser for long long values.
// Returns the parsed value if a
// valid integer is found; else returns deflt
// --------------------------------------------------------------------
uint64 ParseLeadingUInt64Value(const char* str, uint64 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline uint64 ParseLeadingUInt64Value(const string& str, uint64 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingUInt64Value(text, deflt);
}
int64 ParseLeadingInt64Value(const char* str, int64 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline int64 ParseLeadingInt64Value(const string& str, int64 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingInt64Value(text, deflt);
}
uint64 ParseLeadingHex64Value(const char* str, uint64 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline uint64 ParseLeadingHex64Value(const string& str, uint64 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingHex64Value(text, deflt);
}
int64 ParseLeadingDec64Value(const char* str, int64 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline int64 ParseLeadingDec64Value(const string& str, int64 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingDec64Value(text, deflt);
}
uint64 ParseLeadingUDec64Value(const char* str, uint64 deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline uint64 ParseLeadingUDec64Value(const string& str, uint64 deflt) {
  const char* const text = str.c_str();
  return ParseLeadingUDec64Value(text, deflt);
}
// ----------------------------------------------------------------------
// ParseLeadingDoubleValue
// A simple parser for double values. Returns the parsed value
// if a valid double is found; else returns deflt. It does not
// check if str is entirely consumed.
// --------------------------------------------------------------------
double ParseLeadingDoubleValue(const char* str, double deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline double ParseLeadingDoubleValue(const string& str, double deflt) {
  const char* const text = str.c_str();
  return ParseLeadingDoubleValue(text, deflt);
}
// ----------------------------------------------------------------------
// ParseLeadingBoolValue()
// A recognizer of boolean string values. Returns the parsed value
// if a valid value is found; else returns deflt. This skips leading
// whitespace, is case insensitive, and recognizes these forms:
// 0/1, false/true, no/yes, n/y
// --------------------------------------------------------------------
bool ParseLeadingBoolValue(const char* str, bool deflt);
// std::string convenience overload: forwards str.c_str() and deflt to the
// const char* version declared just above.
inline bool ParseLeadingBoolValue(const string& str, bool deflt) {
  const char* const text = str.c_str();
  return ParseLeadingBoolValue(text, deflt);
}
// ----------------------------------------------------------------------
// AutoDigitStrCmp
// AutoDigitLessThan
// StrictAutoDigitLessThan
// autodigit_less
// autodigit_greater
// strict_autodigit_less
// strict_autodigit_greater
// These are like less<string> and greater<string>, except when a
// run of digits is encountered at corresponding points in the two
// arguments. Such digit strings are compared numerically instead
// of lexicographically. Therefore if you sort by
// "autodigit_less", some machine names might get sorted as:
// exaf1
// exaf2
// exaf10
// When using "strict" comparison (AutoDigitStrCmp with the strict flag
// set to true, or the strict version of the other functions),
// strings that represent equal numbers will not be considered equal if
// the string representations are not identical. That is, "01" < "1" in
// strict mode, but "01" == "1" otherwise.
// ----------------------------------------------------------------------
// Compares the character ranges (a, alen) and (b, blen) using the auto-digit
// ordering described above; alen and blen are the lengths of a and b. The
// strict flag selects strict comparison.
// NOTE(review): the int result is presumably strcmp-style (negative, zero,
// positive) -- confirm against the implementation.
int AutoDigitStrCmp(const char* a, int alen,
const char* b, int blen,
bool strict);
// Less-than predicate over (a, alen) and (b, blen) using the non-strict
// auto-digit ordering; used by autodigit_less and autodigit_greater below.
bool AutoDigitLessThan(const char* a, int alen,
const char* b, int blen);
// Less-than predicate using the strict auto-digit ordering; used by
// strict_autodigit_less and strict_autodigit_greater below.
bool StrictAutoDigitLessThan(const char* a, int alen,
const char* b, int blen);
struct autodigit_less
: public binary_function<const string&, const string&, bool> {
bool operator()(const string& a, const string& b) const {
return AutoDigitLessThan(a.data(), a.size(), b.data(), b.size());
}
};
struct autodigit_greater
: public binary_function<const string&, const string&, bool> {
bool operator()(const string& a, const string& b) const {
return AutoDigitLessThan(b.data(), b.size(), a.data(), a.size());
}
};
struct strict_autodigit_less
: public binary_function<const string&, const string&, bool> {
bool operator()(const string& a, const string& b) const {
return StrictAutoDigitLessThan(a.data(), a.size(), b.data(), b.size());
}
};
struct strict_autodigit_greater
: public binary_function<const string&, const string&, bool> {
bool operator()(const string& a, const string& b) const {
return StrictAutoDigitLessThan(b.data(), b.size(), a.data(), a.size());
}
};
// ----------------------------------------------------------------------
// SimpleItoa()
// Description: converts an integer to a string.
// Faster than printf("%d").
//
// Return value: string
// ----------------------------------------------------------------------
// int32 overload. Formats into a stack buffer and copies the characters up
// to (not including) the terminator returned by FastInt32ToBufferLeft().
inline string SimpleItoa(int32 i) {
  char digits[16];  // Room for the longest value, -2147483648
  char* const digits_end = FastInt32ToBufferLeft(i, digits);
  return string(digits, digits_end);
}
// uint32 overload; without it a call such as SimpleItoa(5U) wouldn't
// compile. Formats into a stack buffer and copies the characters up to (not
// including) the terminator returned by FastUInt32ToBufferLeft().
inline string SimpleItoa(uint32 i) {
  char digits[16];  // Room for the longest value, 4294967295
  char* const digits_end = FastUInt32ToBufferLeft(i, digits);
  return string(digits, digits_end);
}
// int64 overload. Formats into a stack buffer and copies the characters up
// to (not including) the terminator returned by FastInt64ToBufferLeft().
inline string SimpleItoa(int64 i) {
  char digits[32];  // Room for the longest value, -9223372036854775808
  char* const digits_end = FastInt64ToBufferLeft(i, digits);
  return string(digits, digits_end);
}
// uint64 overload; without it a call such as SimpleItoa(5ULL) wouldn't
// compile. Formats into a stack buffer and copies the characters up to (not
// including) the terminator returned by FastUInt64ToBufferLeft().
inline string SimpleItoa(uint64 i) {
  char digits[32];  // Room for the longest value, 18446744073709551615
  char* const digits_end = FastUInt64ToBufferLeft(i, digits);
  return string(digits, digits_end);
}
// SimpleAtoi converts a string to an integer.
// Uses safe_strto?() for actual parsing, so strict checking is
// applied, which is to say, the string must be a base-10 integer, optionally
// followed or preceded by whitespace, and value has to be in the range of
// the corresponding integer type.
//
// Returns true if parsing was successful.
template <typename int_type>
bool MUST_USE_RESULT SimpleAtoi(const char* s, int_type* out) {
  // The destination must be exactly 32 or 64 bits wide, because it is handed
  // to one of the fixed-width safe_strto*() parsers below.
  COMPILE_ASSERT(sizeof(*out) == 4 || sizeof(*out) == 8,
                 SimpleAtoiWorksWith32Or64BitInts);
  // float and double are also 4 and 8 bytes wide, so the size check alone
  // lets them through; the parser would then store an integer bit pattern
  // into floating point storage. Reject every type that numeric_limits knows
  // about and reports as non-integer. Types without a numeric_limits
  // specialization (e.g. enums) are still accepted, as before.
  COMPILE_ASSERT(!std::numeric_limits<int_type>::is_specialized ||
                     std::numeric_limits<int_type>::is_integer,
                 SimpleAtoiDoesNotWorkWithFloatingPointTypes);
  // Pick the parser by signedness and width. The casts only reinterpret the
  // destination as the fixed-width type of the same size.
  if (std::numeric_limits<int_type>::is_signed) {  // Signed
    if (sizeof(*out) == 64 / 8) {  // 64-bit
      return safe_strto64(s, reinterpret_cast<int64*>(out));
    } else {  // 32-bit
      return safe_strto32(s, reinterpret_cast<int32*>(out));
    }
  } else {  // Unsigned
    if (sizeof(*out) == 64 / 8) {  // 64-bit
      return safe_strtou64(s, reinterpret_cast<uint64*>(out));
    } else {  // 32-bit
      return safe_strtou32(s, reinterpret_cast<uint32*>(out));
    }
  }
}
// std::string convenience overload: forwards s.c_str() and out to the
// const char* SimpleAtoi() and returns its result.
template <typename int_type>
bool MUST_USE_RESULT SimpleAtoi(const string& s, int_type* out) {
  const char* const text = s.c_str();
  return SimpleAtoi(text, out);
}
// ----------------------------------------------------------------------
// SimpleDtoa()
// SimpleFtoa()
// DoubleToBuffer()
// FloatToBuffer()
// Description: converts a double or float to a string which, if
// passed to strtod(), will produce the exact same original double
// (except in case of NaN; all NaNs are considered the same value).
// We try to keep the string short but it's not guaranteed to be as
// short as possible.
//
// DoubleToBuffer() and FloatToBuffer() write the text to the given
// buffer and return it. The buffer must be at least
// kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
// bytes for floats. kFastToBufferSize is also guaranteed to be large
// enough to hold either.
//
// Return value: string
// ----------------------------------------------------------------------
string SimpleDtoa(double value);
string SimpleFtoa(float value);
char* DoubleToBuffer(double i, char* buffer);
char* FloatToBuffer(float i, char* buffer);
// In practice, doubles should never need more than 24 bytes and floats
// should never need more than 14 (including null terminators), but we
// overestimate to be safe.
static const int kDoubleToBufferSize = 32;
static const int kFloatToBufferSize = 24;
// ----------------------------------------------------------------------
// SimpleItoaWithCommas()
// Description: converts an integer to a string.
// Puts commas every 3 spaces.
// Faster than printf("%d")?
//
// Return value: string
// ----------------------------------------------------------------------
string SimpleItoaWithCommas(int32 i);
string SimpleItoaWithCommas(uint32 i);
string SimpleItoaWithCommas(int64 i);
string SimpleItoaWithCommas(uint64 i);
// ----------------------------------------------------------------------
// ItoaKMGT()
// Description: converts an integer to a string
// Truncates values to K, M, G or T as appropriate
// Opposite of atoi_kmgt()
// e.g. 3000 -> 2K 47185920 -> 45M
//
// Return value: string
// ----------------------------------------------------------------------
string ItoaKMGT(int64 i);
// ----------------------------------------------------------------------
// ParseDoubleRange()
// Parse an expression in 'text' of the form: <double><sep><double>
// where <double> may be a double-precision number and <sep> is a
// single char or "..", and must be one of the chars in parameter
// 'separators', which may contain '-' or '.' (which means "..") or
// any chars not allowed in a double. If allow_unbounded_markers,
// <double> may also be a '?' to indicate unboundedness (if on the
// left of <sep>, means unbounded below; if on the right, means
// unbounded above). Depending on num_required_bounds, which may be
// 0, 1, or 2, <double> may also be the empty string, indicating
// unboundedness. If require_separator is false, then a single
// <double> is acceptable and is parsed as a range bounded from
// below. We also check that the character following the range must
// be in acceptable_terminators. If null_terminator_ok, then it is
// also OK if the range ends in \0 or after len chars. If
// allow_currency is true, the first <double> may be optionally
// preceded by a '$', in which case *is_currency will be true, and
// the second <double> may similarly be preceded by a '$'. In these
// cases, the '$' will be ignored (otherwise it's an error). If
// allow_comparators is true, the expression in 'text' may also be
// of the form <comparator><double>, where <comparator> is '<' or
// '>' or '<=' or '>='. separators and require_separator are
// ignored in this format, but all other parameters function as for
// the first format. Return true if the expression parsed
// successfully; false otherwise. If successful, output params are:
// 'end', which points to the char just beyond the expression;
// 'from' and 'to' are set to the values of the <double>s, and are
// -inf and inf (or unchanged, depending on dont_modify_unbounded)
// if unbounded. Output params are undefined if false is
// returned. len is the input length, or -1 if text is
// '\0'-terminated, which is more efficient.
// ----------------------------------------------------------------------
// Options controlling how ParseDoubleRange() interprets its input. The field
// descriptions below restate the ParseDoubleRange() contract documented
// above.
struct DoubleRangeOptions {
// Characters accepted as <sep>. May contain '-' or '.' (which means "..") or
// any chars not allowed in a double.
const char* separators;
// If false, a single <double> is acceptable and is parsed as a range bounded
// from below.
bool require_separator;
// The character following the range must be one of these.
const char* acceptable_terminators;
// If set, it is also OK if the range ends in \0 or after len chars.
bool null_terminator_ok;
// If set, <double> may also be a '?' to indicate unboundedness (unbounded
// below on the left of <sep>, unbounded above on the right).
bool allow_unbounded_markers;
// May be 0, 1, or 2; depending on it, <double> may also be the empty string,
// indicating unboundedness.
uint32 num_required_bounds;
// Selects whether 'from'/'to' are left unchanged instead of being set to
// -inf/inf when unbounded (presumably unchanged when true -- confirm).
bool dont_modify_unbounded;
// If true, the first <double> may be preceded by a '$' (in which case
// *is_currency will be true), and the second <double> may similarly be
// preceded by a '$'.
bool allow_currency;
// If true, the expression may also be of the form <comparator><double>,
// where <comparator> is '<' or '>' or '<=' or '>='.
bool allow_comparators;
};
// NOTE: The instruction below creates a Module titled
// NumbersFunctions within the auto-generated Doxygen documentation.
// This instruction is needed to expose global functions that are not
// within a namespace.
//
bool ParseDoubleRange(const char* text, int len, const char** end,
double* from, double* to, bool* is_currency,
const DoubleRangeOptions& opts);
// END DOXYGEN NumbersFunctions grouping
/* @} */
// These functions are deprecated.
// Do not use in new code.
// // DEPRECATED(wadetregaskis). Just call StringPrintf or SimpleFtoa.
// string FloatToString(float f, const char* format);
// // DEPRECATED(wadetregaskis). Just call StringPrintf or SimpleItoa.
// string IntToString(int i, const char* format);
// // DEPRECATED(wadetregaskis). Just call StringPrintf or SimpleItoa.
// string Int64ToString(int64 i64, const char* format);
// // DEPRECATED(wadetregaskis). Just call StringPrintf or SimpleItoa.
// string UInt64ToString(uint64 ui64, const char* format);
// // DEPRECATED(wadetregaskis). Just call StringPrintf.
// inline string FloatToString(float f) {
// return StringPrintf("%7f", f);
// }
// // DEPRECATED(wadetregaskis). Just call StringPrintf.
// inline string IntToString(int i) {
// return StringPrintf("%7d", i);
// }
// // DEPRECATED(wadetregaskis). Just call StringPrintf.
// inline string Int64ToString(int64 i64) {
// return StringPrintf("%7" PRId64, i64);
// }
// // DEPRECATED(wadetregaskis). Just call StringPrintf.
// inline string UInt64ToString(uint64 ui64) {
// return StringPrintf("%7" PRIu64, ui64);
// }
#endif // STRINGS_NUMBERS_H_