// src/kudu/gutil/strings/util.h - kudu - Git at Google

 //
 // Copyright 1999-2006 and onwards Google, Inc.
 //
 // Useful string functions and so forth.  This is a grab-bag file.
 //
 // You might also want to look at memutil.h, which holds mem*()
 // equivalents of a lot of the str*() functions in string.h,
 // eg memstr, mempbrk, etc.
 //
 // These functions work fine for UTF-8 strings as long as you can
 // consider them to be just byte strings.  For example, due to the
 // design of UTF-8 you do not need to worry about accidental matches,
 // as long as all your inputs are valid UTF-8 (use \uHHHH, not \xHH or \oOOO).
 //
 // Caveats:
 // * all the lengths in these routines refer to byte counts,
 //   not character counts.
 // * case-insensitivity in these routines assumes that all the letters
 //   in question are in the range A-Z or a-z.
 //
 // If you need Unicode specific processing (for example being aware of
 // Unicode character boundaries, or knowledge of Unicode casing rules,
 // or various forms of equivalence and normalization), take a look at
 // files in i18n/utf8.

 #ifndef STRINGS_UTIL_H_
 #define STRINGS_UTIL_H_

 #include <strings.h>

 #include <cstddef>
 #include <cstdio>
 #include <cstring>
 #include <functional>
 #include <string>
 #include <vector>

 #include "kudu/gutil/integral_types.h"
 #include "kudu/gutil/port.h"
 #include "kudu/gutil/strings/stringpiece.h"

 // Newer functions.

 namespace strings {

 // Finds the next end-of-line sequence.
 // An end-of-line sequence is one of:
 //   \n    common on unix, including mac os x
 //   \r    common on macos 9 and before
 //   \r\n  common on windows
 //
 // Returns a StringPiece that contains the end-of-line sequence (a pointer into
 // the input, 1 or 2 characters long).
 //
 // If the input does not contain an end-of-line sequence, returns an empty
 // StringPiece located at the end of the input:
 //    StringPiece(sp.data() + sp.length(), 0).

 StringPiece FindEol(StringPiece sp);

 }  // namespace strings

 // Older functions.

 // Returns a strdup() copy of src when src is non-null and holds at least one
 // character; returns NULL (without copying) when src is null or empty.
 inline char* strdup_nonempty(const char* src) {
   const bool has_content = (src != NULL) && (src[0] != '\0');
   return has_content ? strdup(src) : NULL;
 }

 // Looks for c among at most the first sz bytes of buf and returns a pointer to
 // the first match, or NULL if there is none in that window.
 // The scan also stops at the first null character, so it is only suitable for
 // null-terminated strings (and a search for '\0' itself never succeeds).
 // WARNING: Removes const-ness of string argument!
 inline char* strnchr(const char* buf, char c, int sz) {
   for (int offset = 0; offset != sz && buf[offset] != '\0'; ++offset) {
     if (buf[offset] == c) {
       return const_cast<char*>(buf + offset);
     }
   }
   return NULL;
 }

 // Finds the first occurrence of the null-terminated needle in at most the first
 // haystack_len bytes of haystack. Returns NULL if needle is not found. Returns
 // haystack if needle is empty.
 // WARNING: Removes const-ness of string argument!
 char* strnstr(const char* haystack, const char* needle, size_t haystack_len);

 // Matches a prefix (which must be a char* literal!) against the beginning of
 // str. Returns a pointer past the prefix, or NULL if the prefix wasn't matched.
 // (Like the standard strncmp(), but for efficiency doesn't call strlen() on
 // prefix, and returns a pointer rather than an int.)
 //
 // The ""'s catch people who don't pass in a literal for "prefix".
 // str is parenthesized so that expression arguments (for example a
 // conditional expression) bind correctly. Note that str appears twice in the
 // expansion, so it should not have side effects.
 #ifndef strprefix
 #define strprefix(str, prefix) \
   (strncmp((str), prefix, sizeof("" prefix "")-1) == 0 ? \
       (str) + (sizeof("" prefix "")-1) :                 \
       NULL)
 #endif

 // Same as strprefix(), but matches a case-insensitive prefix (compares with
 // strncasecmp()). prefix must be a string literal; the ""'s enforce that.
 // str is parenthesized so that expression arguments bind correctly. Note that
 // str appears twice in the expansion, so it should not have side effects.
 #ifndef strcaseprefix
 #define strcaseprefix(str, prefix) \
   (strncasecmp((str), prefix, sizeof("" prefix "")-1) == 0 ? \
       (str) + (sizeof("" prefix "")-1) :                     \
       NULL)
 #endif

 // Matches a prefix (up to the first needle_size bytes of needle) in the first
 // haystack_size bytes of haystack. Returns a pointer past the prefix, or NULL
 // if the prefix wasn't matched. (Unlike strprefix(), prefix doesn't need to be
 // a char* literal. Like the standard strncmp(), but also takes a
 // haystack_size, and returns a pointer rather than an int.)
 //
 // Always returns either NULL or haystack + needle_size.
 //
 // Some windows header sometimes #defines strnprefix to something we
 // don't want.
 #ifdef strnprefix
 #undef strnprefix
 #endif
 const char* strnprefix(const char* haystack, int haystack_size,
                        const char* needle, int needle_size);

 // Matches a case-insensitive prefix (up to the first needle_size bytes of
 // needle) in the first haystack_size bytes of haystack. Returns a pointer past
 // the prefix, or NULL if the prefix wasn't matched.
 //
 // Always returns either NULL or haystack + needle_size.
 const char* strncaseprefix(const char* haystack, int haystack_size,
                            const char* needle, int needle_size);

 // Checks whether str starts with prefix, where prefix may be any
 // null-terminated string rather than only a literal. Returns a pointer to the
 // first character after the matched prefix, or NULL when there is no match.
 // CharStar carries the constness of the argument through to the result: a
 // const char* in gives a const char* out, a char* in gives a char* out.
 template<class CharStar>
 inline CharStar var_strprefix(CharStar str, const char* prefix) {
   const int prefix_len = strlen(prefix);
   if (strncmp(str, prefix, prefix_len) != 0) {
     return NULL;
   }
   return str + prefix_len;
 }

 // Case-insensitive counterpart of var_strprefix(): compares with
 // strncasecmp() and returns a pointer just past the matched prefix, or NULL
 // when str does not start with prefix.
 template<class CharStar>
 inline CharStar var_strcaseprefix(CharStar str, const char* prefix) {
   const int prefix_len = strlen(prefix);
   if (strncasecmp(str, prefix, prefix_len) != 0) {
     return NULL;
   }
   return str + prefix_len;
 }

 // Substitutes the literal "(null)" for a NULL pointer so the result can always
 // be printed; any non-NULL input is returned unchanged. (Useful for logging.)
 inline const char* GetPrintableString(const char* const in) {
   if (in != NULL) {
     return in;
   }
   return "(null)";
 }

 // Returns whether str begins with prefix; the check itself is done by
 // StringPiece::starts_with().
 inline bool HasPrefixString(const StringPiece& str,
                             const StringPiece& prefix) {
   const bool has_prefix = str.starts_with(prefix);
   return has_prefix;
 }

 // Returns whether str ends with suffix; the check itself is done by
 // StringPiece::ends_with().
 inline bool HasSuffixString(const StringPiece& str,
                             const StringPiece& suffix) {
   const bool has_suffix = str.ends_with(suffix);
   return has_suffix;
 }

 // Returns true if the string passed in matches the pattern. The pattern
 // string can contain wildcards like * and ?
 // The backslash character (\) is an escape character for * and ?
 // We limit the patterns to having a max of 16 * or ? characters.
 // ? matches 0 or 1 character, while * matches 0 or more characters.
 bool MatchPattern(const StringPiece& str,
                   const StringPiece& pattern);

 // Returns where suffix begins in str, or NULL if str doesn't end with suffix.
 // Both arguments must be null-terminated. An empty suffix matches at the
 // terminating null character of str.
 inline char* strsuffix(char* str, const char* suffix) {
   const size_t str_len = strlen(str);
   const size_t suffix_len = strlen(suffix);
   // A suffix longer than the string can never match. Bail out before doing
   // any pointer arithmetic so no pointer in front of str is ever formed.
   if (suffix_len > str_len) {
     return NULL;
   }
   char* const tail = str + (str_len - suffix_len);
   return strcmp(tail, suffix) == 0 ? tail : NULL;
 }
 inline const char* strsuffix(const char* str, const char* suffix) {
   return const_cast<const char*>(strsuffix(const_cast<char*>(str), suffix));
 }

 // Same as strsuffix() (immediately above), but matches a case-insensitive
 // suffix.
 char* strcasesuffix(char* str, const char* suffix);
 inline const char* strcasesuffix(const char* str, const char* suffix) {
   return const_cast<const char*>(strcasesuffix(const_cast<char*>(str), suffix));
 }

 const char* strnsuffix(const char* haystack, int haystack_size,
                        const char* needle, int needle_size);
 const char* strncasesuffix(const char* haystack, int haystack_size,
                            const char* needle, int needle_size);

 // Counts how many characters equal to c appear in the null-terminated string
 // buf. A NULL buf yields zero.
 inline ptrdiff_t strcount(const char* buf, char c) {
   ptrdiff_t matches = 0;
   if (buf != NULL) {
     while (*buf != '\0') {
       if (*buf == c) {
         ++matches;
       }
       ++buf;
     }
   }
   return matches;
 }
 // Counts how many characters equal to c appear in the half-open range
 // [buf_begin, buf_end). Returns zero when buf_begin is NULL or when buf_end
 // does not lie after buf_begin.
 inline ptrdiff_t strcount(const char* buf_begin, const char* buf_end, char c) {
   if (buf_begin == NULL || buf_end <= buf_begin) {
     return 0;
   }
   ptrdiff_t matches = 0;
   const char* cursor = buf_begin;
   while (cursor != buf_end) {
     matches += (*cursor == c) ? 1 : 0;
     ++cursor;
   }
   return matches;
 }
 // Counts how many characters equal to c appear in the len bytes starting at
 // buf, by delegating to the (begin, end) overload.
 inline ptrdiff_t strcount(const char* buf, size_t len, char c) {
   const char* const buf_end = buf + len;
   return strcount(buf, buf_end, c);
 }
 // Counts how many characters equal to c appear in a C++ string, by delegating
 // to the (pointer, length) overload with the string's full size.
 inline ptrdiff_t strcount(const std::string& buf, char c) {
   const char* const bytes = buf.c_str();
   const size_t length = buf.size();
   return strcount(bytes, length, c);
 }

 // Returns a pointer to the nth occurrence of a character in a null-terminated
 // string.
 // WARNING: Removes const-ness of string argument!
 char* strchrnth(const char* str, const char& c, int n);

 // Returns a pointer to the nth occurrence of a character in a null-terminated
 // string, or the last occurrence if it occurs fewer than n times.
 // WARNING: Removes const-ness of string argument!
 char* AdjustedLastPos(const char* str, char separator, int n);

 // STL-compatible function objects for char* string keys:

 // Compares two char* strings for equality. (Works with NULL, which compares
 // equal only to another NULL). Useful in hash tables:
 //    hash_map<const char*, Value, hash<const char*>, streq> ht;
 //
 // The adaptable-functor typedefs are declared directly instead of being
 // inherited from std::binary_function, which was deprecated in C++11 and
 // removed from the standard library in C++17.
 struct streq {
   typedef const char* first_argument_type;
   typedef const char* second_argument_type;
   typedef bool result_type;

   bool operator()(const char* s1, const char* s2) const {
     if (s1 == NULL || s2 == NULL) {
       // NULL is equal only to another NULL.
       return s1 == s2;
     }
     // Compare the first characters before paying for a strcmp() call.
     return *s1 == *s2 && strcmp(s1, s2) == 0;
   }
 };

 // Compares two char* strings. (Works with NULL, which compares greater than any
 // non-NULL). Useful in maps:
 //    map<const char*, Value, strlt> m;
 //
 // The adaptable-functor typedefs are declared directly instead of being
 // inherited from std::binary_function, which was deprecated in C++11 and
 // removed from the standard library in C++17.
 struct strlt {
   typedef const char* first_argument_type;
   typedef const char* second_argument_type;
   typedef bool result_type;

   bool operator()(const char* s1, const char* s2) const {
     // Identical pointers (including NULL vs NULL) are never "less than".
     if (s1 == s2) return false;
     // s1 is non-NULL here, and every non-NULL string sorts before NULL.
     if (s2 == NULL) return true;
     // NULL sorts after every non-NULL string.
     if (s1 == NULL) return false;
     return strcmp(s1, s2) < 0;
   }
 };

 // Returns whether str has only Ascii characters (as defined by ascii_isascii()
 // in strings/ascii_ctype.h).
 bool IsAscii(const char* str, int len);
 inline bool IsAscii(const StringPiece& str) {
   // Delegates to the (pointer, length) overload using the piece's own bytes.
   const char* const bytes = str.data();
   return IsAscii(bytes, str.size());
 }

 // Returns the smallest lexicographically larger string of equal or smaller
 // length. Returns an empty string if there is no such successor (if the input
 // is empty or consists entirely of 0xff bytes).
 // Useful for calculating the smallest lexicographically larger string
 // that will not be prefixed by the input string.
 //
 // Examples:
 // "a" -> "b", "aaa" -> "aab", "aa\xff" -> "ab", "\xff" -> "", "" -> ""
 std::string PrefixSuccessor(const StringPiece& prefix);

 // Returns the immediate lexicographically-following string. This is useful to
 // turn an inclusive range into something that can be used with Bigtable's
 // SetLimitRow():
 //
 //     // Inclusive range [min_element, max_element].
 //     string min_element = ...;
 //     string max_element = ...;
 //
 //     // Equivalent range [range_start, range_end).
 //     string range_start = min_element;
 //     string range_end = ImmediateSuccessor(max_element);
 //
 // WARNING: Returns the input string with a '\0' appended; if you call c_str()
 // on the result, it will compare equal to s.
 //
 // WARNING: Transforms "" -> "\0"; this doesn't account for Bigtable's special
 // treatment of "" as infinity.
 std::string ImmediateSuccessor(const StringPiece& s);

 // Fills in *separator with a short string less than limit but greater than or
 // equal to start. If limit is greater than start, *separator is the common
 // prefix of start and limit, followed by the successor to the next character in
 // start. Examples:
 // FindShortestSeparator("foobar", "foxhunt", &sep) => sep == "fop"
 // FindShortestSeparator("abracadabra", "bacradabra", &sep) => sep == "b"
 // If limit is less than or equal to start, fills in *separator with start.
 void FindShortestSeparator(const StringPiece& start, const StringPiece& limit,
                            std::string* separator);

 // Copies at most n-1 bytes from src to dest, and returns dest. If n >= 1, null
 // terminates dest; otherwise, returns dest unchanged. Unlike strncpy(), only
 // puts one null character at the end of dest, and never reads more than the
 // first n-1 bytes of src.
 inline char* safestrncpy(char* dest, const char* src, size_t n) {
   if (n < 1) return dest;

   // Avoid using non-ANSI memccpy(), which is also deprecated in MSVC.
   // Only the first n-1 bytes of src can ever be kept, so stop there instead
   // of reading src[n-1] just to overwrite it with the terminator.
   const size_t max_copy = n - 1;
   for (size_t i = 0; i < max_copy; ++i) {
     if ((dest[i] = src[i]) == '\0')
       return dest;
   }

   dest[max_copy] = '\0';
   return dest;
 }

 namespace strings {

 // BSD-style safe and consistent string copy functions.
 // Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
 // Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
 // long as |dst_size| is not 0.  Returns the length of |src| in characters.
 // If the return value is >= dst_size, then the output was truncated.
 // NOTE: All sizes are in number of characters, NOT in bytes.
 size_t strlcpy(char* dst, const char* src, size_t dst_size);

 } // namespace strings

 // Replaces the first occurrence (if replace_all is false) or all occurrences
 // (if replace_all is true) of oldsub in s with newsub. In the second version,
 // *res must be distinct from all the other arguments.
 std::string StringReplace(const StringPiece& s, const StringPiece& oldsub,
                      const StringPiece& newsub, bool replace_all);
 void StringReplace(const StringPiece& s, const StringPiece& oldsub,
                    const StringPiece& newsub, bool replace_all,
                    std::string* res);

 // Replaces all occurrences of substring in s with replacement. Returns the
 // number of instances replaced. s must be distinct from the other arguments.
 //
 // Less flexible, but faster, than RE::GlobalReplace().
 int GlobalReplaceSubstring(const StringPiece& substring,
                            const StringPiece& replacement,
                            std::string* s);

 // Removes v[i] for every element i in indices. Does *not* preserve the order of
 // v. indices must be sorted in strict increasing order (no duplicates). Runs in
 // O(indices.size()).
 void RemoveStrings(std::vector<std::string>* v, const std::vector<int>& indices);

 // Case-insensitive strstr(); use system strcasestr() instead.
 // WARNING: Removes const-ness of string argument!
 char* gstrcasestr(const char* haystack, const char* needle);

 // Finds (case insensitively) the first occurrence of (null terminated) needle
 // in at most the first len bytes of haystack. Returns a pointer into haystack,
 // or NULL if needle wasn't found.
 // WARNING: Removes const-ness of haystack!
 const char* gstrncasestr(const char* haystack, const char* needle, size_t len);
 char* gstrncasestr(char* haystack, const char* needle, size_t len);

 // Finds (case insensitively), in str (which is a list of tokens separated by
 // non_alpha), a token prefix and a token suffix. Returns a pointer into str of
 // the position of prefix, or NULL if not found.
 // WARNING: Removes const-ness of string argument!
 char* gstrncasestr_split(const char* str,
                          const char* prefix, char non_alpha,
                          const char* suffix,
                          size_t n);

 // Finds (case insensitively) needle in haystack, paying attention only to
 // alphanumerics in either string. Returns a pointer into haystack, or NULL if
 // not found.
 // Example: strcasestr_alnum("This is a longer test string", "IS-A-LONGER")
 // returns a pointer to "is a longer".
 // WARNING: Removes const-ness of string argument!
 char* strcasestr_alnum(const char* haystack, const char* needle);

 // Returns the number of times substring appears in text.
 // Note: Runs in O(text.length() * substring.length()). Do *not* use on long
 // strings.
 int CountSubstring(StringPiece text, StringPiece substring);

 // Finds, in haystack (which is a list of tokens separated by delim), a token
 // equal to needle. Returns a pointer into haystack, or NULL if not found (or
 // either needle or haystack is empty).
 const char* strstr_delimited(const char* haystack,
                              const char* needle,
                              char delim);

 // Gets the next token from string *stringp, where tokens are strings separated
 // by characters from delim.
 char* gstrsep(char** stringp, const char* delim);

 // Appends StringPiece(data, len) to *s.
 void FastStringAppend(std::string* s, const char* data, int len);

 // Returns a duplicate of the_string, with memory allocated by new[].
 char* strdup_with_new(const char* the_string);

 // Returns a duplicate of up to the first max_length bytes of the_string, with
 // memory allocated by new[].
 char* strndup_with_new(const char* the_string, int max_length);

 // Finds, in the_string, the first "word" (consecutive !ascii_isspace()
 // characters). Returns pointer to the beginning of the word, and sets *end_ptr
 // to the character after the word (which may be space or '\0'); returns NULL
 // (and *end_ptr is undefined) if no next word found.
 // end_ptr must not be NULL.
 const char* ScanForFirstWord(const char* the_string, const char** end_ptr);
 inline char* ScanForFirstWord(char* the_string, char** end_ptr) {
   // Non-const overload: reuse the const implementation on the same buffer.
   // implicit_cast<> would be more appropriate for casting to const,
   // but we save the inclusion of "base/casts.h" here by using const_cast<>.
   const char* const const_string = const_cast<const char*>(the_string);
   const char** const const_end_ptr = const_cast<const char**>(end_ptr);
   const char* const word = ScanForFirstWord(const_string, const_end_ptr);
   return const_cast<char*>(word);
 }

 // For the following functions, an "identifier" is a letter or underscore,
 // followed by letters, underscores, or digits.

 // Returns a pointer past the end of the "identifier" (see above) beginning at
 // str, or NULL if str doesn't start with an identifier.
 const char* AdvanceIdentifier(const char* str);
 inline char* AdvanceIdentifier(char* str) {
   // Non-const overload: reuse the const implementation on the same buffer.
   // implicit_cast<> would be more appropriate for casting to const,
   // but we save the inclusion of "base/casts.h" here by using const_cast<>.
   const char* const const_str = const_cast<const char*>(str);
   const char* const past_identifier = AdvanceIdentifier(const_str);
   return const_cast<char*>(past_identifier);
 }

 // Returns whether str is an "identifier" (see above).
 bool IsIdentifier(const char* str);

 // Finds the first tag and value in a string of tag/value pairs.
 //
 // The first pair begins after the first occurrence of attribute_separator (or
 // string_terminal, if not '\0'); tag_value_separator separates the tag and
 // value; and the value ends before the following occurrence of
 // attribute_separator (or string_terminal, if not '\0').
 //
 // Returns true (and populates tag, tag_len, value, and value_len) if a
 // tag/value pair is found; returns false otherwise.
 bool FindTagValuePair(const char* in_str, char tag_value_separator,
                       char attribute_separator, char string_terminal,
                       char** tag, int* tag_len,
                       char** value, int* value_len);

 // Inserts separator after every interval characters in *s (but never appends to
 // the end of the original *s).
 void UniformInsertString(std::string* s, int interval, const char* separator);

 // Inserts separator into s at each specified index. indices must be sorted in
 // ascending order.
 void InsertString(
     std::string* s, const std::vector<uint32>& indices, char const* separator);

 // Finds the nth occurrence of c in s; returns the index in s of that
 // occurrence, or string::npos if fewer than n occurrences.
 int FindNth(StringPiece s, char c, int n);

 // Finds the nth-to-last occurrence of c in s; returns the index in s of that
 // occurrence, or string::npos if fewer than n occurrences.
 int ReverseFindNth(StringPiece s, char c, int n);

 // Returns whether s contains only whitespace characters (including the case
 // where s is empty).
 bool OnlyWhitespace(const StringPiece& s);

 // Formats a string in the same fashion as snprintf(), but returns either the
 // number of characters written, or zero if not enough space was available.
 // (snprintf() returns the number of characters that would have been written if
 // enough space had been available.)
 //
 // A drop-in replacement for the safe_snprintf() macro.
 int SafeSnprintf(char* str, size_t size, const char* format, ...)
     PRINTF_ATTRIBUTE(3, 4);

 // Reads a line (terminated by delim) from file into *str. Reads delim from
 // file, but doesn't copy it into *str. Returns true if read a delim-terminated
 // line, or false on end-of-file or error.
 bool GetlineFromStdioFile(FILE* file, std::string* str, char delim);

 #endif  // STRINGS_UTIL_H_
	//
	// Copyright 1999-2006 and onwards Google, Inc.
	//
	// Useful string functions and so forth. This is a grab-bag file.
	//
	// You might also want to look at memutil.h, which holds mem*()
	// equivalents of a lot of the str*() functions in string.h,
	// eg memstr, mempbrk, etc.
	//
	// These functions work fine for UTF-8 strings as long as you can
	// consider them to be just byte strings. For example, due to the
	// design of UTF-8 you do not need to worry about accidental matches,
	// as long as all your inputs are valid UTF-8 (use \uHHHH, not \xHH or \oOOO).
	//
	// Caveats:
	// * all the lengths in these routines refer to byte counts,
	// not character counts.
	// * case-insensitivity in these routines assumes that all the letters
	// in question are in the range A-Z or a-z.
	//
	// If you need Unicode specific processing (for example being aware of
	// Unicode character boundaries, or knowledge of Unicode casing rules,
	// or various forms of equivalence and normalization), take a look at
	// files in i18n/utf8.

	#ifndef STRINGS_UTIL_H_
	#define STRINGS_UTIL_H_

	#include <strings.h>

	#include <cstddef>
	#include <cstdio>
	#include <cstring>
	#include <functional>
	#include <string>
	#include <vector>

	#include "kudu/gutil/integral_types.h"
	#include "kudu/gutil/port.h"
	#include "kudu/gutil/strings/stringpiece.h"

	// Newer functions.

	namespace strings {

	// Finds the next end-of-line sequence.
	// An end-of-line sequence is one of:
	// \n common on unix, including mac os x
	// \r common on macos 9 and before
	// \r\n common on windows
	//
	// Returns a StringPiece that contains the end-of-line sequence (a pointer into
	// the input, 1 or 2 characters long).
	//
	// If the input does not contain an end-of-line sequence, returns an empty
	// StringPiece located at the end of the input:
	// StringPiece(sp.data() + sp.length(), 0).

	StringPiece FindEol(StringPiece sp);

	} // namespace strings

	// Older functions.

	// Duplicates a non-null, non-empty char* string. Returns a pointer to the new
	// string, or NULL if the input is null or empty.
	inline char* strdup_nonempty(const char* src) {
	  // Only a non-null string with at least one character is copied.
	  const bool has_content = (src != NULL) && (src[0] != '\0');
	  return has_content ? strdup(src) : NULL;
	}

	// Finds the first occurrence of a character in at most a given number of bytes
	// of a char* string. Returns a pointer to the first occurrence, or NULL if no
	// occurrence found in the first sz bytes.
	// Never searches past the first null character in the string; therefore, only
	// suitable for null-terminated strings.
	// WARNING: Removes const-ness of string argument!
	inline char* strnchr(const char* buf, char c, int sz) {
	  // Walk at most sz bytes, giving up early at the first null character.
	  for (int offset = 0; offset != sz && buf[offset] != '\0'; ++offset) {
	    if (buf[offset] == c) {
	      return const_cast<char*>(buf + offset);
	    }
	  }
	  return NULL;
	}

	// Finds the first occurrence of the null-terminated needle in at most the first
	// haystack_len bytes of haystack. Returns NULL if needle is not found. Returns
	// haystack if needle is empty.
	// WARNING: Removes const-ness of string argument!
	char* strnstr(const char* haystack, const char* needle, size_t haystack_len);

	// Matches a prefix (which must be a char* literal!) against the beginning of
	// str. Returns a pointer past the prefix, or NULL if the prefix wasn't matched.
	// (Like the standard strncmp(), but for efficiency doesn't call strlen() on
	// prefix, and returns a pointer rather than an int.)
	//
	// The ""'s catch people who don't pass in a literal for "prefix".
	// str is parenthesized so that expression arguments (for example a
	// conditional expression) bind correctly. Note that str appears twice in the
	// expansion, so it should not have side effects.
	#ifndef strprefix
	#define strprefix(str, prefix) \
	  (strncmp((str), prefix, sizeof("" prefix "")-1) == 0 ? \
	      (str) + (sizeof("" prefix "")-1) : \
	      NULL)
	#endif

	// Same as strprefix(), but matches a case-insensitive prefix (compares with
	// strncasecmp()). prefix must be a string literal; the ""'s enforce that.
	// str is parenthesized so that expression arguments bind correctly. Note that
	// str appears twice in the expansion, so it should not have side effects.
	#ifndef strcaseprefix
	#define strcaseprefix(str, prefix) \
	  (strncasecmp((str), prefix, sizeof("" prefix "")-1) == 0 ? \
	      (str) + (sizeof("" prefix "")-1) : \
	      NULL)
	#endif

	// Matches a prefix (up to the first needle_size bytes of needle) in the first
	// haystack_size bytes of haystack. Returns a pointer past the prefix, or NULL
	// if the prefix wasn't matched. (Unlike strprefix(), prefix doesn't need to be
	// a char* literal. Like the standard strncmp(), but also takes a
	// haystack_size, and returns a pointer rather than an int.)
	//
	// Always returns either NULL or haystack + needle_size.
	//
	// Some windows header sometimes #defines strnprefix to something we
	// don't want.
	#ifdef strnprefix
	#undef strnprefix
	#endif
	const char* strnprefix(const char* haystack, int haystack_size,
	const char* needle, int needle_size);

	// Matches a case-insensitive prefix (up to the first needle_size bytes of
	// needle) in the first haystack_size bytes of haystack. Returns a pointer past
	// the prefix, or NULL if the prefix wasn't matched.
	//
	// Always returns either NULL or haystack + needle_size.
	const char* strncaseprefix(const char* haystack, int haystack_size,
	const char* needle, int needle_size);

	// Matches a prefix; returns a pointer past the prefix, or NULL if not found.
	// (Like strprefix() and strcaseprefix() but not restricted to searching for
	// char* literals). Templated so searching a const char* returns a const char*,
	// and searching a non-const char* returns a non-const char*.
	template<class CharStar>
	inline CharStar var_strprefix(CharStar str, const char* prefix) {
	  // A mismatch anywhere in the first strlen(prefix) bytes means no prefix.
	  const int prefix_len = strlen(prefix);
	  if (strncmp(str, prefix, prefix_len) != 0) {
	    return NULL;
	  }
	  return str + prefix_len;
	}

	// Same as var_strprefix() (immediately above), but matches a case-insensitive
	// prefix.
	template<class CharStar>
	inline CharStar var_strcaseprefix(CharStar str, const char* prefix) {
	  // Same as var_strprefix(), but the comparison ignores case.
	  const int prefix_len = strlen(prefix);
	  if (strncasecmp(str, prefix, prefix_len) != 0) {
	    return NULL;
	  }
	  return str + prefix_len;
	}

	// Returns input, or "(null)" if NULL. (Useful for logging.)
	inline const char* GetPrintableString(const char* const in) {
	  // Non-NULL input passes through untouched; NULL becomes a printable marker.
	  if (in != NULL) {
	    return in;
	  }
	  return "(null)";
	}

	// Returns whether str begins with prefix.
	inline bool HasPrefixString(const StringPiece& str,
	                            const StringPiece& prefix) {
	  // The check itself is done by StringPiece::starts_with().
	  const bool has_prefix = str.starts_with(prefix);
	  return has_prefix;
	}

	// Returns whether str ends with suffix.
	inline bool HasSuffixString(const StringPiece& str,
	                            const StringPiece& suffix) {
	  // The check itself is done by StringPiece::ends_with().
	  const bool has_suffix = str.ends_with(suffix);
	  return has_suffix;
	}

	// Returns true if the string passed in matches the pattern. The pattern
	// string can contain wildcards like * and ?
	// The backslash character (\) is an escape character for * and ?
	// We limit the patterns to having a max of 16 * or ? characters.
	// ? matches 0 or 1 character, while * matches 0 or more characters.
	bool MatchPattern(const StringPiece& str,
	const StringPiece& pattern);

	// Returns where suffix begins in str, or NULL if str doesn't end with suffix.
	// Both arguments must be null-terminated. An empty suffix matches at the
	// terminating null character of str.
	inline char* strsuffix(char* str, const char* suffix) {
	  const size_t str_len = strlen(str);
	  const size_t suffix_len = strlen(suffix);
	  // A suffix longer than the string can never match. Bail out before doing
	  // any pointer arithmetic so no pointer in front of str is ever formed.
	  if (suffix_len > str_len) {
	    return NULL;
	  }
	  char* const tail = str + (str_len - suffix_len);
	  return strcmp(tail, suffix) == 0 ? tail : NULL;
	}
	inline const char* strsuffix(const char* str, const char* suffix) {
	  // Const-preserving overload: forward to the char* overload. The casts must
	  // name pointer types (const char* / char*); they only add or remove
	  // constness and never change the bytes being searched.
	  return const_cast<const char*>(strsuffix(const_cast<char*>(str), suffix));
	}

	// Same as strsuffix() (immediately above), but matches a case-insensitive
	// suffix.
	char* strcasesuffix(char* str, const char* suffix);
	inline const char* strcasesuffix(const char* str, const char* suffix) {
	  // Const-preserving overload: forward to the char* overload. The casts must
	  // name pointer types (const char* / char*); they only add or remove
	  // constness and never change the bytes being searched.
	  return const_cast<const char*>(strcasesuffix(const_cast<char*>(str), suffix));
	}

	const char* strnsuffix(const char* haystack, int haystack_size,
	const char* needle, int needle_size);
	const char* strncasesuffix(const char* haystack, int haystack_size,
	const char* needle, int needle_size);

	// Returns the number of times a character occurs in a string for a null
	// terminated string.
	inline ptrdiff_t strcount(const char* buf, char c) {
	  // A NULL buf counts as zero occurrences.
	  ptrdiff_t matches = 0;
	  if (buf != NULL) {
	    while (*buf != '\0') {
	      if (*buf == c) {
	        ++matches;
	      }
	      ++buf;
	    }
	  }
	  return matches;
	}
	// Returns the number of times a character occurs in a string for a string
	// defined by a pointer to the first character and a pointer just past the last
	// character.
	inline ptrdiff_t strcount(const char* buf_begin, const char* buf_end, char c) {
	  // A NULL start, or an end that does not lie after the start, counts as zero.
	  if (buf_begin == NULL || buf_end <= buf_begin) {
	    return 0;
	  }
	  ptrdiff_t matches = 0;
	  const char* cursor = buf_begin;
	  while (cursor != buf_end) {
	    matches += (*cursor == c) ? 1 : 0;
	    ++cursor;
	  }
	  return matches;
	}
	// Returns the number of times a character occurs in a string for a string
	// defined by a pointer to the first char and a length:
	inline ptrdiff_t strcount(const char* buf, size_t len, char c) {
	  // Delegates to the (begin, end) overload.
	  const char* const buf_end = buf + len;
	  return strcount(buf, buf_end, c);
	}
	// Returns the number of times a character occurs in a string for a C++ string:
	inline ptrdiff_t strcount(const std::string& buf, char c) {
	  // Delegates to the (pointer, length) overload with the string's full size.
	  const char* const bytes = buf.c_str();
	  const size_t length = buf.size();
	  return strcount(bytes, length, c);
	}

	// Returns a pointer to the nth occurrence of a character in a null-terminated
	// string.
	// WARNING: Removes const-ness of string argument!
	char* strchrnth(const char* str, const char& c, int n);

	// Returns a pointer to the nth occurrence of a character in a null-terminated
	// string, or the last occurrence if it occurs fewer than n times.
	// WARNING: Removes const-ness of string argument!
	char* AdjustedLastPos(const char* str, char separator, int n);

	// STL-compatible function objects for char* string keys:

	// Compares two char* strings for equality. (Works with NULL, which compares
	// equal only to another NULL). Useful in hash tables:
	//    hash_map<const char*, Value, hash<const char*>, streq> ht;
	//
	// The adaptable-functor typedefs are declared directly instead of being
	// inherited from std::binary_function, which was deprecated in C++11 and
	// removed from the standard library in C++17.
	struct streq {
	  typedef const char* first_argument_type;
	  typedef const char* second_argument_type;
	  typedef bool result_type;

	  bool operator()(const char* s1, const char* s2) const {
	    if (s1 == NULL || s2 == NULL) {
	      // NULL is equal only to another NULL.
	      return s1 == s2;
	    }
	    // Compare the first characters (not the pointers) before paying for a
	    // strcmp() call, so equal strings at different addresses still match.
	    return *s1 == *s2 && strcmp(s1, s2) == 0;
	  }
	};

	// Compares two char* strings. (Works with NULL, which compares greater than any
	// non-NULL). Useful in maps:
	//    map<const char*, Value, strlt> m;
	//
	// The adaptable-functor typedefs are declared directly instead of being
	// inherited from std::binary_function, which was deprecated in C++11 and
	// removed from the standard library in C++17.
	struct strlt {
	  typedef const char* first_argument_type;
	  typedef const char* second_argument_type;
	  typedef bool result_type;

	  bool operator()(const char* s1, const char* s2) const {
	    // Identical pointers (including NULL vs NULL) are never "less than".
	    if (s1 == s2) return false;
	    // s1 is non-NULL here, and every non-NULL string sorts before NULL.
	    if (s2 == NULL) return true;
	    // NULL sorts after every non-NULL string.
	    if (s1 == NULL) return false;
	    return strcmp(s1, s2) < 0;
	  }
	};

	// Returns whether str has only Ascii characters (as defined by ascii_isascii()
	// in strings/ascii_ctype.h).
	bool IsAscii(const char* str, int len);
	inline bool IsAscii(const StringPiece& str) {
	  // Delegates to the (pointer, length) overload using the piece's own bytes.
	  const char* const bytes = str.data();
	  return IsAscii(bytes, str.size());
	}

	// Returns the smallest string that is lexicographically larger than prefix and
	// of equal or smaller length. Returns an empty string if there is no such
	// successor (if the input is empty or consists entirely of 0xff bytes).
	// Useful for calculating the smallest lexicographically larger string
	// that will not be prefixed by the input string.
	//
	// Examples (note that a trailing 0xff byte is dropped and the byte before it
	// is incremented, as in the third example):
	// "a" -> "b", "aaa" -> "aab", "aa\xff" -> "ab", "\xff" -> "", "" -> ""
	std::string PrefixSuccessor(const StringPiece& prefix);

	// Returns the immediate lexicographically-following string of s, i.e. s with
	// a single '\0' byte appended. This is useful to turn an inclusive range into
	// something that can be used with Bigtable's SetLimitRow():
	//
	// // Inclusive range [min_element, max_element].
	// string min_element = ...;
	// string max_element = ...;
	//
	// // Equivalent range [range_start, range_end).
	// string range_start = min_element;
	// string range_end = ImmediateSuccessor(max_element);
	//
	// WARNING: Returns the input string with a '\0' appended; if you call c_str()
	// on the result, it will compare equal to s (C-string comparison stops at the
	// appended '\0').
	//
	// WARNING: Transforms "" -> "\0"; this doesn't account for Bigtable's special
	// treatment of "" as infinity.
	std::string ImmediateSuccessor(const StringPiece& s);

	// Fills in *separator with a short string less than limit but greater than or
	// equal to start. If limit is greater than start, *separator is the common
	// prefix of start and limit, followed by the successor to the next character in
	// start. Examples:
	// FindShortestSeparator("foobar", "foxhunt", &sep) => sep == "fop"
	//   (common prefix "fo", then 'o' from start advanced to 'p')
	// FindShortestSeparator("abracadabra", "bacradabra", &sep) => sep == "b"
	//   (empty common prefix, then 'a' from start advanced to 'b')
	// If limit is less than or equal to start, fills in *separator with start.
	// NOTE(review): separator is an output parameter and presumably must not be
	// NULL -- confirm against the implementation.
	void FindShortestSeparator(const StringPiece& start, const StringPiece& limit,
	std::string* separator);

	// Bounded string copy: transfers at most n-1 bytes of src into dest and hands
	// back dest. When n >= 1 the result is always null-terminated; when n is 0,
	// dest is returned untouched. In contrast to strncpy(), exactly one null
	// character is written -- the remainder of dest is not zero-filled.
	inline char* safestrncpy(char* dest, const char* src, size_t n) {
	  if (n == 0) {
	    return dest;
	  }

	  // Hand-written loop instead of the non-ANSI memccpy(), which is also
	  // deprecated in MSVC.
	  size_t pos = 0;
	  while (pos < n) {
	    const char ch = src[pos];
	    dest[pos] = ch;
	    if (ch == '\0') {
	      // The terminator was copied within the limit; nothing left to do.
	      return dest;
	    }
	    ++pos;
	  }

	  // src did not end within the first n bytes: truncate by overwriting the
	  // last copied byte with the terminator.
	  dest[n - 1] = '\0';
	  return dest;
	}

	namespace strings {

	// BSD-style safe and consistent string copy functions.
	// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
	// Copies at most |dst_size|-1 characters, and always NULL terminates |dst|, as
	// long as |dst_size| is not 0. Returns the length of |src| in characters.
	// If the return value is >= dst_size, then the output was truncated.
	// NOTE: All sizes are in number of characters, NOT in bytes.
	size_t strlcpy(char* dst, const char* src, size_t dst_size);

	} // namespace strings

	// Replaces the first occurrence (if replace_all is false) or all occurrences
	// (if replace_all is true) of oldsub in s with newsub. The first version
	// returns the result as a new string; the second version delivers it through
	// *res, and *res must be distinct from all the other arguments.
	// NOTE(review): whether the second version overwrites *res or appends to it is
	// not stated here -- confirm against the implementation.
	std::string StringReplace(const StringPiece& s, const StringPiece& oldsub,
	const StringPiece& newsub, bool replace_all);
	void StringReplace(const StringPiece& s, const StringPiece& oldsub,
	const StringPiece& newsub, bool replace_all,
	std::string* res);

	// Replaces all occurrences of substring in *s with replacement, modifying *s
	// in place. Returns the number of instances replaced. s must be distinct from
	// the other arguments.
	//
	// Less flexible, but faster, than RE::GlobalReplace().
	int GlobalReplaceSubstring(const StringPiece& substring,
	const StringPiece& replacement,
	std::string* s);

	// Removes v[i] for every element i in indices. Does not preserve the order of
	// the remaining elements of v. indices must be sorted in strict increasing
	// order (no duplicates). Runs in O(indices.size()).
	// NOTE(review): presumably every index must be a valid position in *v --
	// confirm against the implementation.
	void RemoveStrings(std::vector<std::string>* v, const std::vector<int>& indices);

	// Case-insensitive strstr(); use system strcasestr() instead.
	// WARNING: Removes const-ness of string argument! (haystack is const char*,
	// yet the result is a mutable char*.)
	char* gstrcasestr(const char* haystack, const char* needle);

	// Finds (case insensitively) the first occurrence of (null terminated) needle
	// in at most the first len bytes of haystack. Returns a pointer into haystack,
	// or NULL if needle wasn't found.
	// WARNING: Removes const-ness of haystack!
	// NOTE(review): both overloads below return a pointer with the same
	// const-ness as their haystack parameter, so the warning above looks stale --
	// confirm.
	const char* gstrncasestr(const char* haystack, const char* needle, size_t len);
	char* gstrncasestr(char* haystack, const char* needle, size_t len);

	// Finds (case insensitively), in str (which is a list of tokens separated by
	// non_alpha), a token prefix and a token suffix. Returns a pointer into str of
	// the position of prefix, or NULL if not found.
	// NOTE(review): the meaning of n is not documented here; presumably it bounds
	// how many bytes of str are searched, as len does for gstrncasestr() --
	// confirm against the implementation.
	// WARNING: Removes const-ness of string argument! (str is const char*, yet the
	// result is a mutable char*.)
	char* gstrncasestr_split(const char* str,
	const char* prefix, char non_alpha,
	const char* suffix,
	size_t n);

	// Finds (case insensitively) needle in haystack, paying attention only to
	// alphanumerics in either string (so non-alphanumeric characters on either
	// side are ignored when matching). Returns a pointer into haystack, or NULL if
	// not found.
	// Example: strcasestr_alnum("This is a longer test string", "IS-A-LONGER")
	// returns a pointer to "is a longer".
	// WARNING: Removes const-ness of string argument! (haystack is const char*,
	// yet the result is a mutable char*.)
	char* strcasestr_alnum(const char* haystack, const char* needle);

	// Returns the number of times substring appears in text.
	// Note: Runs in O(text.length() * substring.length()). Do not use on long
	// strings.
	// NOTE(review): whether overlapping occurrences are counted, and the result
	// for an empty substring, are not stated here -- confirm against the
	// implementation.
	int CountSubstring(StringPiece text, StringPiece substring);

	// Finds, in haystack (which is a list of tokens separated by delim), a token
	// equal to needle. Returns a pointer into haystack, or NULL if not found (or
	// if either needle or haystack is empty).
	const char* strstr_delimited(const char* haystack,
	const char* needle,
	char delim);

	// Gets the next token from string *stringp, where tokens are strings separated
	// by characters from delim.
	// NOTE(review): presumably mirrors BSD strsep() (overwrites the delimiter with
	// '\0' and advances *stringp past it) -- confirm against the implementation.
	char* gstrsep(char** stringp, const char* delim);

	// Appends StringPiece(data, len), i.e. the len bytes starting at data, to *s.
	void FastStringAppend(std::string* s, const char* data, int len);

	// Returns a duplicate of the_string, with memory allocated by new[] (so it
	// must be released with delete[], not free()).
	char* strdup_with_new(const char* the_string);

	// Returns a duplicate of up to the first max_length bytes of the_string, with
	// memory allocated by new[] (so it must be released with delete[], not
	// free()).
	char* strndup_with_new(const char* the_string, int max_length);

	// Locates the first "word" in the_string, where a word is a run of consecutive
	// !ascii_isspace() characters. On success returns a pointer to the start of
	// the word and stores in *end_ptr a pointer to the character just after it
	// (which may be space or '\0'). Returns NULL (with *end_ptr undefined) if no
	// word is found. end_ptr must not be NULL.
	const char* ScanForFirstWord(const char* the_string, const char** end_ptr);

	// Overload for mutable strings; delegates to the const overload declared just
	// before it and hands the result back as char*.
	inline char* ScanForFirstWord(char* the_string, char** end_ptr) {
	  // implicit_cast<> would be more appropriate for casting to const,
	  // but we save the inclusion of "base/casts.h" here by using const_cast<>.
	  const char* const const_string = const_cast<const char*>(the_string);
	  const char** const const_end_ptr = const_cast<const char**>(end_ptr);
	  const char* const word = ScanForFirstWord(const_string, const_end_ptr);
	  return const_cast<char*>(word);
	}

	// For the following functions, an "identifier" is a letter or underscore,
	// followed by letters, underscores, or digits.

	// Returns a pointer past the end of the "identifier" (a letter or underscore,
	// followed by letters, underscores, or digits) beginning at str, or NULL if str
	// doesn't start with an identifier.
	const char* AdvanceIdentifier(const char* str);

	// Overload for mutable strings: forwards to the const overload and hands the
	// result back as char*.
	inline char* AdvanceIdentifier(char* str) {
	  // implicit_cast<> would be more appropriate for casting to const,
	  // but we save the inclusion of "base/casts.h" here by using const_cast<>.
	  // Both casts must name pointer types: const_cast to plain char is ill-formed.
	  return const_cast<char*>(AdvanceIdentifier(const_cast<const char*>(str)));
	}

	// Returns whether str is an "identifier": a letter or underscore, followed by
	// letters, underscores, or digits.
	bool IsIdentifier(const char* str);

	// Finds the first tag and value in a string of tag/value pairs.
	//
	// The first pair begins after the first occurrence of attribute_separator (or
	// string_terminal, if not '\0'); tag_value_separator separates the tag and
	// value; and the value ends before the following occurrence of
	// attribute_separator (or string_terminal, if not '\0').
	//
	// Returns true (and populates tag, tag_len, value, and value_len) if a
	// tag/value pair is found; returns false otherwise.
	// NOTE(review): *tag and *value are mutable char* although in_str is const;
	// presumably they point into in_str and are not null-terminated (hence the
	// separate lengths) -- confirm against the implementation.
	bool FindTagValuePair(const char* in_str, char tag_value_separator,
	char attribute_separator, char string_terminal,
	char** tag, int* tag_len,
	char** value, int* value_len);

	// Inserts separator after every interval characters in *s (but never appends to
	// the end of the original *s). Modifies *s in place.
	void UniformInsertString(std::string* s, int interval, const char* separator);

	// Inserts separator into *s at each specified index. indices must be sorted in
	// ascending order.
	// NOTE(review): presumably each index refers to a position in the original
	// (pre-insertion) contents of *s -- confirm against the implementation.
	void InsertString(
	std::string* s, const std::vector<uint32>& indices, char const* separator);

	// Finds the nth occurrence of c in s; returns the index in s of that
	// occurrence, or string::npos if fewer than n occurrences.
	// NOTE(review): the return type is int, which cannot hold string::npos
	// exactly; the failure value presumably narrows to -1 -- confirm before
	// comparing the result against string::npos.
	int FindNth(StringPiece s, char c, int n);

	// Finds the nth-to-last occurrence of c in s; returns the index in s of that
	// occurrence, or string::npos if fewer than n occurrences.
	// NOTE(review): the return type is int, which cannot hold string::npos
	// exactly; the failure value presumably narrows to -1 -- confirm before
	// comparing the result against string::npos.
	int ReverseFindNth(StringPiece s, char c, int n);

	// Returns whether s contains only whitespace characters (including the case
	// where s is empty, for which the result is true).
	// NOTE(review): which characters count as whitespace (presumably those
	// accepted by ascii_isspace()) is not visible here -- confirm.
	bool OnlyWhitespace(const StringPiece& s);

	// Formats a string in the same fashion as snprintf(), but returns either the
	// number of characters written, or zero if not enough space was available.
	// (snprintf() returns the number of characters that would have been written if
	// enough space had been available.)
	//
	// A drop-in replacement for the safe_snprintf() macro.
	//
	// NOTE(review): PRINTF_ATTRIBUTE(3, 4) presumably enables compiler checking of
	// format (argument 3) against the variadic arguments (from argument 4) --
	// confirm against the macro's definition.
	int SafeSnprintf(char* str, size_t size, const char* format, ...)
	PRINTF_ATTRIBUTE(3, 4);

	// Reads a line (terminated by delim) from file into *str. Consumes delim from
	// file, but doesn't copy it into *str. Returns true if a delim-terminated line
	// was read, or false on end-of-file or error.
	// NOTE(review): whether *str is cleared before reading, and what it holds when
	// false is returned, is not stated here -- confirm against the implementation.
	bool GetlineFromStdioFile(FILE* file, std::string* str, char delim);

	#endif // STRINGS_UTIL_H_