be/src/gutil/strings/util.cc - impala - Git at Google

 //
 // Copyright (C) 1999-2005 Google, Inc.
 //

 // TODO(user): visit each const_cast.  Some of them are no longer necessary
 // because last Single Unix Spec and grte v2 are more const-y.

 #include "gutil/strings/util.h"

 #include <assert.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
 #include <time.h>           // for FastTimeToBuffer()
 #include <algorithm>
 using std::copy;
 using std::max;
 using std::min;
 using std::reverse;
 using std::sort;
 using std::swap;
 #include <string>
 using std::string;
 #include <vector>
 using std::vector;

 #include <common/logging.h>
 #include "gutil/logging-inl.h"
 #include "gutil/strings/ascii_ctype.h"
 #include "gutil/strings/numbers.h"
 #include "gutil/strings/stringpiece.h"
 #include "gutil/stl_util.h"  // for string_as_array, STLAppendToString
 #include "gutil/utf/utf.h"

 #ifdef OS_WINDOWS
 #ifdef min  // windows.h defines this to something silly
 #undef min
 #endif
 #endif

 // Use this instead of gmtime_r if you want to build for Windows.
 // Windows doesn't have a 'gmtime_r', but it has the similar 'gmtime_s'.
 // TODO(user): Probably belongs in //base:time_support.{cc|h}.
 static struct tm* PortableSafeGmtime(const time_t* timep, struct tm* result) {
 #ifdef OS_WINDOWS
   return gmtime_s(result, timep) == 0 ? result : NULL;
 #else
   return gmtime_r(timep, result);
 #endif  // OS_WINDOWS
 }

 char* strnstr(const char* haystack, const char* needle,
                      size_t haystack_len) {
   if (*needle == '\0') {
     return const_cast<char*>(haystack);
   }
   size_t needle_len = strlen(needle);
   char* where;
   while ((where = strnchr(haystack, *needle, haystack_len)) != nullptr) {
     if (where - haystack + needle_len > haystack_len) {
       return nullptr;
     }
     if (strncmp(where, needle, needle_len) == 0) {
       return where;
     }
     haystack_len -= where + 1 - haystack;
     haystack = where + 1;
   }
   return nullptr;
 }

 const char* strnprefix(const char* haystack, int haystack_size,
                               const char* needle, int needle_size) {
   if (needle_size > haystack_size) {
     return nullptr;
   } else {
     if (strncmp(haystack, needle, needle_size) == 0) {
       return haystack + needle_size;
     } else {
       return nullptr;
     }
   }
 }

 const char* strncaseprefix(const char* haystack, int haystack_size,
                                   const char* needle, int needle_size) {
   if (needle_size > haystack_size) {
     return nullptr;
   } else {
     if (strncasecmp(haystack, needle, needle_size) == 0) {
       return haystack + needle_size;
     } else {
       return nullptr;
     }
   }
 }

 char* strcasesuffix(char* str, const char* suffix) {
   const int lenstr = strlen(str);
   const int lensuffix = strlen(suffix);
   char* strbeginningoftheend = str + lenstr - lensuffix;

   if (lenstr >= lensuffix && 0 == strcasecmp(strbeginningoftheend, suffix)) {
     return (strbeginningoftheend);
   } else {
     return (nullptr);
   }
 }

 const char* strnsuffix(const char* haystack, int haystack_size,
                               const char* needle, int needle_size) {
   if (needle_size > haystack_size) {
     return nullptr;
   } else {
     const char* start = haystack + haystack_size - needle_size;
     if (strncmp(start, needle, needle_size) == 0) {
       return start;
     } else {
       return nullptr;
     }
   }
 }

 const char* strncasesuffix(const char* haystack, int haystack_size,
                            const char* needle, int needle_size) {
   if (needle_size > haystack_size) {
     return nullptr;
   } else {
     const char* start = haystack + haystack_size - needle_size;
     if (strncasecmp(start, needle, needle_size) == 0) {
       return start;
     } else {
       return nullptr;
     }
   }
 }

 char* strchrnth(const char* str, const char& c, int n) {
   if (str == nullptr)
     return nullptr;
   if (n <= 0)
     return const_cast<char*>(str);
   const char* sp;
   int k = 0;
   for (sp = str; *sp != '\0'; sp ++) {
     if (*sp == c) {
       ++k;
       if (k >= n)
         break;
     }
   }
   return (k < n) ? nullptr : const_cast<char*>(sp);
 }

 char* AdjustedLastPos(const char* str, char separator, int n) {
   if ( str == nullptr )
     return nullptr;
   const char* pos = nullptr;
   if ( n > 0 )
     pos = strchrnth(str, separator, n);

   // if n <= 0 or separator appears fewer than n times, get the last occurrence
   if ( pos == nullptr)
     pos = strrchr(str, separator);
   return const_cast<char*>(pos);
 }


 // ----------------------------------------------------------------------
 // Misc. routines
 // ----------------------------------------------------------------------

 bool IsAscii(const char* str, int len) {
   const char* end = str + len;
   while (str < end) {
     if (!ascii_isascii(*str++)) {
       return false;
     }
   }
   return true;
 }

 // ----------------------------------------------------------------------
 // StringReplace()
 //    Give me a string and two patterns "old" and "new", and I replace
 //    the first instance of "old" in the string with "new", if it
 //    exists.  If "replace_all" is true then call this repeatedly until it
 //    fails.  RETURN a new string, regardless of whether the replacement
 //    happened or not.
 // ----------------------------------------------------------------------

 string StringReplace(const StringPiece& s, const StringPiece& oldsub,
                      const StringPiece& newsub, bool replace_all) {
   string ret;
   StringReplace(s, oldsub, newsub, replace_all, &ret);
   return ret;
 }


 // ----------------------------------------------------------------------
 // StringReplace()
 //    Replace the "old" pattern with the "new" pattern in a string,
 //    and append the result to "res".  If replace_all is false,
 //    it only replaces the first instance of "old."
 // ----------------------------------------------------------------------

 void StringReplace(const StringPiece& s, const StringPiece& oldsub,
                    const StringPiece& newsub, bool replace_all,
                    string* res) {
   if (oldsub.empty()) {
     res->append(s.data(), s.length());  // If empty, append the given string.
     return;
   }

   StringPiece::size_type start_pos = 0;
   StringPiece::size_type pos;
   do {
     pos = s.find(oldsub, start_pos);
     if (pos == StringPiece::npos) {
       break;
     }
     res->append(s.data() + start_pos, pos - start_pos);
     res->append(newsub.data(), newsub.length());
     // Start searching again after the "old".
     start_pos = pos + oldsub.length();
   } while (replace_all);
   res->append(s.data() + start_pos, s.length() - start_pos);
 }

 // ----------------------------------------------------------------------
 // GlobalReplaceSubstring()
 //    Replaces all instances of a substring in a string.  Does nothing
 //    if 'substring' is empty.  Returns the number of replacements.
 //
 //    NOTE: The string pieces must not overlap s.
 // ----------------------------------------------------------------------

 int GlobalReplaceSubstring(const StringPiece& substring,
                            const StringPiece& replacement,
                            string* s) {
   CHECK(s != nullptr);
   if (s->empty() || substring.empty())
     return 0;
   string tmp;
   int num_replacements = 0;
   size_t pos = 0;
   for (size_t match_pos = s->find(substring.data(), pos, substring.length());
        match_pos != string::npos;
        pos = match_pos + substring.length(),
            match_pos = s->find(substring.data(), pos, substring.length())) {
     ++num_replacements;
     // Append the original content before the match.
     tmp.append(*s, pos, match_pos - pos);
     // Append the replacement for the match.
     tmp.append(replacement.begin(), replacement.end());
   }
   // Append the content after the last match. If no replacements were made, the
   // original string is left untouched.
   if (num_replacements > 0) {
     tmp.append(*s, pos, s->length() - pos);
     s->swap(tmp);
   }
   return num_replacements;
 }

 //---------------------------------------------------------------------------
 // RemoveStrings()
 //   Remove the strings from v given by the (sorted least -> greatest)
 //   numbers in indices.
 //   Order of v is *not* preserved.
 //---------------------------------------------------------------------------
 void RemoveStrings(vector<string>* v, const vector<int>& indices) {
   assert(v);
   assert(indices.size() <= v->size());
   // go from largest index to smallest so that smaller indices aren't
   // invalidated
   for (int lcv = indices.size() - 1; lcv >= 0; --lcv) {
 #ifndef NDEBUG
     // verify that indices is sorted least->greatest
     if (indices.size() >= 2 && lcv > 0)
       // use LT and not LE because we should never see repeat indices
       CHECK_LT(indices[lcv-1], indices[lcv]);
 #endif
     assert(indices[lcv] >= 0);
     assert(indices[lcv] < v->size());
     swap((*v)[indices[lcv]], v->back());
     v->pop_back();
   }
 }

 // ----------------------------------------------------------------------
 // gstrcasestr is a case-insensitive strstr. Eventually we should just
 // use the GNU libc version of strcasestr, but it isn't compiled into
 // RedHat Linux by default in version 6.1.
 //
 // This function uses ascii_tolower() instead of tolower(), for speed.
 // ----------------------------------------------------------------------

 char *gstrcasestr(const char* haystack, const char* needle) {
   char c, sc;
   size_t len;

   if ((c = *needle++) != 0) {
     c = ascii_tolower(c);
     len = strlen(needle);
     do {
       do {
         if ((sc = *haystack++) == 0)
           return nullptr;
       } while (ascii_tolower(sc) != c);
     } while (strncasecmp(haystack, needle, len) != 0);
     haystack--;
   }
   // This is a const violation but strstr() also returns a char*.
   return const_cast<char*>(haystack);
 }

 // ----------------------------------------------------------------------
 // gstrncasestr is a case-insensitive strnstr.
 //    Finds the occurence of the (null-terminated) needle in the
 //    haystack, where no more than len bytes of haystack is searched.
 //    Characters that appear after a '\0' in the haystack are not searched.
 //
 // This function uses ascii_tolower() instead of tolower(), for speed.
 // ----------------------------------------------------------------------
 const char *gstrncasestr(const char* haystack, const char* needle, size_t len) {
   char c, sc;

   if ((c = *needle++) != 0) {
     c = ascii_tolower(c);
     size_t needle_len = strlen(needle);
     do {
       do {
         if (len-- <= needle_len
             || 0 == (sc = *haystack++))
           return nullptr;
       } while (ascii_tolower(sc) != c);
     } while (strncasecmp(haystack, needle, needle_len) != 0);
     haystack--;
   }
   return haystack;
 }

 // ----------------------------------------------------------------------
 // gstrncasestr is a case-insensitive strnstr.
 //    Finds the occurence of the (null-terminated) needle in the
 //    haystack, where no more than len bytes of haystack is searched.
 //    Characters that appear after a '\0' in the haystack are not searched.
 //
 //    This function uses ascii_tolower() instead of tolower(), for speed.
 // ----------------------------------------------------------------------
 char *gstrncasestr(char* haystack, const char* needle, size_t len) {
   return const_cast<char *>(gstrncasestr(static_cast<const char *>(haystack),
                                          needle, len));
 }
 // ----------------------------------------------------------------------
 // gstrncasestr_split performs a case insensitive search
 // on (prefix, non_alpha, suffix).
 // ----------------------------------------------------------------------
 char *gstrncasestr_split(const char* str,
                          const char* prefix, char non_alpha,
                          const char* suffix,
                          size_t n) {
   int prelen = prefix == nullptr ? 0 : strlen(prefix);
   int suflen = suffix == nullptr ? 0 : strlen(suffix);

   // adjust the string and its length to avoid unnessary searching.
   // an added benefit is to avoid unnecessary range checks in the if
   // statement in the inner loop.
   if (suflen + prelen >= n)  return nullptr;
   str += prelen;
   n -= prelen;
   n -= suflen;

   const char* where = nullptr;

   // for every occurance of non_alpha in the string ...
   while ((where = static_cast<const char*>(
             memchr(str, non_alpha, n))) != nullptr) {
     // ... test whether it is followed by suffix and preceded by prefix
     if ((!suflen || strncasecmp(where + 1, suffix, suflen) == 0) &&
         (!prelen || strncasecmp(where - prelen, prefix, prelen) == 0)) {
       return const_cast<char*>(where - prelen);
     }
     // if not, advance the pointer, and adjust the length according
     n -= (where + 1) - str;
     str = where + 1;
   }

   return nullptr;
 }

 // ----------------------------------------------------------------------
 // strcasestr_alnum is like a case-insensitive strstr, except that it
 // ignores non-alphanumeric characters in both strings for the sake of
 // comparison.
 //
 // This function uses ascii_isalnum() instead of isalnum() and
 // ascii_tolower() instead of tolower(), for speed.
 //
 // E.g. strcasestr_alnum("i use google all the time", " !!Google!! ")
 // returns pointer to "google all the time"
 // ----------------------------------------------------------------------
 char *strcasestr_alnum(const char *haystack, const char *needle) {
   const char *haystack_ptr;
   const char *needle_ptr;

   // Skip non-alnums at beginning
   while ( !ascii_isalnum(*needle) )
     if ( *needle++ == '\0' )
       return const_cast<char*>(haystack);
   needle_ptr = needle;

   // Skip non-alnums at beginning
   while ( !ascii_isalnum(*haystack) )
     if ( *haystack++ == '\0' )
       return nullptr;
   haystack_ptr = haystack;

   while ( *needle_ptr != '\0' ) {
     // Non-alnums - advance
     while ( !ascii_isalnum(*needle_ptr) )
       if ( *needle_ptr++ == '\0' )
         return const_cast<char *>(haystack);

     while ( !ascii_isalnum(*haystack_ptr) )
       if ( *haystack_ptr++ == '\0' )
         return nullptr;

     if ( ascii_tolower(*needle_ptr) == ascii_tolower(*haystack_ptr) ) {
       // Case-insensitive match - advance
       needle_ptr++;
       haystack_ptr++;
     } else {
       // No match - rollback to next start point in haystack
       haystack++;
       while ( !ascii_isalnum(*haystack) )
         if ( *haystack++ == '\0' )
           return nullptr;
       haystack_ptr = haystack;
       needle_ptr = needle;
     }
   }
   return const_cast<char *>(haystack);
 }


 // ----------------------------------------------------------------------
 // CountSubstring()
 //    Return the number times a "substring" appears in the "text"
 //    NOTE: this function's complexity is O(|text| * |substring|)
 //          It is meant for short "text" (such as to ensure the
 //          printf format string has the right number of arguments).
 //          DO NOT pass in long "text".
 // ----------------------------------------------------------------------
 int CountSubstring(StringPiece text, StringPiece substring) {
   CHECK_GT(substring.length(), 0);

   int count = 0;
   StringPiece::size_type curr = 0;
   while (StringPiece::npos != (curr = text.find(substring, curr))) {
     ++count;
     ++curr;
   }
   return count;
 }

 // ----------------------------------------------------------------------
 // strstr_delimited()
 //    Just like strstr(), except it ensures that the needle appears as
 //    a complete item (or consecutive series of items) in a delimited
 //    list.
 //
 //    Like strstr(), returns haystack if needle is empty, or NULL if
 //    either needle/haystack is NULL.
 // ----------------------------------------------------------------------
 const char* strstr_delimited(const char* haystack,
                              const char* needle,
                              char delim) {
   if (!needle || !haystack) return nullptr;
   if (*needle == '\0') return haystack;

   int needle_len = strlen(needle);

   while (true) {
     // Skip any leading delimiters.
     while (*haystack == delim) ++haystack;

     // Walk down the haystack, matching every character in the needle.
     const char* this_match = haystack;
     int i = 0;
     for (; i < needle_len; i++) {
       if (*haystack != needle[i]) {
         // We ran out of haystack or found a non-matching character.
         break;
       }
       ++haystack;
     }

     // If we matched the whole needle, ensure that it's properly delimited.
     if (i == needle_len && (*haystack == '\0' || *haystack == delim)) {
       return this_match;
     }

     // No match. Consume non-delimiter characters until we run out of them.
     while (*haystack != delim) {
       if (*haystack == '\0') return nullptr;
       ++haystack;
     }
   }
   LOG(FATAL) << "Unreachable statement";
   return nullptr;
 }


 // ----------------------------------------------------------------------
 // Older versions of libc have a buggy strsep.
 // ----------------------------------------------------------------------

 char* gstrsep(char** stringp, const char* delim) {
   char *s;
   const char *spanp;
   int c, sc;
   char *tok;

   if ((s = *stringp) == nullptr)
     return nullptr;

   tok = s;
   while (true) {
     c = *s++;
     spanp = delim;
     do {
       if ((sc = *spanp++) == c) {
         if (c == 0)
           s = nullptr;
         else
           s[-1] = 0;
         *stringp = s;
         return tok;
       }
     } while (sc != 0);
   }

   return nullptr; /* should not happen */
 }

 void FastStringAppend(string* s, const char* data, int len) {
   STLAppendToString(s, data, len);
 }


 // TODO(user): add a microbenchmark and revisit
 // the optimizations done here.
 //
 // Several converters use this table to reduce
 // division and modulo operations.
 extern const char two_ASCII_digits[100][2];

 const char two_ASCII_digits[100][2] = {
   {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
   {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
   {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
   {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
   {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
   {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
   {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
   {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
   {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
   {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
   {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
   {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
   {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
   {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
   {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
   {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
   {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
   {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
   {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
   {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}
 };

 static inline void PutTwoDigits(int i, char* p) {
   DCHECK_GE(i, 0);
   DCHECK_LT(i, 100);
   p[0] = two_ASCII_digits[i][0];
   p[1] = two_ASCII_digits[i][1];
 }

 char* FastTimeToBuffer(time_t s, char* buffer) {
   if (s == 0) {
     time(&s);
   }

   struct tm tm;
   if (PortableSafeGmtime(&s, &tm) == nullptr) {
     // Error message must fit in 30-char buffer.
     memcpy(buffer, "Invalid:", sizeof("Invalid:"));
     FastInt64ToBufferLeft(s, buffer+strlen(buffer));
     return buffer;
   }

   // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
   // but strftime does locale stuff which we do not want
   // plus strftime takes > 10x the time of hard code

   const char* weekday_name = "Xxx";
   switch (tm.tm_wday) {
     default: { DLOG(FATAL) << "tm.tm_wday: " << tm.tm_wday; } break;
     case 0:  weekday_name = "Sun"; break;
     case 1:  weekday_name = "Mon"; break;
     case 2:  weekday_name = "Tue"; break;
     case 3:  weekday_name = "Wed"; break;
     case 4:  weekday_name = "Thu"; break;
     case 5:  weekday_name = "Fri"; break;
     case 6:  weekday_name = "Sat"; break;
   }

   const char* month_name = "Xxx";
   switch (tm.tm_mon) {
     default:  { DLOG(FATAL) << "tm.tm_mon: " << tm.tm_mon; } break;
     case 0:   month_name = "Jan"; break;
     case 1:   month_name = "Feb"; break;
     case 2:   month_name = "Mar"; break;
     case 3:   month_name = "Apr"; break;
     case 4:   month_name = "May"; break;
     case 5:   month_name = "Jun"; break;
     case 6:   month_name = "Jul"; break;
     case 7:   month_name = "Aug"; break;
     case 8:   month_name = "Sep"; break;
     case 9:   month_name = "Oct"; break;
     case 10:  month_name = "Nov"; break;
     case 11:  month_name = "Dec"; break;
   }

   // Write out the buffer.

   memcpy(buffer+0, weekday_name, 3);
   buffer[3] = ',';
   buffer[4] = ' ';

   PutTwoDigits(tm.tm_mday, buffer+5);
   buffer[7] = ' ';

   memcpy(buffer+8, month_name, 3);
   buffer[11] = ' ';

   int32 year = tm.tm_year + 1900;
   PutTwoDigits(year/100, buffer+12);
   PutTwoDigits(year%100, buffer+14);
   buffer[16] = ' ';

   PutTwoDigits(tm.tm_hour, buffer+17);
   buffer[19] = ':';

   PutTwoDigits(tm.tm_min, buffer+20);
   buffer[22] = ':';

   PutTwoDigits(tm.tm_sec, buffer+23);

   // includes ending NUL
   memcpy(buffer+25, " GMT", 5);

   return buffer;
 }

 // ----------------------------------------------------------------------
 // strdup_with_new()
 // strndup_with_new()
 //
 //    strdup_with_new() is the same as strdup() except that the memory
 //    is allocated by new[] and hence an exception will be generated
 //    if out of memory.
 //
 //    strndup_with_new() is the same as strdup_with_new() except that it will
 //    copy up to the specified number of characters.  This function
 //    is useful when we want to copy a substring out of a string
 //    and didn't want to (or cannot) modify the string
 // ----------------------------------------------------------------------
 char* strdup_with_new(const char* the_string) {
   if (the_string == nullptr)
     return nullptr;
   else
     return strndup_with_new(the_string, strlen(the_string));
 }

 char* strndup_with_new(const char* the_string, int max_length) {
   if (the_string == nullptr)
     return nullptr;

   auto result = new char[max_length + 1];
   result[max_length] = '\0';  // terminate the string because strncpy might not
   return strncpy(result, the_string, max_length);
 }


 // ----------------------------------------------------------------------
 // ScanForFirstWord()
 //    This function finds the first word in the string "the_string" given.
 //    A word is defined by consecutive !ascii_isspace() characters.
 //    If no valid words are found,
 //        return NULL and *end_ptr will contain junk
 //    else
 //        return the beginning of the first word and
 //        *end_ptr will store the address of the first invalid character
 //        (ascii_isspace() or '\0').
 //
 //    Precondition: (end_ptr != NULL)
 // ----------------------------------------------------------------------
 const char* ScanForFirstWord(const char* the_string, const char** end_ptr) {
   CHECK(end_ptr != nullptr) << ": precondition violated";

   if (the_string == nullptr)  // empty string
     return nullptr;

   const char* curr = the_string;
   while ((*curr != '\0') && ascii_isspace(*curr))  // skip initial spaces
     ++curr;

   if (*curr == '\0')  // no valid word found
     return nullptr;

   // else has a valid word
   const char* first_word = curr;

   // now locate the end of the word
   while ((*curr != '\0') && !ascii_isspace(*curr))
     ++curr;

   *end_ptr = curr;
   return first_word;
 }

 // ----------------------------------------------------------------------
 // AdvanceIdentifier()
 //    This function returns a pointer past the end of the longest C-style
 //    identifier that is a prefix of str or NULL if str does not start with
 //    one.  A C-style identifier begins with an ASCII letter or underscore
 //    and continues with ASCII letters, digits, or underscores.
 // ----------------------------------------------------------------------
 const char *AdvanceIdentifier(const char *str) {
   // Not using isalpha and isalnum so as not to rely on the locale.
   // We could have used ascii_isalpha and ascii_isalnum.
   char ch = *str++;
   if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'))
     return nullptr;
   while (true) {
     ch = *str;
     if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') || ch == '_'))
       return str;
     str++;
   }
 }


 // ----------------------------------------------------------------------
 // IsIdentifier()
 //    This function returns true if str is a C-style identifier.
 //    A C-style identifier begins with an ASCII letter or underscore
 //    and continues with ASCII letters, digits, or underscores.
 // ----------------------------------------------------------------------
 bool IsIdentifier(const char *str) {
   const char *end = AdvanceIdentifier(str);
   return end && *end == '\0';
 }

 static bool IsWildcard(Rune character) {
   return character == '*' || character == '?';
 }

 // Move the strings pointers to the point where they start to differ.
 template <typename CHAR, typename NEXT>
 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
                          const CHAR** string, const CHAR* string_end,
                          NEXT next) {
   const CHAR* escape = nullptr;
   while (*pattern != pattern_end && *string != string_end) {
     if (!escape && IsWildcard(**pattern)) {
       // We don't want to match wildcard here, except if it's escaped.
       return;
     }

     // Check if the escapement char is found. If so, skip it and move to the
     // next character.
     if (!escape && **pattern == '\\') {
       escape = *pattern;
       next(pattern, pattern_end);
       continue;
     }

     // Check if the chars match, if so, increment the ptrs.
     const CHAR* pattern_next = *pattern;
     const CHAR* string_next = *string;
     Rune pattern_char = next(&pattern_next, pattern_end);
     if (pattern_char == next(&string_next, string_end) &&
         pattern_char != Runeerror &&
         pattern_char <= Runemax) {
       *pattern = pattern_next;
       *string = string_next;
     } else {
       // Uh ho, it did not match, we are done. If the last char was an
       // escapement, that means that it was an error to advance the ptr here,
       // let's put it back where it was. This also mean that the MatchPattern
       // function will return false because if we can't match an escape char
       // here, then no one will.
       if (escape) {
         *pattern = escape;
       }
       return;
     }

     escape = nullptr;
   }
 }

 template <typename CHAR, typename NEXT>
 static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
   while (*pattern != end) {
     if (!IsWildcard(**pattern))
       return;
     next(pattern, end);
   }
 }

 template <typename CHAR, typename NEXT>
 static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                           const CHAR* pattern, const CHAR* pattern_end,
                           int depth,
                           NEXT next) {
   const int kMaxDepth = 16;
   if (depth > kMaxDepth)
     return false;

   // Eat all the matching chars.
   EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

   // If the string is empty, then the pattern must be empty too, or contains
   // only wildcards.
   if (eval == eval_end) {
     EatWildcard(&pattern, pattern_end, next);
     return pattern == pattern_end;
   }

   // Pattern is empty but not string, this is not a match.
   if (pattern == pattern_end)
     return false;

   // If this is a question mark, then we need to compare the rest with
   // the current string or the string with one character eaten.
   const CHAR* next_pattern = pattern;
   next(&next_pattern, pattern_end);
   if (pattern[0] == '?') {
     if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                       depth + 1, next))
       return true;
     const CHAR* next_eval = eval;
     next(&next_eval, eval_end);
     if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
                       depth + 1, next))
       return true;
   }

   // This is a *, try to match all the possible substrings with the remainder
   // of the pattern.
   if (pattern[0] == '*') {
     // Collapse duplicate wild cards (********** into *) so that the
     // method does not recurse unnecessarily. http://crbug.com/52839
     EatWildcard(&next_pattern, pattern_end, next);

     while (eval != eval_end) {
       if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
                         depth + 1, next))
         return true;
       eval++;
     }

     // We reached the end of the string, let see if the pattern contains only
     // wildcards.
     if (eval == eval_end) {
       EatWildcard(&pattern, pattern_end, next);
       if (pattern != pattern_end)
         return false;
       return true;
     }
   }

   return false;
 }

 struct NextCharUTF8 {
   Rune operator()(const char** p, const char* end) {
     Rune c;
     int offset = charntorune(&c, *p, static_cast<int>(end - *p));
     *p += offset;
     return c;
   }
 };

 bool MatchPattern(const StringPiece& eval,
                   const StringPiece& pattern) {
   return MatchPatternT(eval.data(), eval.data() + eval.size(),
                        pattern.data(), pattern.data() + pattern.size(),
                        0, NextCharUTF8());
 }


 // ----------------------------------------------------------------------
 // FindTagValuePair
 //    Given a string of the form
 //    <something><attr_sep><tag><tag_value_sep><value><attr_sep>...<string_term>
 //    where the part before the first attr_sep is optional,
 //    this function extracts the first tag and value, if any.
 //    The function returns true if successful, in which case "tag" and "value"
 //    are set to point to the beginning of the tag and the value, respectively,
 //    and "tag_len" and "value_len" are set to the respective lengths.
 // ----------------------------------------------------------------------

 bool FindTagValuePair(const char* arg_str, char tag_value_separator,
                       char attribute_separator, char string_terminal,
                       char **tag, int *tag_len,
                       char **value, int *value_len) {
   char* in_str = const_cast<char*>(arg_str);  // For msvc8.
   if (in_str == nullptr)
     return false;
   char tv_sep_or_term[3] = {tag_value_separator, string_terminal, '\0'};
   char attr_sep_or_term[3] = {attribute_separator, string_terminal, '\0'};

   // Look for beginning of tag
   *tag = strpbrk(in_str, attr_sep_or_term);
   // If string_terminal is '\0', strpbrk won't find it but return null.
   if (*tag == nullptr || **tag == string_terminal)
     *tag = in_str;
   else
     (*tag)++;   // Move past separator
   // Now look for value...
   char *tv_sep_pos = strpbrk(*tag, tv_sep_or_term);
   if (tv_sep_pos == nullptr || *tv_sep_pos == string_terminal)
     return false;
   // ...and end of value
   char *attr_sep_pos = strpbrk(tv_sep_pos, attr_sep_or_term);

   *tag_len = tv_sep_pos - *tag;
   *value = tv_sep_pos + 1;
   if (attr_sep_pos != nullptr)
     *value_len = attr_sep_pos - *value;
   else
     *value_len = strlen(*value);
   return true;
 }

 void UniformInsertString(string* s, int interval, const char* separator) {
   const size_t separator_len = strlen(separator);

   if (interval < 1 ||      // invalid interval
       s->empty() ||        // nothing to do
       separator_len == 0)  // invalid separator
     return;

   int num_inserts = (s->size() - 1) / interval;  // -1 to avoid appending at end
   if (num_inserts == 0)  // nothing to do
     return;

   string tmp;
   tmp.reserve(s->size() + num_inserts * separator_len + 1);

   for (int i = 0; i < num_inserts ; ++i) {
     // append this interval
     tmp.append(*s, i * interval, interval);
     // append a separator
     tmp.append(separator, separator_len);
   }

   // append the tail
   const size_t tail_pos = num_inserts * interval;
   tmp.append(*s, tail_pos, s->size() - tail_pos);

   s->swap(tmp);
 }

 void InsertString(string *const s,
                   const vector<uint32> &indices,
                   char const *const separator) {
   const unsigned num_indices(indices.size());
   if (num_indices == 0) {
     return;  // nothing to do...
   }

   const unsigned separator_len(strlen(separator));
   if (separator_len == 0) {
     return;  // still nothing to do...
   }

   string tmp;
   const unsigned s_len(s->size());
   tmp.reserve(s_len + separator_len * num_indices);

   vector<uint32>::const_iterator const ind_end(indices.end());
   auto ind_pos(indices.begin());

   uint32 last_pos(0);
   while (ind_pos != ind_end) {
     const uint32 pos(*ind_pos);
     DCHECK_GE(pos, last_pos);
     DCHECK_LE(pos, s_len);

     tmp.append(s->substr(last_pos, pos - last_pos));
     tmp.append(separator);

     last_pos = pos;
     ++ind_pos;
   }
   tmp.append(s->substr(last_pos));

   s->swap(tmp);
 }

 //------------------------------------------------------------------------
 // FindNth()
 //  return index of nth occurrence of c in the string,
 //  or string::npos if n > number of occurrences of c.
 //  (returns string::npos = -1 if n <= 0)
 //------------------------------------------------------------------------
 int FindNth(StringPiece s, char c, int n) {
   size_t pos = string::npos;

   for ( int i = 0; i < n; ++i ) {
     pos = s.find_first_of(c, pos + 1);
     if ( pos == StringPiece::npos ) {
       break;
     }
   }
   return pos;
 }

 //------------------------------------------------------------------------
 // ReverseFindNth()
 //  return index of nth-to-last occurrence of c in the string,
 //  or string::npos if n > number of occurrences of c.
 //  (returns string::npos if n <= 0)
 //------------------------------------------------------------------------
 int ReverseFindNth(StringPiece s, char c, int n) {
   if ( n <= 0 ) {
     return static_cast<int>(StringPiece::npos);
   }

   size_t pos = s.size();

   for ( int i = 0; i < n; ++i ) {
     // If pos == 0, we return StringPiece::npos right away. Otherwise,
     // the following find_last_of call would take (pos - 1) as string::npos,
     // which means it would again search the entire input string.
     if (pos == 0) {
       return static_cast<int>(StringPiece::npos);
     }
     pos = s.find_last_of(c, pos - 1);
     if ( pos == string::npos ) {
       break;
     }
   }
   return pos;
 }

 namespace strings {

 // FindEol()
 // Returns the location of the next end-of-line sequence.

 StringPiece FindEol(StringPiece s) {
   for (size_t i = 0; i < s.length(); ++i) {
     if (s[i] == '\n') {
       return StringPiece(s.data() + i, 1);
     }
     if (s[i] == '\r') {
       if (i+1 < s.length() && s[i+1] == '\n') {
         return StringPiece(s.data() + i, 2);
       } else {
         return StringPiece(s.data() + i, 1);
       }
     }
   }
   return StringPiece(s.data() + s.length(), 0);
 }

 }  // namespace strings

 //------------------------------------------------------------------------
 // OnlyWhitespace()
 //  return true if string s contains only whitespace characters
 //------------------------------------------------------------------------
 bool OnlyWhitespace(const StringPiece& s) {
   for (const auto& c : s) {
     if ( !ascii_isspace(c) ) return false;
   }
   return true;
 }

 string PrefixSuccessor(const StringPiece& prefix) {
   // We can increment the last character in the string and be done
   // unless that character is 255, in which case we have to erase the
   // last character and increment the previous character, unless that
   // is 255, etc. If the string is empty or consists entirely of
   // 255's, we just return the empty string.
   bool done = false;
   string limit(prefix.data(), prefix.size());
   int index = limit.length() - 1;
   while (!done && index >= 0) {
     if (limit[index] == 255) {
       limit.erase(index);
       index--;
     } else {
       limit[index]++;
       done = true;
     }
   }
   if (!done) {
     return "";
   } else {
     return limit;
   }
 }

 string ImmediateSuccessor(const StringPiece& s) {
   // Return the input string, with an additional NUL byte appended.
   string out;
   out.reserve(s.size() + 1);
   out.append(s.data(), s.size());
   out.push_back('\0');
   return out;
 }

 void FindShortestSeparator(const StringPiece& start,
                            const StringPiece& limit,
                            string* separator) {
   // Find length of common prefix
   size_t min_length = min(start.size(), limit.size());
   size_t diff_index = 0;
   while ((diff_index < min_length) &&
          (start[diff_index] == limit[diff_index])) {
     diff_index++;
   }

   if (diff_index >= min_length) {
     // Handle the case where either string is a prefix of the other
     // string, or both strings are identical.
     start.CopyToString(separator);
     return;
   }

   if (diff_index+1 == start.size()) {
     // If the first difference is in the last character, do not bother
     // incrementing that character since the separator will be no
     // shorter than "start".
     start.CopyToString(separator);
     return;
   }

   if (start[diff_index] == 0xff) {
     // Avoid overflow when incrementing start[diff_index]
     start.CopyToString(separator);
     return;
   }

   separator->assign(start.data(), diff_index);
   separator->push_back(start[diff_index] + 1);
   if (*separator >= limit) {
     // Never pick a separator that causes confusion with "limit"
     start.CopyToString(separator);
   }
 }

 int SafeSnprintf(char *str, size_t size, const char *format, ...) {
   va_list printargs;
   va_start(printargs, format);
   int ncw = vsnprintf(str, size, format, printargs);
   va_end(printargs);
   return (ncw < size && ncw >= 0) ? ncw : 0;
 }

 bool GetlineFromStdioFile(FILE* file, string* str, char delim) {
   str->erase();
   while (true) {
     if (feof(file) || ferror(file)) {
       return false;
     }
     int c = getc(file);
     if (c == EOF) return false;
     if (c == delim) return true;
     str->push_back(c);
   }
 }

 namespace {

 template <typename CHAR>
 size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
   for (size_t i = 0; i < dst_size; ++i) {
     if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
       return i;
   }

   // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
   if (dst_size != 0)
     dst[dst_size - 1] = 0;

   // Count the rest of the |src|, and return it's length in characters.
   while (src[dst_size]) ++dst_size;
   return dst_size;
 }

 }  // namespace

 size_t strings::strlcpy(char* dst, const char* src, size_t dst_size) {
   return lcpyT<char>(dst, src, dst_size);
 }