be/src/gutil/strings/split.cc - impala - Git at Google

 // Copyright 2008 and onwards Google Inc.  All rights reserved.
 //
 // Maintainer: Greg Miller <jgm@google.com>

 #include "gutil/strings/split.h"

 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include <iterator>
 using std::back_insert_iterator;
 using std::iterator_traits;
 #include <limits>
 using std::numeric_limits;

 using std::unordered_map;
 using std::unordered_set;

 #include "gutil/integral_types.h"
 #include <common/logging.h>
 #include "gutil/logging-inl.h"
 #include "gutil/macros.h"
 #include "gutil/strtoint.h"
 #include "gutil/strings/ascii_ctype.h"
 #include "gutil/strings/util.h"
 #include "gutil/hash/hash.h"

 // Implementations for some of the Split2 API. Much of the Split2 API is
 // templated so it exists in header files, either strings/split.h or
 // strings/split_internal.h.
 namespace strings {
 namespace delimiter {

 namespace {

 // GenericFind() holds the search algorithm shared by the Literal and AnyOf
 // delimiters. The FindPolicy template parameter supplies the two pieces that
 // differ between them:
 //   - find_policy.Find(text, delimiter): offset of the match inside `text`,
 //     or StringPiece::npos when there is none.
 //   - find_policy.Length(delimiter): number of characters the match covers.
 //
 // Returns a StringPiece pointing into `text` that covers the match. If
 // nothing matches, the result is the zero-length piece at text.end(). If
 // `delimiter` is empty and `text` is not, the result is the zero-length piece
 // at text.begin() + 1.
 template <typename FindPolicy>
 StringPiece GenericFind(
     StringPiece text,
     StringPiece delimiter,
     FindPolicy find_policy) {
   if (delimiter.empty() && text.length() > 0) {
     // Special case for empty string delimiters: always return a zero-length
     // StringPiece referring to the item at position 1.
     return StringPiece(text.begin() + 1, 0);
   }
   StringPiece found(text.end(), 0);  // By default, not found
   // Keep the offset as size_t. Narrowing it to int would corrupt offsets
   // beyond INT_MAX and turn them into a bogus pointer below.
   const size_t found_pos = find_policy.Find(text, delimiter);
   if (found_pos != StringPiece::npos) {
     found.set(text.data() + found_pos, find_policy.Length(delimiter));
   }
   return found;
 }

 // Find policy for the Literal delimiter: the whole `delimiter` must match.
 // Searches with StringPiece::find(), so the length of the found delimiter is
 // delimiter.length().
 struct LiteralPolicy {
   // Returns the offset of `delimiter` in `text` as reported by
   // StringPiece::find(). The result stays a size_t (like AnyOfPolicy::Find)
   // so it is not narrowed to int before the caller compares it with npos.
   size_t Find(StringPiece text, StringPiece delimiter) {
     return text.find(delimiter);
   }
   // Number of characters a match covers.
   int Length(StringPiece delimiter) {
     return delimiter.length();
   }
 };

 // Find policy for the AnyOf delimiter: any single character contained in
 // `delimiter` counts as a match. Searches with StringPiece::find_first_of(),
 // so a match is always exactly one character long.
 struct AnyOfPolicy {
   // Offset of the first character of `text` that appears in `delimiter`, as
   // reported by StringPiece::find_first_of().
   size_t Find(StringPiece text, StringPiece delimiter) {
     const size_t first_match = text.find_first_of(delimiter);
     return first_match;
   }
   // A match is one character, whatever the delimiter set contains.
   int Length(StringPiece /* delimiter */) {
     return 1;
   }
 };

 }  // namespace

 //
 // Literal
 //

 // Constructs a Literal delimiter; delimiter_ is initialized from
 // sp.ToString().
 Literal::Literal(StringPiece sp) : delimiter_(sp.ToString()) {
 }

 // Searches `text` for delimiter_ by delegating to GenericFind() with a
 // LiteralPolicy, and returns GenericFind()'s result unchanged.
 StringPiece Literal::Find(StringPiece text) const {
   return GenericFind(text, delimiter_, LiteralPolicy());
 }

 //
 // AnyOf
 //

 // Constructs an AnyOf delimiter; delimiters_ is initialized from
 // sp.ToString().
 AnyOf::AnyOf(StringPiece sp) : delimiters_(sp.ToString()) {
 }

 // Searches `text` for any character of delimiters_ by delegating to
 // GenericFind() with an AnyOfPolicy, and returns GenericFind()'s result
 // unchanged.
 StringPiece AnyOf::Find(StringPiece text) const {
   return GenericFind(text, delimiters_, AnyOfPolicy());
 }

 }  // namespace delimiter
 }  // namespace strings

 //
 // ==================== LEGACY SPLIT FUNCTIONS ====================
 //

 using ::strings::SkipEmpty;
 using ::strings::delimiter::AnyOf;
 using ::strings::delimiter::Limit;

 namespace {

 // Generic fallback that appends the results of a split to `container`.
 // More specific overloads exist for:
 // - vector<string>           - for better performance
 // - map<string, string>      - to change append semantics
 // - unordered_map<string, string> - to change append semantics
 //
 // The splitter is first converted to a temporary Container, whose elements
 // are then inserted one by one through an insert iterator that starts at
 // container->end().
 template <typename Container, typename Splitter>
 void AppendToImpl(Container* container, Splitter splitter) {
   Container pieces = splitter;  // Calls implicit conversion operator.
   std::insert_iterator<Container> sink(*container, container->end());
   for (const auto& piece : pieces) {
     *sink = piece;
     ++sink;
   }
 }

 // AppendToImpl() overload for vector<string>. The split results are first
 // materialized as a vector<StringPiece>; `container` is then grown once and
 // every piece is copied straight into its final slot, which avoids building
 // intermediate strings.
 template <typename Splitter>
 void AppendToImpl(vector<string>* container, Splitter splitter) {
   vector<StringPiece> pieces = splitter;  // Calls implicit conversion operator.
   const size_t first_new_slot = container->size();
   container->resize(first_new_slot + pieces.size());
   for (size_t i = 0; i < pieces.size(); ++i) {
     pieces[i].CopyToString(&(*container)[first_new_slot + i]);
   }
 }

 // Here we define two AppendToImpl() overloads for map<> and unordered_map<>. Both of
 // these overloads call through to this AppendToMap() function. This is needed
 // because inserting a duplicate key into a map does NOT overwrite the previous
 // value, which was not the behavior of the split1 Split*() functions. Consider
 // this example:
 //
 //   map<string, string> m;
 //   m.insert(std::make_pair("a", "1"));
 //   m.insert(std::make_pair("a", "2"));  // <-- doesn't actually insert.
 //   ASSERT_EQ(m["a"], "1");  // <-- "a" has value "1" not "2".
 //
 // Due to this behavior of map::insert, we can't rely on a normal std::inserter
 // for maps. Instead, maps and unordered_maps need to be special cased to implement
 // the desired append semantic of inserting an existing value overwrites the
 // previous value.
 //
 // This same issue is true with sets as well. However, since sets don't have a
 // separate key and value, failing to overwrite an existing value in a set is
 // fine because the value already exists in the set.
 //
 // Merges the split results into *m. The splitter is converted to a temporary
 // Map, and each of its entries is assigned through operator[], so a key that
 // already exists in *m has its value overwritten.
 template <typename Map, typename Splitter>
 void AppendToMap(Map* m, Splitter splitter) {
   Map parsed = splitter;  // Calls implicit conversion operator.
   for (const auto& entry : parsed) {
     (*m)[entry.first] = entry.second;
   }
 }

 // AppendToImpl() overload for map<string, string>: forwards to AppendToMap()
 // so that existing keys are overwritten rather than kept.
 template <typename Splitter>
 void AppendToImpl(map<string, string>* map_container, Splitter splitter) {
   AppendToMap(map_container, splitter);
 }

 // AppendToImpl() overload for unordered_map<string, string>: forwards to
 // AppendToMap() so that existing keys are overwritten rather than kept.
 template <typename Splitter>
 void AppendToImpl(unordered_map<string, string>* map_container, Splitter splitter) {
   AppendToMap(map_container, splitter);
 }

 // Adds the results of a strings::Split() call to `container`, giving the new
 // strings::Split() API the append semantics of the legacy Split*()
 // functions.
 //
 // "Splitter" is intended to be a ::strings::internal::Splitter<>, i.e. the
 // return value of strings::Split(). Sample usage:
 //
 //   vector<string> v;
 //   ... add stuff to "v" ...
 //   AppendTo(&v, strings::Split("a,b,c", ","));
 //
 template <typename Container, typename Splitter>
 void AppendTo(Container* container, Splitter splitter) {
   if (!container->empty()) {
     // Existing contents must be preserved: take the explicit append path.
     AppendToImpl(container, splitter);
     return;
   }
   // "Appending" to an empty container is by far the common case. Assigning
   // directly to the output container is more efficient than explicitly
   // appending.
   *container = splitter;  // Calls implicit conversion operator.
 }

 }  // anonymous namespace

 // Constants for ClipString()
 // ClipStringHelper() walks backwards from the cut point looking for
 // whitespace; for long enough limits it stops kMaxOverCut characters before
 // max_len.
 static const int kMaxOverCut = 12;
 // The ellipsis to add to strings that are too long
 static const char kCutStr[] = "...";
 // Length of kCutStr, not counting its terminating NUL.
 static const int kCutStrSize = sizeof(kCutStr) - 1;

 // ----------------------------------------------------------------------
 // ClipStringHelper
 //    Computes the offset at which `str` should be cut so that it fits in
 //    max_len characters, or returns -1 if `str` is already short enough.
 //    When use_ellipsis is set and max_len is larger than the ellipsis,
 //    room for the ellipsis is reserved. The cut is moved back to a
 //    whitespace character if one is found within the search window.
 // ----------------------------------------------------------------------
 static int ClipStringHelper(const char* str, int max_len, bool use_ellipsis) {
   // A negative max_len converts to a very large size_t here, in which case
   // nothing is clipped.
   if (strlen(str) <= static_cast<size_t>(max_len)) {
     return -1;
   }

   const bool reserve_ellipsis = use_ellipsis && max_len > kCutStrSize;
   const int max_substr_len =
       reserve_ellipsis ? max_len - kCutStrSize : max_len;

   // Earliest position the backwards whitespace search may reach.
   const char* const search_floor =
       (max_substr_len < kMaxOverCut ? str : str + max_len - kMaxOverCut);

   const char* cut_at = str + max_substr_len;
   while (!ascii_isspace(*cut_at) && cut_at > search_floor) {
     --cut_at;
   }

   if (cut_at == search_floor) {
     // No space was found
     return max_substr_len;
   }
   return static_cast<int>(cut_at - str);
 }

 // ----------------------------------------------------------------------
 // ClipString
 //    Truncates the C string `str` in place to at most max_len characters,
 //    preferring a word boundary. If the string is clipped and max_len is
 //    larger than the ellipsis, the ellipsis is written at the cut point;
 //    otherwise the string is simply terminated there.
 // ----------------------------------------------------------------------

 void ClipString(char* str, int max_len) {
   const int cut_at = ClipStringHelper(str, max_len, true);
   if (cut_at == -1) {
     return;  // Already short enough.
   }
   char* const tail = str + cut_at;
   if (max_len > kCutStrSize) {
     strcpy(tail, kCutStr);
   } else {
     *tail = '\0';
   }
 }

 // ----------------------------------------------------------------------
 // ClipString
 //    Same as the char* ClipString(), but operating on a string: erases
 //    everything from the cut point on and appends the ellipsis when
 //    max_len is larger than the ellipsis.
 // ----------------------------------------------------------------------
 void ClipString(string* full_str, int max_len) {
   const int cut_at = ClipStringHelper(full_str->c_str(), max_len, true);
   if (cut_at == -1) {
     return;  // Already short enough.
   }
   full_str->erase(cut_at);
   if (max_len > kCutStrSize) {
     *full_str += kCutStr;
   }
 }

 // ----------------------------------------------------------------------
 // SplitStringToIteratorAllowEmpty()
 //    Splits `full` at every character contained in `delim` and writes the
 //    components through `result`. Consecutive delimiters produce empty
 //    strings. At most `pieces` components are produced, splitting
 //    greedily from the left, so the last component may still contain
 //    delimiters. Pass 0 for `pieces` to split without limit.
 //
 //    If "full" is the empty string, yields an empty string as the only value.
 //
 //    If "pieces" is negative, the whole string is yielded as one component.
 // ----------------------------------------------------------------------
 template <typename StringType, typename ITR>
 static inline
 void SplitStringToIteratorAllowEmpty(const StringType& full,
                                      const char* delim,
                                      int pieces,
                                      ITR& result) {
   const bool unlimited = (pieces == 0);
   string::size_type token_start = 0;
   int splits_done = 0;

   while (unlimited || splits_done < pieces - 1) {
     const string::size_type delim_pos =
         full.find_first_of(delim, token_start);
     if (delim_pos == string::npos) {
       break;  // No delimiter left: the remainder is the last component.
     }
     *result++ = full.substr(token_start, delim_pos - token_start);
     token_start = delim_pos + 1;
     ++splits_done;
   }
   *result++ = full.substr(token_start);
 }

 void SplitStringIntoNPiecesAllowEmpty(const string& full,
                                       const char* delim,
                                       int pieces,
                                       vector<string>* result) {
   if (pieces == 0) {
     // No limit when pieces is 0.
     AppendTo(result, strings::Split(full, AnyOf(delim)));
   } else {
     // The input argument "pieces" specifies the max size that *result should
     // be. However, the argument to the Limit() delimiter is the max number of
     // delimiters, which should be one less than "pieces". Example: "a,b,c" has
     // 3 pieces and two comma delimiters.
     int limit = std::max(pieces - 1, 0);
     AppendTo(result, strings::Split(full, Limit(AnyOf(delim), limit)));
   }
 }

 // ----------------------------------------------------------------------
 // SplitStringAllowEmpty
 //    Split a string using a character delimiter. Append the components
 //    to 'result'.  If there are consecutive delimiters, this function
 //    will return corresponding empty strings.
 //
 //    'delim' is passed to AnyOf(), so every character in it acts as a
 //    delimiter.
 // ----------------------------------------------------------------------
 void SplitStringAllowEmpty(const string& full, const char* delim,
                            vector<string>* result) {
   AppendTo(result, strings::Split(full, AnyOf(delim)));
 }

 // Counts the non-empty tokens that splitting `full` on `delim` will produce,
 // so the caller can size its vector<string> once up front instead of letting
 // it grow repeatedly.
 //
 // Counting is only implemented for a single-character `delim`; for any other
 // delimiter string (including the empty one) the function returns 0.
 static int CalculateReserveForVector(const string& full, const char* delim) {
   const bool single_char_delim = (delim[0] != '\0' && delim[1] == '\0');
   if (!single_char_delim) {
     return 0;
   }

   const char separator = delim[0];
   int tokens = 0;
   bool inside_token = false;
   for (const char ch : full) {
     if (ch == separator) {
       inside_token = false;
     } else if (!inside_token) {
       // First character of a new run of non-delimiter characters.
       inside_token = true;
       ++tokens;
     }
   }
   return tokens;
 }

 // ----------------------------------------------------------------------
 // SplitStringUsing()
 // SplitStringToHashsetUsing()
 // SplitStringToSetUsing()
 // SplitStringToMapUsing()
 // SplitStringToHashmapUsing()
 //    Split a string using a character delimiter. Append the components
 //    to 'result'.
 //
 // Note: For multi-character delimiters, this routine will split on *ANY* of
 // the characters in the string, not the entire string as a single delimiter.
 // ----------------------------------------------------------------------
 // Splits `full` at any character contained in `delim` and writes every
 // non-empty token through `result`. Runs of delimiters, as well as leading
 // and trailing delimiters, produce no output.
 template <typename StringType, typename ITR>
 static inline
 void SplitStringToIteratorUsing(const StringType& full,
                                 const char* delim,
                                 ITR& result) {
   // Fast path for the common case of a single-character delimiter: scan the
   // raw character range directly.
   if (delim[0] != '\0' && delim[1] == '\0') {
     const char separator = delim[0];
     const char* cursor = full.data();
     const char* const limit = cursor + full.size();
     while (cursor != limit) {
       if (*cursor == separator) {
         ++cursor;
         continue;
       }
       const char* const token = cursor;
       do {
         ++cursor;  // Advance to the next occurrence of the delimiter.
       } while (cursor != limit && *cursor != separator);
       *result++ = StringType(token, cursor - token);
     }
     return;
   }

   // General path: alternate between skipping delimiters and finding the end
   // of the current token.
   string::size_type token_begin = full.find_first_not_of(delim);
   while (token_begin != string::npos) {
     const string::size_type token_end =
         full.find_first_of(delim, token_begin);
     if (token_end == string::npos) {
       *result++ = full.substr(token_begin);
       break;
     }
     *result++ = full.substr(token_begin, token_end - token_begin);
     token_begin = full.find_first_not_of(delim, token_end);
   }
 }

 // Splits `full` at any character of `delim` and appends the non-empty tokens
 // to *result. Capacity is reserved up front using the estimate from
 // CalculateReserveForVector().
 void SplitStringUsing(const string& full,
                       const char* delim,
                       vector<string>* result) {
   const int expected_tokens = CalculateReserveForVector(full, delim);
   result->reserve(result->size() + expected_tokens);
   auto sink = std::back_inserter(*result);
   SplitStringToIteratorUsing(full, delim, sink);
 }

 // Splits `full` at any character of `delim`, skips empty tokens (SkipEmpty)
 // and adds the tokens to the unordered_set *result via AppendTo().
 void SplitStringToHashsetUsing(const string& full, const char* delim,
                                unordered_set<string>* result) {
   AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty()));
 }

 // Splits `full` at any character of `delim`, skips empty tokens (SkipEmpty)
 // and adds the tokens to the set *result via AppendTo().
 void SplitStringToSetUsing(const string& full, const char* delim,
                            set<string>* result) {
   AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty()));
 }

 // Splits `full` at any character of `delim`, skips empty tokens (SkipEmpty)
 // and merges the result into the map *result via AppendTo(); for a non-empty
 // map, existing keys are overwritten (see AppendToMap()).
 // NOTE(review): how tokens are paired into keys and values is decided by the
 // Splitter's conversion to map, which is not visible here -- confirm in
 // strings/split.h.
 void SplitStringToMapUsing(const string& full, const char* delim,
                            map<string, string>* result) {
   AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty()));
 }

 // Splits `full` at any character of `delim`, skips empty tokens (SkipEmpty)
 // and merges the result into the unordered_map *result via AppendTo(); for a
 // non-empty map, existing keys are overwritten (see AppendToMap()).
 // NOTE(review): how tokens are paired into keys and values is decided by the
 // Splitter's conversion to unordered_map, which is not visible here --
 // confirm in strings/split.h.
 void SplitStringToHashmapUsing(const string& full, const char* delim,
                                unordered_map<string, string>* result) {
   AppendTo(result, strings::Split(full, AnyOf(delim), strings::SkipEmpty()));
 }

 // ----------------------------------------------------------------------
 // SplitStringPieceToVector()
 //    Splits the StringPiece `full` at any character of `delim` into
 //    sub-StringPieces and appends them to 'vec'.
 //    When omit_empty_strings is true, empty pieces are left out of
 //    the resulting vector.
 // ----------------------------------------------------------------------
 void SplitStringPieceToVector(const StringPiece& full,
                               const char* delim,
                               vector<StringPiece>* vec,
                               bool omit_empty_strings) {
   if (!omit_empty_strings) {
     AppendTo(vec, strings::Split(full, AnyOf(delim)));
     return;
   }
   AppendTo(vec, strings::Split(full, AnyOf(delim), SkipEmpty()));
 }

 // ----------------------------------------------------------------------
 // SplitUsing()
 //    Splits `full` using a string of delimiters and returns a newly
 //    allocated vector of pointers into `full`. Empty pieces are omitted.
 //    The original string is modified to insert nulls.
 //    The vector is created with `new` and handed back as a raw pointer.
 // ----------------------------------------------------------------------

 vector<char*>* SplitUsing(char* full, const char* delim) {
   const bool kOmitEmptyStrings = true;
   vector<char*>* pieces = new vector<char*>();
   SplitToVector(full, delim, pieces, kOmitEmptyStrings);
   return pieces;
 }

 // Tokenizes `full` with gstrsep() and appends a pointer to each token to
 // *vec. When omit_empty_strings is true, zero-length tokens returned by
 // gstrsep() are skipped.
 void SplitToVector(char* full, const char* delim, vector<char*>* vec,
                    bool omit_empty_strings) {
   for (char* token = gstrsep(&full, delim); token != nullptr;
        token = gstrsep(&full, delim)) {
     const bool is_empty = (token[0] == '\0');
     if (is_empty && omit_empty_strings) continue;
     vec->push_back(token);
   }
   // If gstrsep() left a remainder in `full` (last element, or the full
   // string if no delimiter was found), add it as well. This tail is not
   // filtered by omit_empty_strings.
   if (full != nullptr) {
     vec->push_back(full);
   }
 }

 // Same as the vector<char*> overload, but collects the tokens as
 // const char*. Tokenizes `full` with gstrsep(); when omit_empty_strings is
 // true, zero-length tokens returned by gstrsep() are skipped.
 void SplitToVector(char* full, const char* delim, vector<const char*>* vec,
                    bool omit_empty_strings) {
   for (char* token = gstrsep(&full, delim); token != nullptr;
        token = gstrsep(&full, delim)) {
     const bool is_empty = (token[0] == '\0');
     if (is_empty && omit_empty_strings) continue;
     vec->push_back(token);
   }
   // If gstrsep() left a remainder in `full` (last element, or the full
   // string if no delimiter was found), add it as well. This tail is not
   // filtered by omit_empty_strings.
   if (full != nullptr) {
     vec->push_back(full);
   }
 }

 // ----------------------------------------------------------------------
 // SplitOneStringToken()
 //   Mainly a stringified wrapper around strpbrk().
 //   Returns the text of *source up to (not including) the first character
 //   that appears in `delim`, and advances *source just past that
 //   character. If no delimiter is found, returns the rest of the string
 //   and sets *source to null. If *source is already null, returns an
 //   empty string.
 // ----------------------------------------------------------------------
 string SplitOneStringToken(const char ** source, const char * delim) {
   assert(source);
   assert(delim);
   const char* const token_begin = *source;
   if (token_begin == nullptr) {
     return string();
   }

   // A single-character delimiter is the common case; strchr() handles it.
   const bool single_char_delim = (delim[0] != '\0' && delim[1] == '\0');
   const char* const delim_pos = single_char_delim
                                     ? strchr(token_begin, delim[0])
                                     : strpbrk(token_begin, delim);
   if (delim_pos == nullptr) {
     *source = nullptr;
     return string(token_begin);
   }
   *source = delim_pos + 1;
   return string(token_begin, delim_pos);
 }

 // ----------------------------------------------------------------------
 // SplitStringWithEscaping()
 // SplitStringWithEscapingAllowEmpty()
 // SplitStringWithEscapingToSet()
 // SplitStringWithEscapingToHashset()
 //   Split the string using the specified delimiters, taking escaping into
 //   account. '\' is not allowed as a delimiter.
 // ----------------------------------------------------------------------
 // Splits `src` at every character contained in `delimiters`, honouring
 // backslash escapes, and writes each piece through *result.
 //
 // Escaping rules:
 //   - A backslash followed by a backslash or a delimiter emits that
 //     character literally, without the escaping backslash.
 //   - A backslash followed by any other character emits both characters.
 //   - A trailing backslash is emitted as-is.
 //
 // When allow_empty is false, empty pieces are not emitted.
 // CHECK-fails if '\\' is a member of `delimiters` or if `result` is null.
 template <typename ITR>
 static inline
 void SplitStringWithEscapingToIterator(const string& src,
                                        const strings::CharSet& delimiters,
                                        const bool allow_empty,
                                        ITR* result) {
   CHECK(!delimiters.Test('\\')) << "\\ is not allowed as a delimiter.";
   CHECK(result);
   string part;

   // The index is a size_t so it can address every offset of `src`; a 32-bit
   // counter would wrap around on inputs of 4 GiB or more and never reach
   // src.size().
   for (size_t i = 0; i < src.size(); ++i) {
     char current_char = src[i];
     if (delimiters.Test(current_char)) {
       // Push substrings when we encounter delimiters.
       if (allow_empty || !part.empty()) {
         *(*result)++ = part;
         part.clear();
       }
     } else if (current_char == '\\' && ++i < src.size()) {
       // If we see a backslash, the next delimiter or backslash is literal.
       current_char = src[i];
       if (current_char != '\\' && !delimiters.Test(current_char)) {
         // Don't honour unknown escape sequences: emit \f for \f.
         part.push_back('\\');
       }
       part.push_back(current_char);
     } else {
       // Otherwise, we have a normal character or trailing backslash.
       part.push_back(current_char);
     }
   }

   // Push the trailing part.
   if (allow_empty || !part.empty()) {
     *(*result)++ = part;
   }
 }

 // Splits `full` on `delimiters` with backslash escaping and appends the
 // non-empty pieces to the vector *result.
 void SplitStringWithEscaping(const string &full,
                              const strings::CharSet& delimiters,
                              vector<string> *result) {
   const bool kAllowEmpty = false;
   auto sink = std::back_inserter(*result);
   SplitStringWithEscapingToIterator(full, delimiters, kAllowEmpty, &sink);
 }

 // Splits `full` on `delimiters` with backslash escaping and appends every
 // piece, including empty ones, to the vector *result.
 void SplitStringWithEscapingAllowEmpty(const string &full,
                                        const strings::CharSet& delimiters,
                                        vector<string> *result) {
   const bool kAllowEmpty = true;
   auto sink = std::back_inserter(*result);
   SplitStringWithEscapingToIterator(full, delimiters, kAllowEmpty, &sink);
 }

 // Splits `full` on `delimiters` with backslash escaping and inserts the
 // non-empty pieces into the set *result.
 void SplitStringWithEscapingToSet(const string &full,
                                   const strings::CharSet& delimiters,
                                   set<string> *result) {
   const bool kAllowEmpty = false;
   auto sink = std::inserter(*result, result->end());
   SplitStringWithEscapingToIterator(full, delimiters, kAllowEmpty, &sink);
 }

 // Splits `full` on `delimiters` with backslash escaping and inserts the
 // non-empty pieces into the unordered_set *result.
 void SplitStringWithEscapingToHashset(const string &full,
                                       const strings::CharSet& delimiters,
                                       unordered_set<string> *result) {
   const bool kAllowEmpty = false;
   auto sink = std::inserter(*result, result->end());
   SplitStringWithEscapingToIterator(full, delimiters, kAllowEmpty, &sink);
 }


 // ----------------------------------------------------------------------
 // SplitOneIntToken()
 // SplitOneInt32Token()
 // SplitOneUint32Token()
 // SplitOneInt64Token()
 // SplitOneUint64Token()
 // SplitOneDoubleToken()
 // SplitOneFloatToken()
 // SplitOneDecimalIntToken()
 // SplitOneDecimalInt32Token()
 // SplitOneDecimalUint32Token()
 // SplitOneDecimalInt64Token()
 // SplitOneDecimalUint64Token()
 // SplitOneHexUint32Token()
 // SplitOneHexUint64Token()
 //   Mainly a stringified wrapper around strtol/strtoul/strtod
 // ----------------------------------------------------------------------
 // Curried functions for the macro below
 //
 // DEFINE_SPLIT_ONE_NUMBER_TOKEN calls its parser as function(source, &end),
 // i.e. with two arguments. Each wrapper here binds the third (base) argument
 // of a strto* routine so that it fits that shape.
 //
 // *_0 variants pass base 0.
 // NOTE(review): presumably base 0 auto-detects the radix as strtol() does --
 // confirm in gutil/strtoint.h.
 static inline long strto32_0(const char * source, char ** end) {
   return strto32(source, end, 0); }
 static inline unsigned long strtou32_0(const char * source, char ** end) {
   return strtou32(source, end, 0); }
 static inline int64 strto64_0(const char * source, char ** end) {
   return strto64(source, end, 0); }
 static inline uint64 strtou64_0(const char * source, char ** end) {
   return strtou64(source, end, 0); }
 // *_10 variants pass base 10 (decimal).
 static inline long strto32_10(const char * source, char ** end) {
   return strto32(source, end, 10); }
 static inline unsigned long strtou32_10(const char * source, char ** end) {
   return strtou32(source, end, 10); }
 static inline int64 strto64_10(const char * source, char ** end) {
   return strto64(source, end, 10); }
 static inline uint64 strtou64_10(const char * source, char ** end) {
   return strtou64(source, end, 10); }
 // *_16 variants pass base 16 (hexadecimal).
 static inline uint32 strtou32_16(const char * source, char ** end) {
   return strtou32(source, end, 16); }
 static inline uint64 strtou64_16(const char * source, char ** end) {
   return strtou64(source, end, 16); }

 // Defines
 //   bool SplitOne<name>Token(const char** source, const char* delim,
 //                            <type>* value);
 // which parses one number at the start of *source using `function`
 // (called as function(*source, &end)).
 //
 // Returns false when *source is null, when no number is present at the
 // start of the string, or when the character following the number is
 // neither the end of the string nor a member of `delim`.
 // On success *source is advanced past the delimiter that follows the
 // number, or set to NULL if the number ran to the end of the string.
 //
 // Note that *value is assigned before the validity checks run, so it may be
 // modified even when the function returns false.
 #define DEFINE_SPLIT_ONE_NUMBER_TOKEN(name, type, function) \
 bool SplitOne##name##Token(const char ** source, const char * delim, \
                            type * value) {                      \
   assert(source);                                               \
   assert(delim);                                                \
   assert(value);                                                \
   if (!*source)                                                 \
     return false;                                               \
   /* Parse int */                                               \
   char * end;                                                   \
   *value = function(*source, &end);                             \
   if (end == *source)                                           \
     return false; /* number not present at start of string */   \
   if (end[0] && !strchr(delim, end[0]))                         \
     return false; /* Garbage characters after int */            \
   /* Advance past token */                                      \
   if (*end != '\0')                                             \
     *source = const_cast<const char *>(end+1);                  \
   else                                                          \
     *source = NULL;                                             \
   return true;                                                  \
 }

 // Instantiations of the SplitOne<name>Token() family. Each line names the
 // function suffix, the output value type, and the parser used.
 // Base-0 integer parsers, plus floating point:
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int, int, strto32_0)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int32, int32, strto32_0)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Uint32, uint32, strtou32_0)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Int64, int64, strto64_0)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Uint64, uint64, strtou64_0)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Double, double, strtod)
 #ifdef _MSC_VER  // has no strtof()
 // Note: does an implicit cast to float.
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Float, float, strtod)
 #else
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(Float, float, strtof)
 #endif
 // Base-10 integer parsers:
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalInt, int, strto32_10)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalInt32, int32, strto32_10)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalUint32, uint32, strtou32_10)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalInt64, int64, strto64_10)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(DecimalUint64, uint64, strtou64_10)
 // Base-16 integer parsers:
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint32, uint32, strtou32_16)
 DEFINE_SPLIT_ONE_NUMBER_TOKEN(HexUint64, uint64, strtou64_16)


 // ----------------------------------------------------------------------
 // SplitRange()
 //    Splits a string of the form "<from>-<to>".  Either or both can be
 //    missing.  A raw number (<to>) is interpreted as "<to>-".  Modifies
 //    parameters insofar as they're specified by the string.  RETURNS
 //    true iff the input is a well-formed range.  If it RETURNS false,
 //    from and to remain unchanged.  The range in rangestr should be
 //    terminated either by "\0" or by whitespace.
 // ----------------------------------------------------------------------

 #define EOS(ch)  ( (ch) == '\0' || ascii_isspace(ch) )
 // Parses a range of the form "<from>-<to>" (either side optional) that is
 // terminated by '\0' or whitespace. *from and *to are only written for the
 // parts present in the string, and only when the whole range is valid.
 // Returns true iff the input is well formed; a null or empty input counts
 // as well formed and changes nothing.
 bool SplitRange(const char* rangestr, int* from, int* to) {
   // strto32() reports the end of the number through a char**, not a
   // const char**, hence the const_cast.
   char* cursor = const_cast<char*>(rangestr);
   if (cursor == nullptr || EOS(*cursor)) {
     return true;                          // we'll say nothingness is ok
   }

   const bool leading_dash = (cursor[0] == '-');

   // CASE 1: "-"  -- nothing changes.
   if (leading_dash && EOS(cursor[1])) {
     return true;
   }

   // CASE 2: "-<i2>"  -- only "to" changes.
   if (leading_dash) {
     const int upper = strto32(cursor + 1, &cursor, 10);
     if (!EOS(*cursor)) {
       return false;                       // not a valid integer
     }
     *to = upper;
     return true;
   }

   const int lower = strto32(cursor, &cursor, 10);

   // CASE 3: "<i1>", same as "<i1>-"  -- only "from" changes.
   if (EOS(*cursor) || (*cursor == '-' && EOS(cursor[1]))) {
     *from = lower;
     return true;
   }
   if (*cursor != '-') {
     return false;                         // not a valid range
   }

   // CASE 4: "<i1>-<i2>"  -- both change.
   const int upper = strto32(cursor + 1, &cursor, 10);
   if (!EOS(*cursor)) {
     return false;                         // not a valid integer
   }
   *from = lower;
   *to = upper;
   return true;
 }

 // Splits `line` in place into columns separated by `delimiter` and appends a
 // pointer to the start of each column to *cols.
 //
 // The buffer is modified: a '\0' is written at the end of every column, and
 // quoted values are compacted in place (a doubled quote "" becomes a single
 // quote). Quoted values are only recognized when `delimiter` is ','.
 // For unquoted values, leading and trailing whitespace is dropped unless
 // that whitespace is the delimiter itself.
 void SplitCSVLineWithDelimiter(char* line, char delimiter,
                                vector<char*>* cols) {
   char* end_of_line = line + strlen(line);
   char* end;    // One past the last character of the current column.
   char* start;  // First character of the current column.

   // Each iteration consumes one column; the loop's line++ steps over the
   // delimiter that ended it.
   for (; line < end_of_line; line++) {
     // Skip leading whitespace, unless said whitespace is the delimiter.
     while (ascii_isspace(*line) && *line != delimiter)
       ++line;

     if (*line == '"' && delimiter == ',') {     // Quoted value...
       start = ++line;
       end = start;
       // Copy the unescaped value over itself; `end` trails `line` once an
       // escaped quote has been collapsed.
       for (; *line; line++) {
         if (*line == '"') {
           line++;
           if (*line != '"')  // [""] is an escaped ["]
             break;           // but just ["] is end of value
         }
         *end++ = *line;
       }
       // All characters after the closing quote and before the comma
       // are ignored.
       line = strchr(line, delimiter);
       if (!line) line = end_of_line;
     } else {
       start = line;
       line = strchr(line, delimiter);
       if (!line) line = end_of_line;
       // Skip all trailing whitespace, unless said whitespace is the delimiter.
       for (end = line; end > start; --end) {
         if (!ascii_isspace(end[-1]) || end[-1] == delimiter)
           break;
       }
     }
     // Must be evaluated before the terminator is written below, because
     // `end` may point at the same character as `line`.
     const bool need_another_column =
       (*line == delimiter) && (line == end_of_line - 1);
     *end = '\0';
     cols->push_back(start);
     // If line was something like [paul,] (comma is the last character
     // and is not preceded by whitespace or quote) then we are about
     // to eliminate the last column (which is empty). This would be
     // incorrect.
     if (need_another_column)
       cols->push_back(end);

     assert(*line == '\0' || *line == delimiter);
   }
 }

 // Splits a comma-separated `line` in place by calling
 // SplitCSVLineWithDelimiter() with ',' as the delimiter; `line` is modified
 // and *cols receives pointers into it.
 void SplitCSVLine(char* line, vector<char*>* cols) {
   SplitCSVLineWithDelimiter(line, ',', cols);
 }

 void SplitCSVLineWithDelimiterForStrings(const string &line,
                                          char delimiter,
                                          vector<string> *cols) {
   // Unfortunately, the interface requires char* instead of const char*
   // which requires copying the string.
   char *cline = strndup_with_new(line.c_str(), line.size());
   vector<char *> v;
   SplitCSVLineWithDelimiter(cline, delimiter, &v);
   for (vector<char*>::const_iterator ci = v.begin(); ci != v.end(); ++ci) {
     cols->push_back(*ci);
   }
   delete[] cline;
 }

 // ----------------------------------------------------------------------
 namespace {

 // Helper class used by SplitStructuredLineInternal.
 // Lookup tables built from a string of symbol pairs ("()[]{}" style: each
 // opening character is immediately followed by its closing character).
 // Used by SplitStructuredLineInternal.
 class ClosingSymbolLookup {
  public:
   // Builds the tables from `symbol_pairs`. If the string has an odd length,
   // the final opening character is treated as its own closing character.
   explicit ClosingSymbolLookup(const char* symbol_pairs)
       : closing_(),
         valid_closing_() {
     const char* cursor = symbol_pairs;
     while (*cursor != 0) {
       const unsigned char opener = *cursor;
       ++cursor;
       // When the string ends right after the opening character, that
       // character closes itself.
       const bool has_partner = (*cursor != 0);
       const unsigned char closer = has_partner ? *cursor : opener;
       closing_[opener] = closer;
       valid_closing_[closer] = true;
       if (!has_partner) break;
       ++cursor;
     }
   }

   // Returns the closing character corresponding to an opening one,
   // or 0 if the argument is not an opening character.
   char GetClosingChar(char opening) const {
     const unsigned char index = static_cast<unsigned char>(opening);
     return closing_[index];
   }

   // Returns true if the argument is a closing character.
   bool IsClosing(char c) const {
     const unsigned char index = static_cast<unsigned char>(c);
     return valid_closing_[index];
   }

  private:
   // Indexed by opening character; holds its closing character, or 0 when
   // the index is not an opening character.
   char closing_[256];
   // Indexed by character; true for every valid closing character.
   bool valid_closing_[256];

   DISALLOW_COPY_AND_ASSIGN(ClosingSymbolLookup);
 };

 char* SplitStructuredLineInternal(char* line,
                                   char delimiter,
                                   const char* symbol_pairs,
                                   vector<char*>* cols,
                                   bool with_escapes) {
   ClosingSymbolLookup lookup(symbol_pairs);

   // Stack of symbols expected to close the current opened expressions.
   vector<char> expected_to_close;
   bool in_escape = false;

   CHECK(cols);
   cols->push_back(line);
   char* current;
   for (current = line; *current; ++current) {
     char c = *current;
     if (in_escape) {
       in_escape = false;
     } else if (with_escapes && c == '\\') {
       // We are escaping the next character. Note the escape still appears
       // in the output.
       in_escape = true;
     } else if (expected_to_close.empty() && c == delimiter) {
       // We don't have any open expression, this is a valid separator.
       *current = 0;
       cols->push_back(current + 1);
     } else if (!expected_to_close.empty() && c == expected_to_close.back()) {
       // Can we close the currently open expression?
       expected_to_close.pop_back();
     } else if (lookup.GetClosingChar(c)) {
       // If this is an opening symbol, we open a new expression and push
       // the expected closing symbol on the stack.
       expected_to_close.push_back(lookup.GetClosingChar(c));
     } else if (lookup.IsClosing(c)) {
       // Error: mismatched closing symbol.
       return current;
     }
   }
   if (!expected_to_close.empty()) {
     return current;  // Missing closing symbol(s)
   }
   return nullptr;  // Success
 }

 // StringPiece flavor of the structured split: appends to 'cols' one piece
 // of 'line' per column, splitting on 'delimiter' only outside expressions
 // bracketed by the symbols in 'symbol_pairs'. The input is not modified.
 // When 'with_escapes' is true, the character following a backslash is never
 // interpreted (the backslash itself stays in the output).
 //
 // Returns true on success, false on a mismatched or missing closing symbol;
 // columns found before the error remain in 'cols'.
 bool SplitStructuredLineInternal(StringPiece line,
                                  char delimiter,
                                  const char* symbol_pairs,
                                  vector<StringPiece>* cols,
                                  bool with_escapes) {
   ClosingSymbolLookup lookup(symbol_pairs);

   // Closing symbols still owed by the expressions opened so far; the
   // innermost expression is at the back.
   vector<char> pending_closers;
   bool escaped = false;

   CHECK_NOTNULL(cols);
   // The current column starts out spanning the whole input and is trimmed
   // once its delimiter is found.
   cols->push_back(line);
   const auto total = line.size();
   for (int i = 0; i < total; ++i) {
     const char ch = line[i];
     if (escaped) {
       // This character was escaped by the previous one: take it verbatim.
       escaped = false;
       continue;
     }
     if (with_escapes && ch == '\\') {
       escaped = true;
       continue;
     }
     const bool nested = !pending_closers.empty();
     if (!nested && ch == delimiter) {
       // Top-level delimiter: cut the current column just before it and
       // start the next column right after it.
       cols->back().remove_suffix(total - i);
       cols->push_back(StringPiece(line, i + 1));
       continue;
     }
     if (nested && ch == pending_closers.back()) {
       // The innermost open expression is closed.
       pending_closers.pop_back();
       continue;
     }
     const char closer = lookup.GetClosingChar(ch);
     if (closer) {
       // Opening symbol: remember which symbol has to close it.
       pending_closers.push_back(closer);
       continue;
     }
     if (lookup.IsClosing(ch)) {
       // A closing symbol that does not match the innermost expression.
       return false;
     }
   }
   // Success only if every opened expression was closed.
   return pending_closers.empty();
 }

 }  // anonymous namespace

 // In-place structured split without backslash-escape handling. Forwards to
 // SplitStructuredLineInternal and returns its result: nullptr on success,
 // otherwise a pointer to the position in 'line' where the error was found.
 char* SplitStructuredLine(char* line,
                           char delimiter,
                           const char *symbol_pairs,
                           vector<char*>* cols) {
   const bool kWithEscapes = false;
   return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
                                      kWithEscapes);
 }

 // Non-mutating structured split without backslash-escape handling.
 // Forwards to SplitStructuredLineInternal and returns its result: true on
 // success, false on a mismatched or missing closing symbol.
 bool SplitStructuredLine(StringPiece line,
                          char delimiter,
                          const char* symbol_pairs,
                          vector<StringPiece>* cols) {
   const bool kWithEscapes = false;
   return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
                                      kWithEscapes);
 }

 // In-place structured split with backslash-escape handling enabled: the
 // character after a backslash is never treated as a delimiter or symbol.
 // Forwards to SplitStructuredLineInternal and returns its result: nullptr
 // on success, otherwise a pointer to the position in 'line' where the error
 // was found.
 char* SplitStructuredLineWithEscapes(char* line,
                                      char delimiter,
                                      const char *symbol_pairs,
                                      vector<char*>* cols) {
   const bool kWithEscapes = true;
   return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
                                      kWithEscapes);
 }

 // Non-mutating structured split with backslash-escape handling enabled: the
 // character after a backslash is never treated as a delimiter or symbol.
 // Forwards to SplitStructuredLineInternal and returns its result: true on
 // success, false on a mismatched or missing closing symbol.
 bool SplitStructuredLineWithEscapes(StringPiece line,
                                     char delimiter,
                                     const char* symbol_pairs,
                                     vector<StringPiece>* cols) {
   const bool kWithEscapes = true;
   return SplitStructuredLineInternal(line, delimiter, symbol_pairs, cols,
                                      kWithEscapes);
 }


 // ----------------------------------------------------------------------
 // SplitStringIntoKeyValues()
 // ----------------------------------------------------------------------
 bool SplitStringIntoKeyValues(const string& line,
                               const string& key_value_delimiters,
                               const string& value_value_delimiters,
                               string *key, vector<string> *values) {
   key->clear();
   values->clear();

   // find the key string
   size_t end_key_pos = line.find_first_of(key_value_delimiters);
   if (end_key_pos == string::npos) {
     VLOG(1) << "cannot parse key from line: " << line;
     return false;    // no key
   }
   key->assign(line, 0, end_key_pos);

   // find the values string
   string remains(line, end_key_pos, line.size() - end_key_pos);
   size_t begin_values_pos = remains.find_first_not_of(key_value_delimiters);
   if (begin_values_pos == string::npos) {
     VLOG(1) << "cannot parse value from line: " << line;
     return false;   // no value
   }
   string values_string(remains,
                        begin_values_pos,
                        remains.size() - begin_values_pos);

   // construct the values vector
   if (value_value_delimiters.empty()) {  // one value
     values->push_back(values_string);
   } else {                               // multiple values
     SplitStringUsing(values_string, value_value_delimiters.c_str(), values);
     if (values->size() < 1) {
       VLOG(1) << "cannot parse value from line: " << line;
       return false;  // no value
     }
   }
   return true;
 }

 bool SplitStringIntoKeyValuePairs(const string& line,
                                   const string& key_value_delimiters,
                                   const string& key_value_pair_delimiters,
                                   vector<pair<string, string> >* kv_pairs) {
   kv_pairs->clear();

   vector<string> pairs;
   SplitStringUsing(line, key_value_pair_delimiters.c_str(), &pairs);

   bool success = true;
   for (const auto& pair : pairs) {
     string key;
     vector<string> value;
     if (!SplitStringIntoKeyValues(pair,
                                   key_value_delimiters,
                                   "", &key, &value)) {
       // Don't return here, to allow for keys without associated
       // values; just record that our split failed.
       success = false;
     }
     // we expect atmost one value because we passed in an empty vsep to
     // SplitStringIntoKeyValues
     DCHECK_LE(value.size(), 1);
     kv_pairs->push_back(make_pair(key, value.empty()? "" : value[0]));
   }
   return success;
 }

 // ----------------------------------------------------------------------
 // SplitLeadingDec32Values()
 // SplitLeadingDec64Values()
 //    A simple parser for space-separated decimal int32/int64 values.
 //    Appends parsed integers to the end of the result vector, stopping
 //    at the first unparsable spot.  Skips past leading and repeated
 //    whitespace (does not consume trailing whitespace), and returns
 //    a pointer beyond the last character parsed.
 // --------------------------------------------------------------------
 // Parses consecutive decimal integers from the start of 'str' with strtol()
 // and appends each one, clamped to the int32 range, to '*result'. Parsing
 // stops at the first position where no number can be read, or after a
 // number that is not followed by whitespace. Returns a pointer just past
 // the last number parsed ('str' itself if none was parsed).
 const char* SplitLeadingDec32Values(const char *str, vector<int32> *result) {
   // 'long' may be wider than 32 bits (e.g. on lp64), hence the clamping.
   const long kUpper = numeric_limits<int32>::max();
   const long kLower = numeric_limits<int32>::min();
   while (true) {
     char *stop = nullptr;
     long parsed = strtol(str, &stop, 10);
     if (stop == str) {
       return str;  // Nothing parsable at this position.
     }
     if (parsed > kUpper) {
       parsed = kUpper;
     } else if (parsed < kLower) {
       parsed = kLower;
     }
     result->push_back(parsed);
     str = stop;
     if (!ascii_isspace(*stop)) {
       return str;  // No separator follows, so no further value.
     }
   }
 }

 const char* SplitLeadingDec64Values(const char *str, vector<int64> *result) {
   for (;;) {
     char *end = nullptr;
     const int64 value = strtoll(str, &end, 10);
     if (end == str)
       break;
     result->push_back(value);
     str = end;
     if (!ascii_isspace(*end))
       break;
   }
   return str;
 }

 // Breaks the NUL-terminated string 'full' into pieces and appends them to
 // '*result' (existing elements of '*result' are left untouched).
 //
 // Each cut position is chosen by ClipStringHelper() given 'max_len'; when
 // it returns -1 the remaining text is appended as the final piece. If
 // 'num_lines' is positive, at most that many pieces are produced, and when
 // the text does not fit, kCutStr is appended to the last piece (provided
 // 'max_len' exceeds kCutStrSize). If 'num_lines' is zero or negative the
 // number of pieces is unlimited. Does nothing when 'max_len' <= 0.
 void SplitStringToLines(const char* full,
                         int max_len,
                         int num_lines,
                         vector<string>* result) {
   if (max_len <= 0) {
     return;
   }
   int pos = 0;
   for (int i = 0; (i < num_lines || num_lines <= 0); i++) {
     // True only for the final permitted line; never true when the number
     // of lines is unlimited.
     const bool is_last_line = (i == num_lines - 1);
     const int cut_at = ClipStringHelper(full+pos, max_len, is_last_line);
     if (cut_at == -1) {
       // No cut needed: the rest of the text becomes the last piece.
       result->push_back(string(full+pos));
       return;
     }
     result->push_back(string(full+pos, cut_at));
     if (is_last_line && max_len > kCutStrSize) {
       // Mark the piece just appended. Use back() rather than indexing by
       // the loop counter: '*result' may already have held elements on
       // entry, in which case index 'i' is not the piece added above.
       result->back().append(kCutStr);
     }
     pos += cut_at;
   }
 }