// src/kudu/gutil/strings/escaping.h - kudu - Git at Google

 // Copyright 2006 Google Inc. All Rights Reserved.
 // Authors: Numerous. Principal maintainers are csilvers and zunger.
 //
 // This is a grab-bag file for string utilities involved in escaping and
 // unescaping strings in various ways. Who knew there were so many?
 //
 // NOTE: Although the functions declared here have been imported into
 // the global namespace, the using statements are slated for removal.
 // Do not refer to these symbols without properly namespace-qualifying
 // them with "strings::". Of course you may also use "using" statements
 // within a .cc file.
 //
 // There are more escaping functions in:
 //   webutil/html/tagutils.h (Escaping strings for HTML, PRE, JavaScript, etc.)
 //   webutil/url/url.h (Escaping for URL's, both RFC-2396 and other methods)
 //   template/template_modifiers.h (All sorts of stuff)
 //   util/regex/re2/re2.h (Escaping for literals within regular expressions
 //                         - see RE2::QuoteMeta).
 // And probably many more places, as well.

 #ifndef STRINGS_ESCAPING_H_
 #define STRINGS_ESCAPING_H_

 #include <cstddef>
 #include <ostream>
 #include <string>
 #include <vector>

 #include <glog/logging.h>

 #include "kudu/gutil/strings/ascii_ctype.h"
 #include "kudu/gutil/strings/charset.h"
 #include "kudu/gutil/strings/stringpiece.h"

 namespace strings {

 // ----------------------------------------------------------------------
 // EscapeStrForCSV()
 //    Escapes the quotes in 'src' by doubling them. This is necessary
 //    for generating CSV files (see SplitCSVLine).
 //    Returns the number of characters written into dest (not counting
 //    the \0) or -1 if there was insufficient space.
 //
 //    Example: [some "string" to test] --> [some ""string"" to test]
 // ----------------------------------------------------------------------
 int EscapeStrForCSV(const char* src, char* dest, int dest_len);

 // ----------------------------------------------------------------------
 // UnescapeCEscapeSequences()
 //    Copies "source" to "dest", rewriting C-style escape sequences
 //    -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
 //    equivalents.  "dest" must be sufficiently large to hold all
 //    the characters in the rewritten string (i.e. at least as large
 //    as strlen(source) + 1 should be safe, since the replacements
 //    are always shorter than the original escaped sequences).  It's
 //    safe for source and dest to be the same.  RETURNS the length
 //    of dest.
 //
 //    It allows hex sequences \xhh, or generally \xhhhhh with an
 //    arbitrary number of hex digits, but all of them together must
 //    specify a value of a single byte (e.g. \x0045 is equivalent
 //    to \x45, and \x1234 is erroneous). If the value is too large,
 //    it is truncated to 8 bits and an error is set. This is also
 //    true of octal values that exceed 0xff.
 //
 //    It also allows escape sequences of the form \uhhhh (exactly four
 //    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
 //    hex digits, upper or lower case) to specify a Unicode code
 //    point. The dest array will contain the UTF8-encoded version of
 //    that code-point (e.g., if source contains \u2019, then dest will
 //    contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
 //    transformation, use UniLib::UTF8EscapeString
 //    (util/utf8/public/unilib.h), not CEscapeString.
 //
 //    Errors: In the first form of the call, errors are reported with
 //    LOG(ERROR). The same is true for the second form of the call if
 //    the pointer to the string vector is NULL; otherwise, error
 //    messages are stored in the vector. In either case, the effect on
 //    the dest array is not defined, but rest of the source will be
 //    processed.
 //
 //    *** DEPRECATED: Use CUnescape() in new code ***
 //    ----------------------------------------------------------------------
 int UnescapeCEscapeSequences(const char* source, char* dest);
 int UnescapeCEscapeSequences(const char* source, char* dest,
                              std::vector<std::string>* errors);

 // ----------------------------------------------------------------------
 // UnescapeCEscapeString()
 //    This does the same thing as UnescapeCEscapeSequences, but creates
 //    a new string. The caller does not need to worry about allocating
 //    a dest buffer. This should be used for non performance critical
 //    tasks such as printing debug messages. It is safe for src and dest
 //    to be the same.
 //
 //    The second call stores its errors in a supplied string vector.
 //    If the string vector pointer is NULL, it reports the errors with LOG().
 //
 //    In the first and second calls, the length of dest is returned. In
 //    the third call, the new string is returned.
 //
 //    *** DEPRECATED: Use CUnescape() in new code ***
 // ----------------------------------------------------------------------
 int UnescapeCEscapeString(const std::string& src, std::string* dest);
 int UnescapeCEscapeString(const std::string& src, std::string* dest,
                           std::vector<std::string>* errors);
 std::string UnescapeCEscapeString(const std::string& src);

 // ----------------------------------------------------------------------
 // CUnescape()
 //    Copies "source" to "dest", rewriting C-style escape sequences
 //    -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
 //    equivalents.  "dest" must be sufficiently large to hold all
 //    the characters in the rewritten string (i.e. at least as large
 //    as source.size() should be safe, since the replacements
 //    are never longer than the original escaped sequences).  It's
 //    safe for source and dest to be the same.  RETURNS true if
 //    conversion was successful, false otherwise. Stores the size of
 //    the result in 'dest_len'.
 //
 //    It allows hex sequences \xhh, or generally \xhhhhh with an
 //    arbitrary number of hex digits, but all of them together must
 //    specify a value of a single byte (e.g. \x0045 is equivalent
 //    to \x45, and \x1234 is erroneous). If the value is too large,
 //    an error is set. This is also true of octal values that exceed 0xff.
 //
 //    It also allows escape sequences of the form \uhhhh (exactly four
 //    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
 //    hex digits, upper or lower case) to specify a Unicode code
 //    point. The dest array will contain the UTF8-encoded version of
 //    that code-point (e.g., if source contains \u2019, then dest will
 //    contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
 //    transformation, use UniLib::UTF8EscapeString
 //    (util/utf8/public/unilib.h), not CEscapeString.
 //
 //    Errors: Sets the description of the first encountered error in
 //    'error'. To disable error reporting, set 'error' to NULL.
 // ----------------------------------------------------------------------
 bool CUnescape(const StringPiece& source, char* dest, int* dest_len,
                std::string* error);

 bool CUnescape(const StringPiece& source, std::string* dest, std::string* error);

 // Convenience overload for callers that do not want an error description:
 // forwards to the (source, dest, error) overload with a null 'error' pointer
 // and returns that overload's result unchanged.
 inline bool CUnescape(const StringPiece& source, std::string* dest) {
   std::string* const no_error_out = nullptr;
   return CUnescape(source, dest, no_error_out);
 }

 // A version which CHECK fails if the string can not be unescaped.
 //
 // Unescapes 'source' through the error-reporting CUnescape() overload and
 // returns the unescaped string by value. If CUnescape() returns false, the
 // CHECK fails and the error description it produced is streamed into the
 // CHECK failure message.
 inline std::string CUnescapeOrDie(const StringPiece& source) {
   std::string dest;
   std::string err;  // Filled in by CUnescape() when unescaping fails.
   CHECK(CUnescape(source, &dest, &err)) << err;
   return dest;
 }

 // ----------------------------------------------------------------------
 // CUnescapeForNullTerminatedString()
 //
 // This has the same behavior as CUnescape, except that each octal, hex,
 // or Unicode escape sequence that resolves to a null character ('\0')
 // is left in its original escaped form.  The result is a
 // display-formatted string that can be interpreted as a null-terminated
 // const char* and will not be cut short if it contains embedded null
 // characters.
 //
 // ----------------------------------------------------------------------

 bool CUnescapeForNullTerminatedString(const StringPiece& source,
                                       char* dest,
                                       int* dest_len,
                                       std::string* error);

 bool CUnescapeForNullTerminatedString(const StringPiece& source,
                                       std::string* dest,
                                       std::string* error);

 // A version with no error reporting: forwards to the
 // (source, dest, error) overload with a null 'error' pointer and returns
 // that overload's result unchanged.
 inline bool CUnescapeForNullTerminatedString(const StringPiece& source,
                                              std::string* dest) {
   // Use nullptr (as the CUnescape() wrapper in this header does) rather than
   // the NULL macro, so the argument is unambiguously a null pointer.
   return CUnescapeForNullTerminatedString(source, dest, nullptr);
 }

 // ----------------------------------------------------------------------
 // CEscapeString()
 // CHexEscapeString()
 // Utf8SafeCEscapeString()
 // Utf8SafeCHexEscapeString()
 //    Copies 'src' to 'dest', escaping dangerous characters using
 //    C-style escape sequences. This is very useful for preparing query
 //    flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
 //    hexadecimal rather than octal sequences. The 'Utf8Safe' version
 //    doesn't touch UTF-8 bytes.
 //    Returns the number of bytes written to 'dest' (not including the \0)
 //    or -1 if there was insufficient space.
 //
 //    Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped.
 // ----------------------------------------------------------------------
 int CEscapeString(const char* src, int src_len, char* dest, int dest_len);
 int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len);
 int Utf8SafeCEscapeString(const char* src, int src_len, char* dest,
                           int dest_len);
 int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest,
                              int dest_len);

 // ----------------------------------------------------------------------
 // CEscape()
 // CHexEscape()
 // Utf8SafeCEscape()
 // Utf8SafeCHexEscape()
 //    More convenient form of CEscapeString: returns result as a "string".
 //    This version is slower than CEscapeString() because it does more
 //    allocation.  However, it is much more convenient to use in
 //    non-speed-critical code like logging messages etc.
 // ----------------------------------------------------------------------
 std::string CEscape(const StringPiece& src);
 std::string CHexEscape(const StringPiece& src);
 std::string Utf8SafeCEscape(const StringPiece& src);
 std::string Utf8SafeCHexEscape(const StringPiece& src);

 // ----------------------------------------------------------------------
 // BackslashEscape()
 //    Given a string and a list of characters to escape, replace any
 //    instance of one of those characters with \ + that character. For
 //    example, when exporting maps to /varz, label values need to have
 //    all dots escaped. Appends the result to dest.
 // BackslashUnescape()
 //    Replace \ + any of the indicated "unescape me" characters with just
 //    that character. Appends the result to dest.
 //
 //    IMPORTANT:
 //    This function does not escape \ by default, so if you do not include
 //    it in the chars to escape you will most certainly get an undesirable
 //    result. That is, it won't be a reversible operation:
 //      string src = "foo\\:bar";
 //      BackslashUnescape(BackslashEscape(src, ":"), ":") == "foo\\\\:bar"
 //    On the other hand, for all strings "src", the following is true:
 //      BackslashUnescape(BackslashEscape(src, ":\\"), ":\\") == src
 // ----------------------------------------------------------------------
 void BackslashEscape(const StringPiece& src,
                      const strings::CharSet& to_escape,
                      std::string* dest);
 void BackslashUnescape(const StringPiece& src,
                        const strings::CharSet& to_unescape,
                        std::string* dest);

 // Value-returning form of BackslashEscape(): runs the appending overload on
 // an empty string and returns the result.
 inline std::string BackslashEscape(const StringPiece& src,
                                    const strings::CharSet& to_escape) {
   std::string escaped;
   BackslashEscape(src, to_escape, &escaped);
   return escaped;
 }

 // Value-returning form of BackslashUnescape(): runs the appending overload
 // on an empty string and returns the result.
 inline std::string BackslashUnescape(const StringPiece& src,
                                      const strings::CharSet& to_unescape) {
   std::string unescaped;
   BackslashUnescape(src, to_unescape, &unescaped);
   return unescaped;
 }

 // ----------------------------------------------------------------------
 // QuotedPrintableUnescape()
 //    Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for
 //    more details, only briefly implemented. But from the web...
 //    Quoted-printable is an encoding method defined in the MIME
 //    standard. It is used primarily to encode 8-bit text (such as text
 //    that includes foreign characters) into 7-bit US ASCII, creating a
 //    document that is mostly readable by humans, even in its encoded
 //    form. All MIME compliant applications can decode quoted-printable
 //    text, though they may not necessarily be able to properly display the
 //    document as it was originally intended. As quoted-printable encoding
 //    is implemented most commonly, printable ASCII characters (values 33
 //    through 126, excluding 61), tabs and spaces that do not appear at the
 //    end of lines, and end-of-line characters are not encoded. Other
 //    characters are represented by an equal sign (=) immediately followed
 //    by that character's hexadecimal value. Lines that are longer than 76
 //    characters are shortened by line breaks, with the equal sign marking
 //    where the breaks occurred.
 //
 //    Note that QuotedPrintableUnescape is different from 'Q'-encoding as
 //    defined in rfc2047. In particular, this does not treat '_'s as spaces.
 //
 //    See QEncodingUnescape().
 //
 //    Copies "src" to "dest", rewriting quoted printable escape sequences
 //    =XX to their ASCII equivalents. src is not null terminated, instead
 //    specify len. I recommend that slen<szdest, but we honor szdest
 //    anyway.
 //    RETURNS the length of dest.
 // ----------------------------------------------------------------------
 int QuotedPrintableUnescape(const char* src, int slen, char* dest, int szdest);

 // ----------------------------------------------------------------------
 // QEncodingUnescape()
 //    This is very similar to QuotedPrintableUnescape except that we convert
 //    '_'s into spaces. (See RFC 2047)
 //    http://www.faqs.org/rfcs/rfc2047.html.
 //
 //    Copies "src" to "dest", rewriting q-encoding escape sequences
 //    =XX to their ASCII equivalents. src is not null terminated, instead
 //    specify len. I recommend that slen<szdest, but we honour szdest
 //    anyway.
 //    RETURNS the length of dest.
 // ----------------------------------------------------------------------
 int QEncodingUnescape(const char* src, int slen, char* dest, int szdest);

 // ----------------------------------------------------------------------
 // Base64Unescape()
 // WebSafeBase64Unescape()
 //    Copies "src" to "dest", where src is in base64 and is written to its
 //    ASCII equivalents. src is not null terminated, instead specify len.
 //    I recommend that slen<szdest, but we honor szdest anyway.
 //    RETURNS the length of dest, or -1 if src contains invalid chars.
 //    The WebSafe variation uses '-' instead of '+' and '_' instead of '/'.
 //    The variations that store into a string clear the string first, and
 //    return false (with dest empty) if src contains invalid chars; for
 //    these versions src and dest must be different strings.
 // ----------------------------------------------------------------------
 int Base64Unescape(const char* src, int slen, char* dest, int szdest);
 bool Base64Unescape(const char* src, int slen, std::string* dest);
 inline bool Base64Unescape(const std::string& src, std::string* dest) {
   return Base64Unescape(src.data(), src.size(), dest);
 }

 int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest);
 bool WebSafeBase64Unescape(const char* src, int slen, std::string* dest);
 inline bool WebSafeBase64Unescape(const std::string& src, std::string* dest) {
   return WebSafeBase64Unescape(src.data(), src.size(), dest);
 }

 // Return the length to use for the output buffer given to the base64 escape
 // routines. Make sure to use the same value for do_padding in both.
 // This function may return incorrect results if given input_len values that
 // are extremely high, which should happen rarely.
 int CalculateBase64EscapedLen(int input_len, bool do_padding);
 // Use this version when calling Base64Escape without a do_padding arg.
 int CalculateBase64EscapedLen(int input_len);

 // ----------------------------------------------------------------------
 // Base64Escape()
 // WebSafeBase64Escape()
 //    Encode "src" to "dest" using base64 encoding.
 //    src is not null terminated, instead specify len.
 //    'dest' should have at least CalculateBase64EscapedLen() length.
 //    RETURNS the length of dest.
 //    The WebSafe variation uses '-' instead of '+' and '_' instead of '/'
 //    so that we can place the output in URLs or cookies without having
 //    to escape it.  It also has an extra parameter "do_padding",
 //    which when set to false will prevent padding with "=".
 // ----------------------------------------------------------------------
 int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest);
 int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest,
                         int szdest, bool do_padding);
 // Encode src into dest with padding.
 void Base64Escape(const std::string& src, std::string* dest);
 // Encode src into dest web-safely without padding.
 void WebSafeBase64Escape(const std::string& src, std::string* dest);
 // Encode src into dest web-safely with padding.
 void WebSafeBase64EscapeWithPadding(const std::string& src, std::string* dest);

 void Base64Escape(const unsigned char* src, int szsrc,
                   std::string* dest, bool do_padding);
 void WebSafeBase64Escape(const unsigned char* src, int szsrc,
                          std::string* dest, bool do_padding);

 // ----------------------------------------------------------------------
 // Base32Unescape()
 //    Copies "src" to "dest", where src is in base32 and is written to its
 //    ASCII equivalents. src is not null terminated, instead specify len.
 //    RETURNS the length of dest, or -1 if src contains invalid chars.
 // ----------------------------------------------------------------------
 int Base32Unescape(const char* src, int slen, char* dest, int szdest);
 bool Base32Unescape(const char* src, int slen, std::string* dest);
 inline bool Base32Unescape(const std::string& src, std::string* dest) {
   return Base32Unescape(src.data(), src.size(), dest);
 }

 // ----------------------------------------------------------------------
 // Base32Escape()
 //    Encode "src" to "dest" using base32 encoding.
 //    src is not null terminated, instead specify len.
 //    'dest' should have at least CalculateBase32EscapedLen() length.
 //    RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
 //    too small to fit the fully encoded result.  'dest' is padded with '='.
 //
 //    Note that this is "Base 32 Encoding" from RFC 4648 section 6.
 // ----------------------------------------------------------------------
 int Base32Escape(const unsigned char* src, size_t szsrc,
                  char* dest, size_t szdest);
 bool Base32Escape(const std::string& src, std::string* dest);

 // ----------------------------------------------------------------------
 // Base32HexEscape()
 //    Encode "src" to "dest" using base32hex encoding.
 //    src is not null terminated, instead specify len.
 //    'dest' should have at least CalculateBase32EscapedLen() length.
 //    RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
 //    too small to fit the fully encoded result.  'dest' is padded with '='.
 //
 //    Note that this is "Base 32 Encoding with Extended Hex Alphabet"
 //    from RFC 4648 section 7.
 // ----------------------------------------------------------------------
 int Base32HexEscape(const unsigned char* src, size_t szsrc,
                     char* dest, size_t szdest);
 bool Base32HexEscape(const std::string& src, std::string* dest);

 // Return the length to use for the output buffer given to the base32 escape
 // routines.  This function may return incorrect results if given input_len
 // values that are extremely high, which should happen rarely.
 int CalculateBase32EscapedLen(size_t input_len);

 // ----------------------------------------------------------------------
 // EightBase32DigitsToTenHexDigits()
 // TenHexDigitsToEightBase32Digits()
 //    Convert base32 to and from hex.
 //
 //   for EightBase32DigitsToTenHexDigits():
 //     *in must point to 8 base32 digits.
 //     *out must point to 10 bytes.
 //
 //   for TenHexDigitsToEightBase32Digits():
 //     *in must point to 10 hex digits.
 //     *out must point to 8 bytes.
 //
 //   Note that the Base64 functions above are different. They convert base64
 //   to and from binary data. We convert to and from string representations
 //   of hex. They deal with arbitrary lengths and we deal with single,
 //   whole base32 quanta.
 //
 //   See RFC3548 at http://www.ietf.org/rfc/rfc3548.txt
 //   for details on base32.
 // ----------------------------------------------------------------------
 void EightBase32DigitsToTenHexDigits(const char* in, char* out);
 void TenHexDigitsToEightBase32Digits(const char* in, char* out);

 // ----------------------------------------------------------------------
 // EightBase32DigitsToFiveBytes()
 // FiveBytesToEightBase32Digits()
 //   Convert base32 to and from binary
 //
 //   for EightBase32DigitsToFiveBytes():
 //     *in must point to 8 base32 digits.
 //     *bytes_out must point to 5 bytes.
 //
 //   for FiveBytesToEightBase32Digits():
 //     *in_bytes must point to 5 bytes.
 //     *out must point to 8 bytes.
 //
 //   Note that the Base64 functions above are different.  They deal with
 //   arbitrary lengths and we deal with single, whole base32 quanta.
 // ----------------------------------------------------------------------
 void EightBase32DigitsToFiveBytes(const char* in, unsigned char* bytes_out);
 void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out);

 // ----------------------------------------------------------------------
 // EscapeFileName()
 // UnescapeFileName()
 //   Utility functions to (un)escape strings to make them suitable for use in
 //   filenames. Characters not in [a-zA-Z0-9-_.] will be escaped into %XX.
 //   E.g: "Hello, world!" will be escaped as "Hello%2c%20world%21"
 //
 //   NB that this function escapes slashes, so the output will be a flat
 //   filename and will not keep the directory structure. Slashes are replaced
 //   with '~', instead of a %XX sequence to make it easier for people to
 //   understand the escaped form when the original string is a file path.
 //
 //   WARNING: filenames produced by these functions may not be compatible with
 //   Colossus FS. In particular, the '%' character has a special meaning in
 //   CFS.
 //
 //   The versions that receive a string for the output will append to it.
 // ----------------------------------------------------------------------
 void EscapeFileName(const StringPiece& src, std::string* dst);
 void UnescapeFileName(const StringPiece& src, std::string* dst);
 // Value-returning form of EscapeFileName(): runs the appending overload on
 // an empty string and returns the result.
 inline std::string EscapeFileName(const StringPiece& src) {
   std::string escaped;
   EscapeFileName(src, &escaped);
   return escaped;
 }
 // Value-returning form of UnescapeFileName(): runs the appending overload on
 // an empty string and returns the result.
 inline std::string UnescapeFileName(const StringPiece& src) {
   std::string unescaped;
   UnescapeFileName(src, &unescaped);
   return unescaped;
 }

 // ----------------------------------------------------------------------
 // Here are a couple utility methods to change ints to hex chars & back
 // ----------------------------------------------------------------------

 // Maps a value in [0, 15] to the character code of its upper-case hex digit
 // ('0'-'9', 'A'-'F'). The range is only enforced by the DCHECK.
 inline int int_to_hex_digit(int i) {
   DCHECK((i >= 0) && (i <= 15));
   if (i < 10) {
     return i + '0';
   }
   return (i - 10) + 'A';
 }

 // Maps a value in [0, 15] to the character code of its lower-case hex digit
 // ('0'-'9', 'a'-'f'). The range is only enforced by the DCHECK.
 inline int int_to_lower_hex_digit(int i) {
   DCHECK((i >= 0) && (i <= 15));
   if (i < 10) {
     return i + '0';
   }
   return (i - 10) + 'a';
 }

 // Converts a single hexadecimal digit character ('0'-'9', 'a'-'f' or
 // 'A'-'F') to its numeric value in [0, 15].
 //
 // 'c' must be a hex digit. This precondition is only verified by a DCHECK;
 // the masked result is always within [0, 15] regardless of the input.
 inline int hex_digit_to_int(char c) {
   // The arithmetic below depends on the ASCII layout of digits and letters.
   // The condition is a constant expression, so check it at compile time
   // rather than with a run-time DCHECK.
   static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
                 "hex_digit_to_int() assumes an ASCII character set");
   DCHECK(ascii_isxdigit(c));
   int x = static_cast<unsigned char>(c);
   if (x > '9') {
     // Letters: 'A'..'F' are 0x41..0x46 and 'a'..'f' are 0x61..0x66, so the
     // low nibble is 1..6; adding 9 moves it to 10..15.
     x += 9;
   }
   // For '0'..'9' (0x30..0x39) the low nibble is already the digit's value.
   return x & 0xf;
 }

 // ----------------------------------------------------------------------
 // a2b_hex()
 //  Description: Ascii-to-Binary hex conversion.  This converts
 //         2*'num' hexadecimal characters to 'num' binary data.
 //        Return value: 'num' bytes of binary data (via the 'to' argument)
 // ----------------------------------------------------------------------
 void a2b_hex(const char* from, unsigned char* to, int num);
 void a2b_hex(const char* from, char* to, int num);
 void a2b_hex(const char* from, std::string* to, int num);
 std::string a2b_hex(const std::string& a);

 // ----------------------------------------------------------------------
 // a2b_bin()
 //  Description: Ascii-to-Binary binary conversion.  This converts
 //        a.size() binary characters (ascii '0' or '1') to
 //        ceil(a.size()/8) bytes of binary data.  The first character is
 //        considered the most significant if byte_order_msb is set.  a is
 //        considered to be padded with trailing 0s if its size is not a
 //        multiple of 8.
 //        Return value: ceil(a.size()/8) bytes of binary data
 // ----------------------------------------------------------------------
 std::string a2b_bin(const std::string& a, bool byte_order_msb);

 // ----------------------------------------------------------------------
 // b2a_hex()
 //  Description: Binary-to-Ascii hex conversion.  This converts
 //   'num' bytes of binary to a 2*'num'-character hexadecimal representation
 //    Return value: 2*'num' characters of ascii text (via the 'to' argument)
 // ----------------------------------------------------------------------
 void b2a_hex(const unsigned char* from, char* to, int num);
 void b2a_hex(const unsigned char* from, std::string* to, int num);

 // ----------------------------------------------------------------------
 // b2a_hex()
 //  Description: Binary-to-Ascii hex conversion.  This converts
 //   'num' bytes of binary to a 2*'num'-character hexadecimal representation
 //    Return value: 2*'num' characters of ascii string
 // ----------------------------------------------------------------------
 std::string b2a_hex(const char* from, int num);
 std::string b2a_hex(const StringPiece& b);

 // ----------------------------------------------------------------------
 // b2a_bin()
 //  Description: Binary-to-Ascii binary conversion.  This converts
 //   b.size() bytes of binary to a 8*b.size() character representation
 //   (ascii '0' or '1').  The highest order bit in each byte is returned
 //   first in the string if byte_order_msb is set.
 //   Return value: 8*b.size() characters of ascii text
 // ----------------------------------------------------------------------
 std::string b2a_bin(const std::string& b, bool byte_order_msb);

 // ----------------------------------------------------------------------
 // ShellEscape
 //   Make a shell command argument from a string.
 //   Returns a Bourne shell string literal such that, once the shell finishes
 //   expanding the argument, the argument passed on to the program being
 //   run will be the same as whatever you passed in.
 //   NOTE: This is "ported" from python2.2's commands.mkarg(); it should be
 //         safe for Bourne shell syntax (i.e. sh, bash), but mileage may vary
 //         with other shells.
 // ----------------------------------------------------------------------
 std::string ShellEscape(StringPiece src);

 // Builds a command line from the range [begin, end): each element is passed
 // through ShellEscape() and the escaped pieces are joined with a single
 // space. A separator is emitted only when the accumulated result is
 // non-empty. Returns the joined string.
 template <class InputIterator>
 std::string ShellEscapeCommandLine(InputIterator begin, const InputIterator& end) {
   std::string command_line;
   while (begin != end) {
     if (!command_line.empty()) {
       command_line.append(" ");
     }
     command_line.append(ShellEscape(*begin));
     ++begin;
   }
   return command_line;
 }

 // Reads at most bytes_to_read from binary_string and writes it to
 // ascii_string in lower case hex.
 void ByteStringToAscii(const std::string& binary_string, int bytes_to_read,
                        std::string* ascii_string);

 // Value-returning form of ByteStringToAscii(): runs the three-argument
 // overload on a fresh local string and returns that string.
 inline std::string ByteStringToAscii(const std::string& binary_string,
                                      int bytes_to_read) {
   std::string ascii;
   ByteStringToAscii(binary_string, bytes_to_read, &ascii);
   return ascii;
 }

 // Converts the hex from ascii_string into binary data and
 // writes the binary data into binary_string.
 // Empty input successfully converts to empty output.
 // Returns false and may modify output if it is
 // unable to parse the hex string.
 bool ByteStringFromAscii(const std::string& ascii_string, std::string* binary_string);

 // Clean up a multi-line string to conform to Unix line endings.
 // Reads from src and appends to dst, so usually dst should be empty.
 // If there is no line ending at the end of a non-empty string, it can
 // be added automatically.
 //
 // Four different types of input are correctly handled:
 //
 //   - Unix/Linux files: line ending is LF, pass through unchanged
 //
 //   - DOS/Windows files: line ending is CRLF: convert to LF
 //
 //   - Legacy Mac files: line ending is CR: convert to LF
 //
 //   - Garbled files: random line endings, convert gracefully
 //                    lonely CR, lonely LF, CRLF: convert to LF
 //
 //   @param src The multi-line string to convert
 //   @param dst The converted string is appended to this string
 //   @param auto_end_last_line Automatically terminate the last line
 //
 //   Limitations:
 //
 //     This does not do the right thing for CRCRLF files created by
 //     broken programs that do another Unix->DOS conversion on files
 //     that are already in CRLF format.
 void CleanStringLineEndings(const std::string& src, std::string* dst,
                             bool auto_end_last_line);

 // Same as above, but transforms the argument in place.
 void CleanStringLineEndings(std::string* str, bool auto_end_last_line);

 }  // namespace strings

 #endif  // STRINGS_ESCAPING_H_