blob: 3b0fb4d949e5736873ab9b7a4746da8eaff0e080 [file] [log] [blame]
// Copyright 2006 Google Inc. All Rights Reserved.
// Authors: Numerous. Principal maintainers are csilvers and zunger.
//
// This is a grab-bag file for string utilities involved in escaping and
// unescaping strings in various ways. Who knew there were so many?
//
// NOTE: Although the functions declared here have been imported into
// the global namespace, the using statements are slated for removal.
// Do not refer to these symbols without properly namespace-qualifying
// them with "strings::". Of course you may also use "using" statements
// within a .cc file.
//
// There are more escaping functions in:
// webutil/html/tagutils.h (Escaping strings for HTML, PRE, JavaScript, etc.)
// webutil/url/url.h (Escaping for URL's, both RFC-2396 and other methods)
// template/template_modifiers.h (All sorts of stuff)
// util/regex/re2/re2.h (Escaping for literals within regular expressions
// - see RE2::QuoteMeta).
// And probably many more places, as well.
#ifndef STRINGS_ESCAPING_H_
#define STRINGS_ESCAPING_H_
#include <cstddef>
#include <ostream>
#include <string>
#include <vector>
#include <glog/logging.h>
#include "kudu/gutil/strings/ascii_ctype.h"
#include "kudu/gutil/strings/charset.h"
#include "kudu/gutil/strings/stringpiece.h"
namespace strings {
// ----------------------------------------------------------------------
// EscapeStrForCSV()
// Escapes the quotes in 'src' by doubling them. This is necessary
// for generating CSV files (see SplitCSVLine).
// Returns the number of characters written into dest (not counting
// the \0) or -1 if there was insufficient space.
//
// Example: [some "string" to test] --> [some ""string"" to test]
// ----------------------------------------------------------------------
int EscapeStrForCSV(const char* src, char* dest, int dest_len);
// ----------------------------------------------------------------------
// UnescapeCEscapeSequences()
// Copies "source" to "dest", rewriting C-style escape sequences
// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
// equivalents. "dest" must be sufficiently large to hold all
// the characters in the rewritten string (i.e. at least as large
// as strlen(source) + 1 should be safe, since the replacements
// are always shorter than the original escaped sequences). It's
// safe for source and dest to be the same. RETURNS the length
// of dest.
//
// It allows hex sequences \xhh, or generally \xhhhhh with an
// arbitrary number of hex digits, but all of them together must
// specify a value of a single byte (e.g. \x0045 is equivalent
// to \x45, and \x1234 is erroneous). If the value is too large,
// it is truncated to 8 bits and an error is set. This is also
// true of octal values that exceed 0xff.
//
// It also allows escape sequences of the form \uhhhh (exactly four
// hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
// hex digits, upper or lower case) to specify a Unicode code
// point. The dest array will contain the UTF8-encoded version of
// that code-point (e.g., if source contains \u2019, then dest will
// contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
// transformation, use UniLib::UTF8EscapeString
// (util/utf8/public/unilib.h), not CEscapeString.
//
// Errors: In the first form of the call, errors are reported with
// LOG(ERROR). The same is true for the second form of the call if
// the pointer to the string vector is NULL; otherwise, error
// messages are stored in the vector. In either case, the effect on
// the dest array is not defined, but the rest of the source will be
// processed.
//
// *** DEPRECATED: Use CUnescape() in new code ***
// ----------------------------------------------------------------------
int UnescapeCEscapeSequences(const char* source, char* dest);
int UnescapeCEscapeSequences(const char* source, char* dest,
std::vector<std::string>* errors);
// ----------------------------------------------------------------------
// UnescapeCEscapeString()
// This does the same thing as UnescapeCEscapeSequences, but creates
// a new string. The caller does not need to worry about allocating
// a dest buffer. This should be used for non performance critical
// tasks such as printing debug messages. It is safe for src and dest
// to be the same.
//
// The second call stores its errors in a supplied string vector.
// If the string vector pointer is NULL, it reports the errors with LOG().
//
// In the first and second calls, the length of dest is returned. In
// the third call, the new string is returned.
//
// *** DEPRECATED: Use CUnescape() in new code ***
// ----------------------------------------------------------------------
int UnescapeCEscapeString(const std::string& src, std::string* dest);
int UnescapeCEscapeString(const std::string& src, std::string* dest,
std::vector<std::string>* errors);
std::string UnescapeCEscapeString(const std::string& src);
// ----------------------------------------------------------------------
// CUnescape()
// Copies "source" to "dest", rewriting C-style escape sequences
// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII
// equivalents. "dest" must be sufficiently large to hold all
// the characters in the rewritten string (i.e. at least as large
// as source.size() should be safe, since the replacements
// are never longer than the original escaped sequences). It's
// safe for source and dest to be the same. RETURNS true if
// conversion was successful, false otherwise. Stores the size of
// the result in 'dest_len'.
//
// It allows hex sequences \xhh, or generally \xhhhhh with an
// arbitrary number of hex digits, but all of them together must
// specify a value of a single byte (e.g. \x0045 is equivalent
// to \x45, and \x1234 is erroneous). If the value is too large,
// an error is set. This is also true of octal values that exceed 0xff.
//
// It also allows escape sequences of the form \uhhhh (exactly four
// hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
// hex digits, upper or lower case) to specify a Unicode code
// point. The dest array will contain the UTF8-encoded version of
// that code-point (e.g., if source contains \u2019, then dest will
// contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
// transformation, use UniLib::UTF8EscapeString
// (util/utf8/public/unilib.h), not CEscapeString.
//
// Errors: Sets the description of the first encountered error in
// 'error'. To disable error reporting, set 'error' to NULL.
// ----------------------------------------------------------------------
bool CUnescape(const StringPiece& source, char* dest, int* dest_len,
std::string* error);
bool CUnescape(const StringPiece& source, std::string* dest, std::string* error);
// Convenience overload of CUnescape() for callers that do not want an error
// description: forwards to the (source, dest, error) string overload with a
// null 'error' pointer and returns that call's result.
inline bool CUnescape(const StringPiece& source, std::string* dest) {
  std::string* const no_error_sink = nullptr;
  return CUnescape(source, dest, no_error_sink);
}
// A version which CHECK fails if the string can not be unescaped.
//
// Unescapes 'source' through the (source, dest, error) overload of
// CUnescape() and returns the unescaped string by value. If that call
// returns false, the CHECK fails and the error description that CUnescape()
// stored in 'err' is streamed into the failure message.
inline std::string CUnescapeOrDie(const StringPiece& source) {
std::string dest;
// Receives CUnescape()'s error description; only used in the CHECK message.
std::string err;
CHECK(CUnescape(source, &dest, &err)) << err;
return dest;
}
// ----------------------------------------------------------------------
// CUnescapeForNullTerminatedString()
//
// This has the same behavior as CUnescape, except that each octal, hex,
// or Unicode escape sequence that resolves to a null character ('\0')
// is left in its original escaped form. The result is a
// display-formatted string that can be interpreted as a null-terminated
// const char* and will not be cut short if it contains embedded null
// characters.
//
// ----------------------------------------------------------------------
bool CUnescapeForNullTerminatedString(const StringPiece& source,
char* dest,
int* dest_len,
std::string* error);
bool CUnescapeForNullTerminatedString(const StringPiece& source,
std::string* dest,
std::string* error);
// A version with no error reporting: forwards to the (source, dest, error)
// string overload with a null 'error' pointer and returns that call's result.
inline bool CUnescapeForNullTerminatedString(const StringPiece& source,
                                             std::string* dest) {
  // Use nullptr rather than the NULL macro: it is a real null pointer
  // constant and cannot be mistaken for an integer during overload resolution.
  return CUnescapeForNullTerminatedString(source, dest, nullptr);
}
// ----------------------------------------------------------------------
// CEscapeString()
// CHexEscapeString()
// Utf8SafeCEscapeString()
// Utf8SafeCHexEscapeString()
// Copies 'src' to 'dest', escaping dangerous characters using
// C-style escape sequences. This is very useful for preparing query
// flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
// hexadecimal rather than octal sequences. The 'Utf8Safe' version
// doesn't touch UTF-8 bytes.
// Returns the number of bytes written to 'dest' (not including the \0)
// or -1 if there was insufficient space.
//
// Currently only \n, \r, \t, ", ', \ and !ascii_isprint() chars are escaped.
// ----------------------------------------------------------------------
int CEscapeString(const char* src, int src_len, char* dest, int dest_len);
int CHexEscapeString(const char* src, int src_len, char* dest, int dest_len);
int Utf8SafeCEscapeString(const char* src, int src_len, char* dest,
int dest_len);
int Utf8SafeCHexEscapeString(const char* src, int src_len, char* dest,
int dest_len);
// ----------------------------------------------------------------------
// CEscape()
// CHexEscape()
// Utf8SafeCEscape()
// Utf8SafeCHexEscape()
// More convenient form of CEscapeString: returns result as a "string".
// This version is slower than CEscapeString() because it does more
// allocation. However, it is much more convenient to use in
// non-speed-critical code like logging messages etc.
// ----------------------------------------------------------------------
std::string CEscape(const StringPiece& src);
std::string CHexEscape(const StringPiece& src);
std::string Utf8SafeCEscape(const StringPiece& src);
std::string Utf8SafeCHexEscape(const StringPiece& src);
// ----------------------------------------------------------------------
// BackslashEscape()
// Given a string and a list of characters to escape, replace any
// instance of one of those characters with \ + that character. For
// example, when exporting maps to /varz, label values need to have
// all dots escaped. Appends the result to dest.
// BackslashUnescape()
// Replace \ + any of the indicated "unescape me" characters with just
// that character. Appends the result to dest.
//
// IMPORTANT:
// This function does not escape \ by default, so if you do not include
// it in the chars to escape you will most certainly get an undesirable
// result. That is, it won't be a reversible operation:
// string src = "foo\\:bar";
// BackslashUnescape(BackslashEscape(src, ":"), ":") == "foo\\\\:bar"
// On the other hand, for all strings "src", the following is true:
// BackslashUnescape(BackslashEscape(src, ":\\"), ":\\") == src
// ----------------------------------------------------------------------
void BackslashEscape(const StringPiece& src,
const strings::CharSet& to_escape,
std::string* dest);
void BackslashUnescape(const StringPiece& src,
const strings::CharSet& to_unescape,
std::string* dest);
// Convenience overload of BackslashEscape() that returns the escaped text by
// value: the appending overload is run against a fresh, empty string.
inline std::string BackslashEscape(const StringPiece& src,
                                   const strings::CharSet& to_escape) {
  std::string escaped;
  BackslashEscape(src, to_escape, &escaped);
  return escaped;
}
// Convenience overload of BackslashUnescape() that returns the unescaped text
// by value: the appending overload is run against a fresh, empty string.
inline std::string BackslashUnescape(const StringPiece& src,
                                     const strings::CharSet& to_unescape) {
  std::string unescaped;
  BackslashUnescape(src, to_unescape, &unescaped);
  return unescaped;
}
// ----------------------------------------------------------------------
// QuotedPrintableUnescape()
// Check out http://www.cis.ohio-state.edu/htbin/rfc/rfc2045.html for
// more details, only briefly implemented. But from the web...
// Quoted-printable is an encoding method defined in the MIME
// standard. It is used primarily to encode 8-bit text (such as text
// that includes foreign characters) into 7-bit US ASCII, creating a
// document that is mostly readable by humans, even in its encoded
// form. All MIME compliant applications can decode quoted-printable
// text, though they may not necessarily be able to properly display the
// document as it was originally intended. As quoted-printable encoding
// is implemented most commonly, printable ASCII characters (values 33
// through 126, excluding 61), tabs and spaces that do not appear at the
// end of lines, and end-of-line characters are not encoded. Other
// characters are represented by an equal sign (=) immediately followed
// by that character's hexadecimal value. Lines that are longer than 76
// characters are shortened by line breaks, with the equal sign marking
// where the breaks occurred.
//
// Note that QuotedPrintableUnescape is different from 'Q'-encoding as
// defined in rfc2047. In particular, this does not treat '_'s as spaces.
//
// See QEncodingUnescape().
//
// Copies "src" to "dest", rewriting quoted printable escape sequences
// =XX to their ASCII equivalents. src is not null terminated, instead
// specify len. I recommend that slen<szdest, but we honor szdest
// anyway.
// RETURNS the length of dest.
// ----------------------------------------------------------------------
int QuotedPrintableUnescape(const char* src, int slen, char* dest, int szdest);
// ----------------------------------------------------------------------
// QEncodingUnescape()
// This is very similar to QuotedPrintableUnescape except that we convert
// '_'s into spaces. (See RFC 2047)
// http://www.faqs.org/rfcs/rfc2047.html.
//
// Copies "src" to "dest", rewriting q-encoding escape sequences
// =XX to their ASCII equivalents. src is not null terminated, instead
// specify len. I recommend that slen<szdest, but we honour szdest
// anyway.
// RETURNS the length of dest.
// ----------------------------------------------------------------------
int QEncodingUnescape(const char* src, int slen, char* dest, int szdest);
// ----------------------------------------------------------------------
// Base64Unescape()
// WebSafeBase64Unescape()
// Copies "src" to "dest", where src is in base64 and is written to its
// ASCII equivalents. src is not null terminated, instead specify len.
// I recommend that slen<szdest, but we honor szdest anyway.
// RETURNS the length of dest, or -1 if src contains invalid chars.
// The WebSafe variation uses '-' instead of '+' and '_' instead of '/'.
// The variations that store into a string clear the string first, and
// return false (with dest empty) if src contains invalid chars; for
// these versions src and dest must be different strings.
// ----------------------------------------------------------------------
int Base64Unescape(const char* src, int slen, char* dest, int szdest);
bool Base64Unescape(const char* src, int slen, std::string* dest);
inline bool Base64Unescape(const std::string& src, std::string* dest) {
return Base64Unescape(src.data(), src.size(), dest);
}
int WebSafeBase64Unescape(const char* src, int slen, char* dest, int szdest);
bool WebSafeBase64Unescape(const char* src, int slen, std::string* dest);
inline bool WebSafeBase64Unescape(const std::string& src, std::string* dest) {
return WebSafeBase64Unescape(src.data(), src.size(), dest);
}
// Return the length to use for the output buffer given to the base64 escape
// routines. Make sure to use the same value for do_padding in both.
// This function may return incorrect results if given input_len values that
// are extremely high, which should happen rarely.
int CalculateBase64EscapedLen(int input_len, bool do_padding);
// Use this version when calling Base64Escape without a do_padding arg.
int CalculateBase64EscapedLen(int input_len);
// ----------------------------------------------------------------------
// Base64Escape()
// WebSafeBase64Escape()
// Encode "src" to "dest" using base64 encoding.
// src is not null terminated, instead specify len.
// 'dest' should have at least CalculateBase64EscapedLen() length.
// RETURNS the length of dest.
// The WebSafe variation uses '-' instead of '+' and '_' instead of '/'
// so that we can place the output in URLs or cookies without having
// to escape them. It also has an extra parameter "do_padding",
// which when set to false will prevent padding with "=".
// ----------------------------------------------------------------------
int Base64Escape(const unsigned char* src, int slen, char* dest, int szdest);
int WebSafeBase64Escape(const unsigned char* src, int slen, char* dest,
int szdest, bool do_padding);
// Encode src into dest with padding.
void Base64Escape(const std::string& src, std::string* dest);
// Encode src into dest web-safely without padding.
void WebSafeBase64Escape(const std::string& src, std::string* dest);
// Encode src into dest web-safely with padding.
void WebSafeBase64EscapeWithPadding(const std::string& src, std::string* dest);
void Base64Escape(const unsigned char* src, int szsrc,
std::string* dest, bool do_padding);
void WebSafeBase64Escape(const unsigned char* src, int szsrc,
std::string* dest, bool do_padding);
// ----------------------------------------------------------------------
// Base32Unescape()
// Copies "src" to "dest", where src is in base32 and is written to its
// ASCII equivalents. src is not null terminated, instead specify len.
// RETURNS the length of dest, or -1 if src contains invalid chars.
// ----------------------------------------------------------------------
int Base32Unescape(const char* src, int slen, char* dest, int szdest);
bool Base32Unescape(const char* src, int slen, std::string* dest);
inline bool Base32Unescape(const std::string& src, std::string* dest) {
return Base32Unescape(src.data(), src.size(), dest);
}
// ----------------------------------------------------------------------
// Base32Escape()
// Encode "src" to "dest" using base32 encoding.
// src is not null terminated, instead specify len.
// 'dest' should have at least CalculateBase32EscapedLen() length.
// RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
// too small to fit the fully encoded result. 'dest' is padded with '='.
//
// Note that this is "Base 32 Encoding" from RFC 4648 section 6.
// ----------------------------------------------------------------------
int Base32Escape(const unsigned char* src, size_t szsrc,
char* dest, size_t szdest);
bool Base32Escape(const std::string& src, std::string* dest);
// ----------------------------------------------------------------------
// Base32HexEscape()
// Encode "src" to "dest" using base32hex encoding.
// src is not null terminated, instead specify len.
// 'dest' should have at least CalculateBase32EscapedLen() length.
// RETURNS the length of dest. RETURNS 0 if szsrc is zero, or szdest is
// too small to fit the fully encoded result. 'dest' is padded with '='.
//
// Note that this is "Base 32 Encoding with Extended Hex Alphabet"
// from RFC 4648 section 7.
// ----------------------------------------------------------------------
int Base32HexEscape(const unsigned char* src, size_t szsrc,
char* dest, size_t szdest);
bool Base32HexEscape(const std::string& src, std::string* dest);
// Return the length to use for the output buffer given to the base32 escape
// routines. This function may return incorrect results if given input_len
// values that are extremely high, which should happen rarely.
int CalculateBase32EscapedLen(size_t input_len);
// ----------------------------------------------------------------------
// EightBase32DigitsToTenHexDigits()
// TenHexDigitsToEightBase32Digits()
// Convert base32 to and from hex.
//
// for EightBase32DigitsToTenHexDigits():
// *in must point to 8 base32 digits.
// *out must point to 10 bytes.
//
// for TenHexDigitsToEightBase32Digits():
// *in must point to 10 hex digits.
// *out must point to 8 bytes.
//
// Note that the Base64 functions above are different. They convert base64
// to and from binary data. We convert to and from string representations
// of hex. They deal with arbitrary lengths and we deal with single,
// whole base32 quanta.
//
// See RFC3548 at http://www.ietf.org/rfc/rfc3548.txt
// for details on base32.
// ----------------------------------------------------------------------
void EightBase32DigitsToTenHexDigits(const char* in, char* out);
void TenHexDigitsToEightBase32Digits(const char* in, char* out);
// ----------------------------------------------------------------------
// EightBase32DigitsToFiveBytes()
// FiveBytesToEightBase32Digits()
// Convert base32 to and from binary
//
// for EightBase32DigitsToFiveBytes():
// *in must point to 8 base32 digits.
// *out must point to 5 bytes.
//
// for FiveBytesToEightBase32Digits():
// *in must point to 5 bytes.
// *out must point to 8 bytes.
//
// Note that the Base64 functions above are different. They deal with
// arbitrary lengths and we deal with single, whole base32 quanta.
// ----------------------------------------------------------------------
void EightBase32DigitsToFiveBytes(const char* in, unsigned char* bytes_out);
void FiveBytesToEightBase32Digits(const unsigned char* in_bytes, char* out);
// ----------------------------------------------------------------------
// EscapeFileName()
// UnescapeFileName()
// Utility functions to (un)escape strings to make them suitable for use in
// filenames. Characters not in [a-zA-Z0-9-_.] will be escaped into %XX.
// E.g: "Hello, world!" will be escaped as "Hello%2c%20world%21"
//
// NB that this function escapes slashes, so the output will be a flat
// filename and will not keep the directory structure. Slashes are replaced
// with '~', instead of a %XX sequence to make it easier for people to
// understand the escaped form when the original string is a file path.
//
// WARNING: filenames produced by these functions may not be compatible with
// Colossus FS. In particular, the '%' character has a special meaning in
// CFS.
//
// The versions that receive a string for the output will append to it.
// ----------------------------------------------------------------------
void EscapeFileName(const StringPiece& src, std::string* dst);
void UnescapeFileName(const StringPiece& src, std::string* dst);
// Convenience overload of EscapeFileName() that returns the escaped name by
// value: the appending overload is run against a fresh, empty string.
inline std::string EscapeFileName(const StringPiece& src) {
  std::string escaped_name;
  EscapeFileName(src, &escaped_name);
  return escaped_name;
}
// Convenience overload of UnescapeFileName() that returns the unescaped name
// by value: the appending overload is run against a fresh, empty string.
inline std::string UnescapeFileName(const StringPiece& src) {
  std::string unescaped_name;
  UnescapeFileName(src, &unescaped_name);
  return unescaped_name;
}
// ----------------------------------------------------------------------
// Here are a couple utility methods to change ints to hex chars & back
// ----------------------------------------------------------------------
// Maps an integer in [0, 15] to its upper-case hexadecimal digit character
// ('0'-'9', 'A'-'F'), returned as an int. The range is checked with DCHECK.
inline int int_to_hex_digit(int i) {
  DCHECK((i >= 0) && (i <= 15));
  if (i < 10) {
    return i + '0';
  }
  return (i - 10) + 'A';
}
// Maps an integer in [0, 15] to its lower-case hexadecimal digit character
// ('0'-'9', 'a'-'f'), returned as an int. The range is checked with DCHECK.
inline int int_to_lower_hex_digit(int i) {
  DCHECK((i >= 0) && (i <= 15));
  if (i < 10) {
    return i + '0';
  }
  return (i - 10) + 'a';
}
// Converts a hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to its
// numeric value in [0, 15]. The argument is checked with
// DCHECK(ascii_isxdigit(c)).
inline int hex_digit_to_int(char c) {
  // The arithmetic below relies on ASCII code points. The comparison is a
  // constant expression, so it is verified at compile time rather than with a
  // runtime check.
  static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
                "hex_digit_to_int() assumes an ASCII character set");
  DCHECK(ascii_isxdigit(c));
  int x = static_cast<unsigned char>(c);
  // Letters ('A' == 0x41, 'a' == 0x61) have 1..6 in their low nibble; adding
  // 9 turns that into 10..15. Digits already hold their value in the low
  // nibble.
  if (x > '9') {
    x += 9;
  }
  return x & 0xf;
}
// ----------------------------------------------------------------------
// a2b_hex()
// Description: Ascii-to-Binary hex conversion. This converts
// 2*'num' hexadecimal characters to 'num' binary data.
// Return value: 'num' bytes of binary data (via the 'to' argument)
// ----------------------------------------------------------------------
void a2b_hex(const char* from, unsigned char* to, int num);
void a2b_hex(const char* from, char* to, int num);
void a2b_hex(const char* from, std::string* to, int num);
std::string a2b_hex(const std::string& a);
// ----------------------------------------------------------------------
// a2b_bin()
// Description: Ascii-to-Binary binary conversion. This converts
// a.size() binary characters (ascii '0' or '1') to
// ceil(a.size()/8) bytes of binary data. The first character is
// considered the most significant if byte_order_msb is set. a is
// considered to be padded with trailing 0s if its size is not a
// multiple of 8.
// Return value: ceil(a.size()/8) bytes of binary data
// ----------------------------------------------------------------------
std::string a2b_bin(const std::string& a, bool byte_order_msb);
// ----------------------------------------------------------------------
// b2a_hex()
// Description: Binary-to-Ascii hex conversion. This converts
// 'num' bytes of binary to a 2*'num'-character hexadecimal representation
// Return value: 2*'num' characters of ascii text (via the 'to' argument)
// ----------------------------------------------------------------------
void b2a_hex(const unsigned char* from, char* to, int num);
void b2a_hex(const unsigned char* from, std::string* to, int num);
// ----------------------------------------------------------------------
// b2a_hex()
// Description: Binary-to-Ascii hex conversion. This converts
// 'num' bytes of binary to a 2*'num'-character hexadecimal representation
// Return value: 2*'num' characters of ascii string
// ----------------------------------------------------------------------
std::string b2a_hex(const char* from, int num);
std::string b2a_hex(const StringPiece& b);
// ----------------------------------------------------------------------
// b2a_bin()
// Description: Binary-to-Ascii binary conversion. This converts
// b.size() bytes of binary to a 8*b.size() character representation
// (ascii '0' or '1'). The highest order bit in each byte is returned
// first in the string if byte_order_msb is set.
// Return value: 8*b.size() characters of ascii text
// ----------------------------------------------------------------------
std::string b2a_bin(const std::string& b, bool byte_order_msb);
// ----------------------------------------------------------------------
// ShellEscape
// Make a shell command argument from a string.
// Returns a Bourne shell string literal such that, once the shell finishes
// expanding the argument, the argument passed on to the program being
// run will be the same as whatever you passed in.
// NOTE: This is "ported" from python2.2's commands.mkarg(); it should be
// safe for Bourne shell syntax (i.e. sh, bash), but mileage may vary
// with other shells.
// ----------------------------------------------------------------------
std::string ShellEscape(StringPiece src);
// Applies ShellEscape() to every element of [begin, end) and joins the
// escaped pieces into one string, putting a single space before each piece
// that is appended to a non-empty result. Returns the joined string (empty
// for an empty range).
template <class InputIterator>
std::string ShellEscapeCommandLine(InputIterator begin, const InputIterator& end) {
  std::string command_line;
  while (begin != end) {
    if (!command_line.empty()) {
      command_line.append(" ");
    }
    command_line.append(ShellEscape(*begin));
    ++begin;
  }
  return command_line;
}
// Reads at most bytes_to_read from binary_string and writes it to
// ascii_string in lower case hex.
void ByteStringToAscii(const std::string& binary_string, int bytes_to_read,
std::string* ascii_string);
// Convenience overload of ByteStringToAscii() that returns the hex text by
// value: the output-parameter overload is run against a fresh, empty string.
inline std::string ByteStringToAscii(const std::string& binary_string,
                                     int bytes_to_read) {
  std::string ascii_hex;
  ByteStringToAscii(binary_string, bytes_to_read, &ascii_hex);
  return ascii_hex;
}
// Converts the hex from ascii_string into binary data and
// writes the binary data into binary_string.
// Empty input successfully converts to empty output.
// Returns false and may modify output if it is
// unable to parse the hex string.
bool ByteStringFromAscii(const std::string& ascii_string, std::string* binary_string);
// Clean up a multi-line string to conform to Unix line endings.
// Reads from src and appends to dst, so usually dst should be empty.
// If there is no line ending at the end of a non-empty string, it can
// be added automatically.
//
// Four different types of input are correctly handled:
//
// - Unix/Linux files: line ending is LF, pass through unchanged
//
// - DOS/Windows files: line ending is CRLF: convert to LF
//
// - Legacy Mac files: line ending is CR: convert to LF
//
// - Garbled files: random line endings, convert gracefully
// lonely CR, lonely LF, CRLF: convert to LF
//
// @param src The multi-line string to convert
// @param dst The converted string is appended to this string
// @param auto_end_last_line Automatically terminate the last line
//
// Limitations:
//
// This does not do the right thing for CRCRLF files created by
// broken programs that do another Unix->DOS conversion on files
// that are already in CRLF format.
void CleanStringLineEndings(const std::string& src, std::string* dst,
bool auto_end_last_line);
// Same as above, but transforms the argument in place.
void CleanStringLineEndings(std::string* str, bool auto_end_last_line);
} // namespace strings
#endif // STRINGS_ESCAPING_H_