// blob: c2b5ee15bc20ece56187f74b66f4071319b8dd11
/*
* Copyright 2011 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: vitaliyl@google.com (Vitaliy Lvin)
#include "pagespeed/kernel/base/escaping.h"
#include <cstddef>
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
// We escape backslash, double-quote, CR and LF while forming a string
// from the code. Single quotes are escaped as well, if we don't know we're
// explicitly double-quoting. Appends to *escaped.
//
// This is /almost/ completely right: U+2028 and U+2029 are
// line terminators as well (ECMA 262-5 --- 7.3, 7.8.4), so should really be
// escaped, too, but we don't have the encoding here.
void EscapeToJsStringLiteral(const StringPiece& original,
bool add_quotes,
GoogleString* escaped) {
// Optimistically assume no escaping will be required and reserve enough space
// for that result. This assumes that either escaped is empty (or nearly so),
// or reserve(...) behaves sanely and only vector doubles rather than
// increasing size linearly. The latter is true in gcc at least (but not true
// of some implementations of std::vector, thus the caveat).
escaped->reserve(escaped->size() + original.size() + (add_quotes ? 2 : 0));
if (add_quotes) {
(*escaped) += "\"";
}
for (size_t c = 0; c < original.length(); ++c) {
switch (original[c]) {
case '\\':
(*escaped) += "\\\\";
break;
case '"':
(*escaped) += "\\\"";
break;
case '\r':
(*escaped) += "\\r";
break;
case '\n':
(*escaped) += "\\n";
break;
case '\'':
if (!add_quotes) {
(*escaped) += "\\'";
} else {
(*escaped) += '\'';
}
break;
case '<': {
// Surprisingly, seeing <!-- and <script can affect how parsing
// of scripts inside HTML works, so we need to escape the <
// in them.
// (See the "script data escaped" HTML lexer states in the HTML5 spec).
StringPiece rest_of_input = original.substr(c);
if (StringCaseStartsWith(rest_of_input, "<script") ||
HasPrefixString(rest_of_input, "<!--")) {
*(escaped) += "\\u003c";
} else {
*(escaped) += '<';
}
break;
}
case '-': {
// Similarly to <!-- (see above) --> can be special.
if (HasPrefixString(original.substr(c), "-->")) {
*(escaped) += "\\u002d";
} else {
*(escaped) += '-';
}
break;
}
case '/':
// Forward slashes are generally OK, but </script> is trouble
// if it happens inside an inline <script>. We therefore escape the
// forward slash if we see /script>
if (StringCaseStartsWith(original.substr(c), "/script")) {
(*escaped) += '\\';
}
FALLTHROUGH_INTENDED;
default:
(*escaped) += original[c];
}
}
if (add_quotes) {
(*escaped) += "\"";
}
}
// Appends to *escaped a copy of |original| escaped for use inside a JSON
// string literal. When add_quotes is true the appended text is wrapped in
// double quotes.
//
// Only printable ASCII (0x20-0x7E) other than '<', '>', '"' and '\' is copied
// through unchanged; every other byte is written as a \u00XX escape with two
// lowercase hex digits. That covers the control characters 0x00-0x1F, DEL
// (0x7F) and all bytes >= 0x80, so the appended text is always printable
// ASCII.
//
// Note that bytes are escaped one at a time with no knowledge of the input
// encoding: a multi-byte character in |original| turns into one \u00XX escape
// per byte, not into the escape for its code point.
void EscapeToJsonStringLiteral(const StringPiece& original,
                               bool add_quotes,
                               GoogleString* escaped) {
  // Optimistically assume no escaping will be required and reserve enough
  // space for that result.
  escaped->reserve(escaped->size() + original.size() + (add_quotes ? 2 : 0));
  if (add_quotes) {
    (*escaped) += "\"";
  }
  for (size_t c = 0; c < original.length(); ++c) {
    // Work on the unsigned value so that bytes >= 0x80 compare as 128..255
    // instead of as negative chars.
    const unsigned char code = static_cast<unsigned char>(original[c]);
    // ">= 0x7F" (not "> 0x7F"): DEL is a non-printable control character and
    // must be escaped like the other control characters.
    const bool needs_escape = code <= 0x1F || code >= 0x7F ||
                              code == '<' || code == '>' ||
                              code == '"' || code == '\\';
    if (needs_escape) {
      (*escaped) += StringPrintf("\\u00%02x", code);
    } else {
      (*escaped) += original[c];
    }
  }
  if (add_quotes) {
    (*escaped) += "\"";
  }
}
} // namespace net_instaweb