blob: 1d4b34840a6b037e2fbbf72eef473652b52b6a50 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef PAGESPEED_KERNEL_BASE_STRING_UTIL_H_
#define PAGESPEED_KERNEL_BASE_STRING_UTIL_H_
#include <cctype> // for isascii
#include <cstdarg> // for va_list
#include <cstddef>
#include <cstdint> // for uint32_t
#include <cstdlib> // NOLINT
#include <cstring> // for strlen, strcmp
#include <iostream>
#include <map>
#include <set>
#include <string> // NOLINT
#include <vector>
#include "absl/strings/ascii.h" // StripAsciiWhitespace
#include "absl/strings/internal/memutil.h" // StripAsciiWhitespace
#include "absl/strings/match.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/string_view.h"
#include "base/logging.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/string.h"
// Bounds of a 32-bit signed integer. The minimum is written as (-max - 1)
// because its magnitude cannot be spelled as a positive int literal.
static const int32 kint32max = 2147483647;
static const int32 kint32min = -2147483647 - 1;
using absl::StrAppend;
using absl::StrCat;
class StringPiece : public absl::string_view {
public:
// We accept nullptr for historical reasons.
StringPiece(const char* c) : absl::string_view(absl::NullSafeStringView(c)) {}
StringPiece(const absl::string_view& s) : absl::string_view(s) {}
StringPiece(const GoogleString& s) : absl::string_view(s.data(), s.size()) {}
using absl::string_view::string_view;
// We accept nullptr for historical reasons.
constexpr bool operator==(const char* rhs) const noexcept {
return absl::NullSafeStringView(rhs) == *this;
}
// We accept nullptr for historical reasons.
constexpr bool operator!=(const char* rhs) const noexcept {
return absl::NullSafeStringView(rhs) != *this;
}
void CopyToString(GoogleString* dest) const { *dest = std::string(*this); }
void AppendToString(GoogleString* dest) const {
(*dest).append(this->data(), this->size());
}
GoogleString as_string() const {
return empty() ? std::string() : std::string(*this);
}
bool starts_with(StringPiece prefix) const {
return absl::StartsWith(*this, prefix);
}
bool ends_with(StringPiece postfix) const {
return absl::EndsWith(*this, postfix);
}
void set(StringPiece newvalue, uint32_t size) {
StringPiece tmp(newvalue.data(), size);
*this = tmp;
}
StringPiece substr(uint32_t from, uint32_t to) const {
return StringPiece(absl::string_view::substr(from, to));
}
StringPiece substr(uint32_t from) const {
return StringPiece(absl::string_view::substr(from));
}
};
// NOTE(review): presumably appends printf-style output, described by 'format'
// and the arguments captured in 'ap', to *dst. Only the declaration is
// visible in this header -- confirm against the definition.
void StringAppendV(std::string* dst, const char* format, va_list ap);
// XXX(oschaaf): check(!!)
// NOTE(review): despite the "ssize" in its name this alias is the unsigned
// size_t, so a value of this type can never be negative.
typedef size_t stringpiece_ssize_type;
// Re-exports absl's prefix/suffix predicates so callers can keep writing
// strings::StartsWith() and strings::EndsWith().
namespace strings {
using absl::EndsWith;
using absl::StartsWith;
} // namespace strings
// Quick macro to get the size of a static char[] without trailing '\0'.
// Note: Cannot be used for char*, std::string, etc.
// The #ifndef guard lets a definition that was seen earlier take precedence.
// Relies on an 'arraysize' helper that is not defined in this header
// (presumably supplied by basictypes.h -- confirm).
#ifndef STATIC_STRLEN
#define STATIC_STRLEN(static_string) (arraysize(static_string) - 1)
#endif
namespace net_instaweb {
struct StringCompareInsensitive;
typedef std::map<GoogleString, GoogleString> StringStringMap;
typedef std::map<GoogleString, int> StringIntMap;
typedef std::set<GoogleString> StringSet;
typedef std::set<GoogleString, StringCompareInsensitive> StringSetInsensitive;
typedef std::vector<GoogleString> StringVector;
typedef std::vector<StringPiece> StringPieceVector;
typedef std::vector<const GoogleString*> ConstStringStarVector;
typedef std::vector<GoogleString*> StringStarVector;
typedef std::vector<const char*> CharStarVector;
// Returns the decimal text of a signed int, as produced by absl::StrCat.
inline GoogleString IntegerToString(const int i) { return absl::StrCat(i); }
// Returns the decimal text of an unsigned int, as produced by absl::StrCat.
inline GoogleString UintToString(const unsigned int i) {
return absl::StrCat(i);
}
// Returns the decimal text of a 64-bit signed integer, as produced by
// absl::StrCat.
inline GoogleString Integer64ToString(const int64 i) { return absl::StrCat(i); }
// Formats a pointer value using the "%p" conversion of absl::StrFormat.
inline GoogleString PointerToString(const void* pointer) {
return absl::StrFormat("%p", pointer);
}
// Parses 'in' as an int using absl::SimpleAtoi and returns whether the parse
// succeeded.
// NOTE(review): an earlier note here promised that "45x" sets *out = 45 and
// that "Junk45" or "" set *out = 0 while returning false. absl::SimpleAtoi
// documents *out as unspecified on failure, so callers should not rely on
// the value of *out when this returns false -- confirm against the absl
// version in use.
inline bool StringToInt(StringPiece in, int* out) {
return absl::SimpleAtoi<int>(in, out);
}
// Parses a NUL-terminated string as an int; returns whether the parse
// succeeded. The pointer is wrapped in a StringPiece first so that a nullptr
// argument is handled as the empty string instead of being handed to a
// string_view constructor, matching what StringToInt64(const char*) does.
inline bool StringToInt(const char* in, int* out) {
  return absl::SimpleAtoi<int>(StringPiece(in), out);
}
// Parses a NUL-terminated string as an int64 via absl::SimpleAtoi. Wrapping
// 'in' in a StringPiece makes a nullptr argument behave like the empty string.
inline bool StringToInt64(const char* in, int64* out) {
return absl::SimpleAtoi<int64>(StringPiece(in), out);
}
// Parses 'in' as an int64 via absl::SimpleAtoi; returns whether it succeeded.
inline bool StringToInt64(StringPiece in, int64* out) {
return absl::SimpleAtoi<int64>(in, out);
}
// GoogleString overload of StringToInt; returns whether the parse succeeded.
inline bool StringToInt(const GoogleString& in, int* out) {
return absl::SimpleAtoi<int>(in, out);
}
// GoogleString overload of StringToInt64; returns whether the parse succeeded.
inline bool StringToInt64(const GoogleString& in, int64* out) {
return absl::SimpleAtoi<int64>(in, out);
}
// Parses valid floating point number and returns true if string contains only
// that floating point number (ignoring leading/trailing whitespace).
// Note: This also parses hex and exponential float notation.
// 'in' is a NUL-terminated C string; the parsed value is written to *out.
// Only the declaration is visible in this header.
bool StringToDouble(const char* in, double* out);
inline bool StringToDouble(GoogleString in, double* out) {
const char* in_c_str = in.c_str();
if (strlen(in_c_str) != in.size()) {
// If there are embedded nulls, always fail.
return false;
}
return StringToDouble(in_c_str, out);
}
// StringPiece overload of StringToDouble. A StringPiece is not guaranteed to
// be NUL-terminated, so its bytes are copied into a GoogleString and the
// GoogleString overload does the parsing.
inline bool StringToDouble(StringPiece in, double* out) {
return StringToDouble(GoogleString(in), out);
}
// Returns the part of the piece after the first '=', trimming any
// white space found at the beginning or end of the resulting piece.
// Returns an empty string if '=' was not found.
StringPiece PieceAfterEquals(StringPiece piece);
// Split sp into pieces that are separated by any character in the given string
// of separators, and push those pieces in order onto components.
// NOTE(review): omit_empty_strings presumably drops zero-length pieces (for
// example between two adjacent separators) -- confirm against the definition.
void SplitStringPieceToVector(StringPiece sp, StringPiece separators,
StringPieceVector* components,
bool omit_empty_strings);
// Splits string 'full' using substr by searching it incrementally from
// left. Empty tokens are removed from the final result.
void SplitStringUsingSubstr(StringPiece full, StringPiece substr,
StringPieceVector* result);
// NOTE(review): presumably writes src to *dest with a backslash inserted
// before every character that occurs in to_escape -- confirm against the
// definition, which is not visible in this header.
void BackslashEscape(StringPiece src, StringPiece to_escape,
GoogleString* dest);
// NOTE(review): presumably returns a C-style escaped copy of src; the exact
// escaping rules are not visible in this header.
GoogleString CEscape(StringPiece src);
// TODO(jmarantz): Eliminate these definitions of HasPrefixString,
// UpperString, and LowerString, and re-add dependency on protobufs
// which also provide definitions for these.
bool HasPrefixString(StringPiece str, StringPiece prefix);
void UpperString(GoogleString* str);
void LowerString(GoogleString* str);
// Returns true if nothing is left of str after absl::StripAsciiWhitespace
// removes its leading and trailing ASCII whitespace, i.e. str is empty or
// made up entirely of ASCII whitespace.
inline bool OnlyWhitespace(const GoogleString& str) {
return absl::StripAsciiWhitespace(str).empty();
}
// Replaces all instances of 'substring' in 's' with 'replacement'.
// Returns the number of instances replaced. Replacements are not
// subject to re-matching.
//
// NOTE: The string pieces must not overlap 's'.
int GlobalReplaceSubstring(StringPiece substring, StringPiece replacement,
GoogleString* s);
// Returns the index of the start of needle in haystack, or
// StringPiece::npos if it's not present.
// The name indicates that the match ignores letter case.
StringPiece::size_type FindIgnoreCase(StringPiece haystack, StringPiece needle);
// Erase shortest substrings in string bracketed by left and right, working
// from the left.
// ("[", "]", "abc[def]g[h]i]j[k") -> "abcgi]j[k"
// Returns the number of substrings erased.
int GlobalEraseBracketedSubstring(StringPiece left, StringPiece right,
GoogleString* string);
// Output a string which is the combination of all values in vector, separated
// by delim. Does not ignore empty strings in vector. So:
// JoinStringStar({"foo", "", "bar"}, ", ") == "foo, , bar". (Pseudocode)
// The vector holds pointers to the strings, not the strings themselves.
GoogleString JoinStringStar(const ConstStringStarVector& vector,
StringPiece delim);
// See also: ./src/third_party/css_parser/src/third_party/css_parser/src/strings/ascii_ctype.h
// We probably don't want our core string header file to have a
// dependency on the Google CSS parser, so the helper is written here.
// Upper-cases a single ASCII letter; every other character is returned
// unchanged. toupper() changes based on locale. We don't want this!
inline char UpperChar(char c) {
  const bool is_lower = ('a' <= c) && (c <= 'z');
  return is_lower ? static_cast<char>(c - 'a' + 'A') : c;
}
// Lower-cases a single ASCII letter; every other character is returned
// unchanged. tolower() changes based on locale. We don't want this!
inline char LowerChar(char c) {
  const bool is_upper = ('A' <= c) && (c <= 'Z');
  return is_upper ? static_cast<char>(c - 'A' + 'a') : c;
}
// Check if given character is an HTML (or CSS) space (not the same as isspace,
// and not locale-dependent!). Note in particular that isspace always includes
// '\v' and HTML does not. See:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/common-microsyntaxes.html#space-character
// http://www.w3.org/TR/CSS21/grammar.html
// Returns a bool: true for ' ', '\t', '\r', '\n' and '\f', false otherwise.
inline bool IsHtmlSpace(char c) {
  switch (c) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
    case '\f':
      return true;
    default:
      return false;
  }
}
/* inline char* strdup(const char* str) {
return absl::strdup(str);
}*/
// Case-insensitive string comparison that is locale-independent.
// The comparators in this header treat a negative result as "s1 orders
// before s2"; presumably the result follows the strcmp convention of
// negative / zero / positive -- confirm against the definition.
int StringCaseCompare(StringPiece s1, StringPiece s2);
// Returns true for the US-ASCII letters and digits only. Prefer this to
// isalnum() when processing computer languages rather than human languages.
inline bool IsAsciiAlphaNumeric(char ch) {
  if (('0' <= ch) && (ch <= '9')) {
    return true;
  }
  if (('A' <= ch) && (ch <= 'Z')) {
    return true;
  }
  return ('a' <= ch) && (ch <= 'z');
}
// Convenience functions.
// Returns true if c is one of 0-9, A-F or a-f.
inline bool IsHexDigit(char c) {
  if ((c >= '0') && (c <= '9')) {
    return true;
  }
  if ((c >= 'A') && (c <= 'F')) {
    return true;
  }
  return (c >= 'a') && (c <= 'f');
}
// Returns true if c is one of the ASCII digits 0-9.
inline bool IsDecimalDigit(char c) {
  return !((c < '0') || (c > '9'));
}
// In-place removal of leading and trailing HTML whitespace. Returns true if
// any whitespace was trimmed.
// "In-place" refers to the view: *str is narrowed, the underlying bytes are
// not modified (presumably "HTML whitespace" is the IsHtmlSpace set --
// confirm against the definition).
bool TrimWhitespace(StringPiece* str);
// In-place removal of leading and trailing quote. Removes whitespace as well.
void TrimQuote(StringPiece* str);
// In-place removal of multiple levels of leading and trailing quotes,
// include url-escaped quotes, optionally backslashed. Removes
// whitespace as well.
void TrimUrlQuotes(StringPiece* str);
// Trims leading HTML whitespace. Returns true if any whitespace was trimmed.
bool TrimLeadingWhitespace(StringPiece* str);
// Trims trailing HTML whitespace. Returns true if any whitespace was trimmed.
bool TrimTrailingWhitespace(StringPiece* str);
// Non-destructive TrimWhitespace: leaves 'in' alone and stores the trimmed
// text in *output.
// WARNING: in should not point inside output!
inline void TrimWhitespace(StringPiece in, GoogleString* output) {
  // Debug-only guard: 'in' must not start inside output's current buffer,
  // because that buffer is overwritten by the assignment below.
  DCHECK(!((in.data() >= output->data()) &&
           (in.data() < (output->data() + output->length()))))
      << "Illegal argument aliasing in TrimWhitespace";
  StringPiece trimmed(in);   // Local view that may be narrowed.
  TrimWhitespace(&trimmed);  // Narrows 'trimmed' in place.
  trimmed.CopyToString(output);
}
// Accumulates a decimal value from 'c' into *value.
// Returns false and leaves *value unchanged if c is not a decimal digit.
// NOTE(review): presumably computes *value = *value * 10 + digit; overflow
// behavior is not visible from this header.
bool AccumulateDecimalValue(char c, uint32* value);
// Accumulates a hex value from 'c' into *value
// Returns false and leaves *value unchanged if c is not a hex digit.
// NOTE(review): presumably computes *value = *value * 16 + digit; overflow
// behavior is not visible from this header.
bool AccumulateHexValue(char c, uint32* value);
// Return true iff the two strings are equal, ignoring case.
// Each string is given as a pointer plus an explicit length.
bool MemCaseEqual(const char* s1, size_t size1, const char* s2, size_t size2);
// StringPiece convenience wrapper around MemCaseEqual: returns whether s1 and
// s2 are equal when letter case is ignored.
inline bool StringCaseEqual(StringPiece s1, StringPiece s2) {
return MemCaseEqual(s1.data(), s1.size(), s2.data(), s2.size());
}
// Return true iff str starts with prefix, ignoring case.
bool StringCaseStartsWith(StringPiece str, StringPiece prefix);
// Return true iff str ends with suffix, ignoring case.
bool StringCaseEndsWith(StringPiece str, StringPiece suffix);
// Return true if str is equal to the concatenation of first and second. Note
// that this respects case.
bool StringEqualConcat(StringPiece str, StringPiece first, StringPiece second);
// Return the number of mismatched chars in two strings. Useful for string
// comparisons without short-circuiting to prevent timing attacks.
// See http://codahale.com/a-lesson-in-timing-attacks/
// NOTE(review): how strings of different lengths are counted is not visible
// from this header -- check the definition before relying on it.
int CountCharacterMismatches(StringPiece s1, StringPiece s2);
// Less-than functor for C strings that ignores letter case; it orders s1
// before s2 when StringCaseCompare returns a negative value.
struct CharStarCompareInsensitive {
bool operator()(const char* s1, const char* s2) const {
return (StringCaseCompare(s1, s2) < 0);
}
};
// Less-than functor for C strings that compares them byte-wise with strcmp,
// so letter case matters. Both pointers must be non-null.
struct CharStarCompareSensitive {
  bool operator()(const char* s1, const char* s2) const {
    const int order = strcmp(s1, s2);
    return order < 0;
  }
};
// Less-than functor for string pieces that uses operator<, so letter case
// matters.
struct StringCompareSensitive {
bool operator()(StringPiece s1, StringPiece s2) const { return s1 < s2; }
};
// Less-than functor for string pieces that ignores letter case; it orders s1
// before s2 when StringCaseCompare returns a negative value.
struct StringCompareInsensitive {
bool operator()(StringPiece s1, StringPiece s2) const {
return (StringCaseCompare(s1, s2) < 0);
}
};
// Parse a list of integers into a vector. Empty values are ignored.
// Returns true if all non-empty values are converted into integers.
// 'separators' presumably has the same meaning as in
// SplitStringPieceToVector (any listed character splits) -- confirm.
bool SplitStringPieceToIntegerVector(StringPiece src, StringPiece separators,
std::vector<int>* ints);
// Returns true if the last character of path is '/'. An empty path does not
// end in a slash.
inline bool EndsInSlash(StringPiece path) {
  return !path.empty() && (path.back() == '/');
}
// Appends a '/' to *dir unless it already ends in one.
inline void EnsureEndsInSlash(GoogleString* dir) {
  if (EndsInSlash(*dir)) {
    return;  // Nothing to do.
  }
  dir->push_back('/');
}
// Given a string such as: a b "c d" e 'f g'
// Parse it into a vector: ["a", "b", "c d", "e", "f g"]
// NOTE: actually used for html doctype recognition,
// so assumes HtmlSpace separation.
// NOTE(review): whether *output is cleared first or appended to is not
// visible from this header.
void ParseShellLikeString(StringPiece input, std::vector<GoogleString>* output);
// Counts the number of times that substring appears in text
// Note: for a substring that can overlap itself, it counts not necessarily
// disjoint occurrences of the substring.
// For example: "aaa" appears in "aaaaa" 3 times, not once
int CountSubstring(StringPiece text, StringPiece substring);
// Grows *v by one empty string and returns the address of that new element.
// The pointer refers to storage owned by the vector.
inline GoogleString* StringVectorAdd(StringVector* v) {
  v->emplace_back();
  GoogleString& added = v->back();
  return &added;
}
// Appends the string-like elements of [start, end) to *dest, with 'sep'
// between consecutive elements. The range is walked twice: once to work out
// the final size so the buffer can be reserved up front, once to append.
template <typename I>
void AppendJoinIterator(GoogleString* dest, I start, I end, StringPiece sep) {
  if (start == end) {
    // Skip a lot of set-up and tear-down in empty case.
    return;
  }
  // Pass 1: existing contents + every element + one separator per gap.
  size_t total = dest->size();
  bool first = true;
  for (I it = start; it != end; ++it) {
    if (!first) {
      total += sep.size();
    }
    total += it->size();
    first = false;
  }
  dest->reserve(total);
  // Pass 2: the first element goes in bare, each later one after a separator.
  I it = start;
  StrAppend(dest, *it);
  for (++it; it != end; ++it) {
    StrAppend(dest, sep, *it);
  }
}
// Append an arbitrary iterable collection of strings such as a StringSet,
// StringVector, or StringPieceVector, separated by a given separator.
// Argument order chosen to be consistent with StrAppend.
// This simply forwards collection.begin()/end() to AppendJoinIterator; there
// are no parameters for an initial or final string.
template <typename C>
void AppendJoinCollection(GoogleString* dest, const C& collection,
StringPiece sep) {
AppendJoinIterator(dest, collection.begin(), collection.end(), sep);
}
// Returns a new string holding the elements of 'collection' joined with
// 'sep', built by calling AppendJoinCollection on an empty string.
template <typename C>
GoogleString JoinCollection(const C& collection, StringPiece sep) {
GoogleString result;
AppendJoinCollection(&result, collection, sep);
return result;
}
// Returns the string literal "true" or "false" for the given boolean.
inline const char* BoolToString(bool b) {
  if (b) {
    return "true";
  }
  return "false";
}
// Returns true if c is a 7-bit ASCII character (0x00-0x7F). The test is done
// on the unsigned byte value so it is well defined for negative chars, and it
// avoids isascii(), which is a POSIX extension rather than standard C++.
inline bool IsAscii(char c) {
  return (static_cast<unsigned char>(c) & 0x80) == 0;
}
// Tests if c is a standard (non-control) ASCII char 0x20-0x7E.
// Note: This does not include TAB (0x09), LF (0x0A) or CR (0x0D).
inline bool IsNonControlAscii(char c) {
  return !((c < '\x20') || (c > '\x7E'));
}
} // namespace net_instaweb
#endif // PAGESPEED_KERNEL_BASE_STRING_UTIL_H_