blob: cdde845a4bae12db653e5bbb128fd1766e649150 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
// Refactored from contributions of various authors in strings/strutil.h
//
// This file contains functions that remove a defined part from the string,
// i.e., strip the string.
#ifndef STRINGS_STRIP_H_
#define STRINGS_STRIP_H_
#include <stddef.h>
#include <string>
using std::string;
#include "gutil/strings/ascii_ctype.h"
#include "gutil/strings/stringpiece.h"
// Given a string and a putative prefix, returns the string minus the
// prefix string if the prefix matches, otherwise the original
// string.
string StripPrefixString(StringPiece str, const StringPiece& prefix);
// Like StripPrefixString, but return true if the prefix was
// successfully matched. Write the output to *result.
// It is safe for result to point back to the input string.
bool TryStripPrefixString(StringPiece str, const StringPiece& prefix,
string* result);
// Given a string and a putative suffix, returns the string minus the
// suffix string if the suffix matches, otherwise the original
// string.
string StripSuffixString(StringPiece str, const StringPiece& suffix);
// Like StripSuffixString, but return true if the suffix was
// successfully matched. Write the output to *result.
// It is safe for result to point back to the input string.
bool TryStripSuffixString(StringPiece str, const StringPiece& suffix,
string* result);
// ----------------------------------------------------------------------
// StripString
// Replaces any occurrence of the character 'remove' (or the characters
// in 'remove') with the character 'replacewith'.
// Good for keeping html characters or protocol characters (\t) out
// of places where they might cause a problem.
// ----------------------------------------------------------------------
// Single-character overload: scans the NUL-terminated buffer 'str' and
// overwrites, in place, each byte equal to 'remove' with 'replacewith'.
inline void StripString(char* str, char remove, char replacewith) {
  char* cursor = str;
  while (*cursor != '\0') {
    if (*cursor == remove) {
      *cursor = replacewith;
    }
    ++cursor;
  }
}
// Set-based overloads of StripString: here 'remove' is a set of characters,
// and any character of the input that appears in that set is replaced with
// 'replacewith'.
// NOTE(review): the (str, len) overload presumably processes exactly 'len'
// bytes of 'str' instead of stopping at a NUL terminator -- confirm against
// the definition, which is not visible in this header.
void StripString(char* str, StringPiece remove, char replacewith);
void StripString(char* str, int len, StringPiece remove, char replacewith);
void StripString(string* s, StringPiece remove, char replacewith);
// ----------------------------------------------------------------------
// StripDupCharacters
// Replaces any repeated occurrence of the character 'dup_char'
// with single occurrence. e.g.,
// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
// Return the number of characters removed
// ----------------------------------------------------------------------
int StripDupCharacters(string* s, char dup_char, int start_pos);
// ----------------------------------------------------------------------
// StripWhiteSpace
// "Removes" whitespace from both sides of string. Pass in a pointer to an
// array of characters, and its length. The function changes the pointer
// and length to refer to a substring that does not contain leading or
// trailing spaces; it does not modify the string itself. If the caller is
// using NUL-terminated strings, it is the caller's responsibility to insert
// the NUL character at the end of the substring.
//
// Note: to be completely type safe, this function should be
// parameterized as a template: template<typename anyChar> void
// StripWhiteSpace(anyChar** str, int* len), where the expectation
// is that anyChar could be char, const char, w_char, const w_char,
// unicode_char, or any other character type we want. However, we
// just provided a version for char and const char. C++ is
// inconvenient, but correct, here. Ask Amit if you want to know
// the type safety details.
// ----------------------------------------------------------------------
void StripWhiteSpace(const char** str, int* len);
//------------------------------------------------------------------------
// StripTrailingWhitespace()
// Removes whitespace at the end of the string *s.
//------------------------------------------------------------------------
void StripTrailingWhitespace(string* s);
//------------------------------------------------------------------------
// StripTrailingNewline(string*)
// Strips the very last trailing newline or CR+newline from its
// input, if one exists. Useful for dealing with MapReduce's text
// input mode, which appends '\n' to each map input. Returns true
// if a newline was stripped.
//------------------------------------------------------------------------
bool StripTrailingNewline(string* s);
// Mutable-pointer convenience overload of StripWhiteSpace.
inline void StripWhiteSpace(char** str, int* len) {
  // Conceptually StripWhiteSpace works for any character type C, mapping a
  // (C*, int) pair to a (C*, int) pair. Only the const char version is
  // implemented, so view the caller's char* slot as a const char* slot and
  // forward to it; the callee only re-points *str and adjusts *len.
  const char** const_view = const_cast<const char**>(str);
  StripWhiteSpace(const_view, len);
}
// StringPiece overload: narrows *str to the range reported by the
// pointer/length overload of StripWhiteSpace.
inline void StripWhiteSpace(StringPiece* str) {
  // Copy out the current extent so the pointer/length overload can adjust it.
  const char* begin = str->data();
  int length = str->size();
  StripWhiteSpace(&begin, &length);
  // Re-point the piece at whatever range the call above left us with.
  str->set(begin, length);
}
// std::string overload of StripWhiteSpace.
// NOTE(review): presumably removes leading and trailing whitespace from *str
// in place -- confirm against the definition, which is not in this header.
void StripWhiteSpace(string* str);
namespace strings {
// Calls StripWhiteSpace(&element) on every element of *collection, in
// iteration order. The element type must therefore have a StripWhiteSpace
// overload that accepts a pointer to it.
template <typename Collection>
inline void StripWhiteSpaceInCollection(Collection* collection) {
  typename Collection::iterator pos = collection->begin();
  while (pos != collection->end()) {
    StripWhiteSpace(&(*pos));
    ++pos;
  }
}
}  // namespace strings
// ----------------------------------------------------------------------
// StripLeadingWhiteSpace
// "Removes" whitespace from beginning of string. Returns ptr to first
// non-whitespace character if one is present, NULL otherwise. Assumes
// "line" is null-terminated.
// ----------------------------------------------------------------------
inline const char* StripLeadingWhiteSpace(const char* line) {
  // Advance past every leading character that ascii_isspace() accepts.
  const char* cursor = line;
  for (; ascii_isspace(*cursor); ++cursor) {
  }
  // Something other than the terminator remains: that is the result.
  if (*cursor != '\0') {
    return cursor;
  }
  // Reached the end of the line without finding a non-whitespace character.
  return NULL;
}
// StripLeadingWhiteSpace for non-const strings.
inline char* StripLeadingWhiteSpace(char* line) {
  // Reuse the const overload, then restore the mutability the caller's
  // pointer had to begin with (the result is either NULL or points into
  // 'line').
  const char* as_const = line;
  const char* first_non_space = StripLeadingWhiteSpace(as_const);
  return const_cast<char*>(first_non_space);
}
// std::string overload of StripLeadingWhiteSpace.
// NOTE(review): presumably erases leading whitespace from *str in place --
// confirm against the definition, which is not in this header.
void StripLeadingWhiteSpace(string* str);
// Remove leading, trailing, and duplicate internal whitespace.
void RemoveExtraWhitespace(string* s);
// ----------------------------------------------------------------------
// SkipLeadingWhiteSpace
// Returns str advanced past white space characters, if any.
// Never returns NULL. "str" must be terminated by a null character.
// ----------------------------------------------------------------------
inline const char* SkipLeadingWhiteSpace(const char* str) {
  // Step forward while the current character is ASCII whitespace. The NUL
  // terminator must not count as whitespace for this loop to stop, which is
  // why 'str' has to be NUL-terminated.
  const char* cursor = str;
  for (; ascii_isspace(*cursor); ++cursor) {
  }
  return cursor;
}
// SkipLeadingWhiteSpace for non-const strings: returns 'str' advanced past
// any leading characters accepted by ascii_isspace().
inline char* SkipLeadingWhiteSpace(char* str) {
  char* scan = str;
  for (;;) {
    if (!ascii_isspace(*scan)) {
      break;
    }
    ++scan;
  }
  return scan;
}
// ----------------------------------------------------------------------
// StripCurlyBraces
// Strips everything enclosed in pairs of curly braces and the curly
// braces. Doesn't touch open braces. It doesn't handle nested curly
// braces. This is used for removing things like {:stopword} from
// queries.
// StripBrackets does the same, but allows the caller to specify different
// left and right bracket characters, such as '(' and ')'.
// ----------------------------------------------------------------------
void StripCurlyBraces(string* s);
void StripBrackets(char left, char right, string* s);
// ----------------------------------------------------------------------
// StripMarkupTags
// Strips everything enclosed in pairs of angle brackets and the angle
// brackets.
// This is used for stripping strings of markup; e.g. going from
// "the quick <b>brown</b> fox" to "the quick brown fox."
// If you want to skip entire sections of markup (e.g. the word "brown"
// too in that example), see webutil/pageutil/pageutil.h .
// This function was designed for stripping the bold tags (inserted by the
// docservers) from the titles of news stories being returned by RSS.
// This implementation DOES NOT cover all cases in html documents
// such as tags that contain quoted angle-brackets, or HTML comments.
// For example <IMG SRC = "foo.gif" ALT = "A > B">
// or <!-- <A comment> -->
// See "perldoc -q html"
// ----------------------------------------------------------------------
void StripMarkupTags(string* s);
string OutputWithMarkupTagsStripped(const string& s);
// ----------------------------------------------------------------------
// TrimStringLeft
// Removes any occurrences of the characters in 'remove' from the start
// of the string. Returns the number of chars trimmed.
// ----------------------------------------------------------------------
int TrimStringLeft(string* s, const StringPiece& remove);
// ----------------------------------------------------------------------
// TrimStringRight
// Removes any occurrences of the characters in 'remove' from the end
// of the string. Returns the number of chars trimmed.
// ----------------------------------------------------------------------
int TrimStringRight(string* s, const StringPiece& remove);
// ----------------------------------------------------------------------
// TrimString
// Removes any occurrences of the characters in 'remove' from either
// end of the string.
// ----------------------------------------------------------------------
inline int TrimString(string* s, const StringPiece& remove) {
  // Both helpers mutate *s, so run them as separate statements: the right
  // end is trimmed first, then the left end.
  const int trimmed_right = TrimStringRight(s, remove);
  const int trimmed_left = TrimStringLeft(s, remove);
  // Total number of characters removed from both ends.
  return trimmed_right + trimmed_left;
}
// ----------------------------------------------------------------------
// TrimRunsInString
// Removes leading and trailing runs, and collapses middle
// runs of a set of characters into a single character (the
// first one specified in 'remove'). Useful for collapsing
// runs of repeated delimiters, whitespace, etc. E.g.,
// TrimRunsInString(&s, " :,()") removes leading and trailing
// delimiter chars and collapses and converts internal runs
// of delimiters to single ' ' characters, so, for example,
// " a:(b):c " -> "a b c"
// "first,last::(area)phone, ::zip" -> "first last area phone zip"
// ----------------------------------------------------------------------
void TrimRunsInString(string* s, StringPiece remove);
// ----------------------------------------------------------------------
// RemoveNullsInString
// Removes any internal \0 characters from the string.
// ----------------------------------------------------------------------
void RemoveNullsInString(string* s);
// ----------------------------------------------------------------------
// strrm()
// memrm()
// Remove all occurrences of a given character from a string.
// Returns the new length.
// ----------------------------------------------------------------------
int strrm(char* str, char c);
int memrm(char* str, int strlen, char c);
// ----------------------------------------------------------------------
// strrmm()
// Remove all occurrences of a given set of characters from a string.
// Returns the new length.
// ----------------------------------------------------------------------
int strrmm(char* str, const char* chars);
int strrmm(string* str, const string& chars);
#endif // STRINGS_STRIP_H_