blob: ff7449b769bbdc8432c5411f391a115e201b9e45 [file] [log] [blame]
// Copyright 2011 Google Inc. All Rights Reserved.
// based on contributions of various authors in strings/strutil_unittest.cc
//
// This file contains functions that remove a defined part from the string,
// i.e., strip the string.
#include "gutil/strings/strip.h"
#include <assert.h>
#include <string.h>
#include <algorithm>
using std::copy;
using std::max;
using std::min;
using std::reverse;
using std::sort;
using std::swap;
#include <string>
using std::string;
#include "gutil/strings/ascii_ctype.h"
#include "gutil/strings/stringpiece.h"
string StripPrefixString(StringPiece str, const StringPiece& prefix) {
if (str.starts_with(prefix))
str.remove_prefix(prefix.length());
return str.as_string();
}
bool TryStripPrefixString(StringPiece str, const StringPiece& prefix,
string* result) {
const bool has_prefix = str.starts_with(prefix);
if (has_prefix)
str.remove_prefix(prefix.length());
str.as_string().swap(*result);
return has_prefix;
}
string StripSuffixString(StringPiece str, const StringPiece& suffix) {
if (str.ends_with(suffix))
str.remove_suffix(suffix.length());
return str.as_string();
}
bool TryStripSuffixString(StringPiece str, const StringPiece& suffix,
string* result) {
const bool has_suffix = str.ends_with(suffix);
if (has_suffix)
str.remove_suffix(suffix.length());
str.as_string().swap(*result);
return has_suffix;
}
// ----------------------------------------------------------------------
// StripString
// Replaces any occurrence of the character 'remove' (or the characters
// in 'remove') with the character 'replacewith'.
// ----------------------------------------------------------------------
void StripString(char* str, StringPiece remove, char replacewith) {
for (; *str != '\0'; ++str) {
if (remove.find(*str) != StringPiece::npos) {
*str = replacewith;
}
}
}
void StripString(char* str, int len, StringPiece remove, char replacewith) {
char* end = str + len;
for (; str < end; ++str) {
if (remove.find(*str) != StringPiece::npos) {
*str = replacewith;
}
}
}
void StripString(string* s, StringPiece remove, char replacewith) {
for (char& c : *s) {
if (remove.find(c) != StringPiece::npos) {
c = replacewith;
}
}
}
// ----------------------------------------------------------------------
// StripWhiteSpace
// ----------------------------------------------------------------------
void StripWhiteSpace(const char** str, int* len) {
// strip off trailing whitespace
while ((*len) > 0 && ascii_isspace((*str)[(*len)-1])) {
(*len)--;
}
// strip off leading whitespace
while ((*len) > 0 && ascii_isspace((*str)[0])) {
(*len)--;
(*str)++;
}
}
bool StripTrailingNewline(string* s) {
if (!s->empty() && (*s)[s->size() - 1] == '\n') {
if (s->size() > 1 && (*s)[s->size() - 2] == '\r')
s->resize(s->size() - 2);
else
s->resize(s->size() - 1);
return true;
}
return false;
}
void StripWhiteSpace(string* str) {
int str_length = str->length();
// Strip off leading whitespace.
int first = 0;
while (first < str_length && ascii_isspace(str->at(first))) {
++first;
}
// If entire string is white space.
if (first == str_length) {
str->clear();
return;
}
if (first > 0) {
str->erase(0, first);
str_length -= first;
}
// Strip off trailing whitespace.
int last = str_length - 1;
while (last >= 0 && ascii_isspace(str->at(last))) {
--last;
}
if (last != (str_length - 1) && last >= 0) {
str->erase(last + 1, string::npos);
}
}
// ----------------------------------------------------------------------
// Misc. stripping routines
// ----------------------------------------------------------------------
void StripCurlyBraces(string* s) {
return StripBrackets('{', '}', s);
}
void StripBrackets(char left, char right, string* s) {
string::iterator opencurly = find(s->begin(), s->end(), left);
while (opencurly != s->end()) {
string::iterator closecurly = find(opencurly, s->end(), right);
if (closecurly == s->end())
return;
opencurly = s->erase(opencurly, closecurly + 1);
opencurly = find(opencurly, s->end(), left);
}
}
void StripMarkupTags(string* s) {
string::iterator openbracket = find(s->begin(), s->end(), '<');
while (openbracket != s->end()) {
string::iterator closebracket = find(openbracket, s->end(), '>');
if (closebracket == s->end()) {
s->erase(openbracket, closebracket);
return;
}
openbracket = s->erase(openbracket, closebracket + 1);
openbracket = find(openbracket, s->end(), '<');
}
}
string OutputWithMarkupTagsStripped(const string& s) {
string result(s);
StripMarkupTags(&result);
return result;
}
int TrimStringLeft(string* s, const StringPiece& remove) {
int i = 0;
while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
++i;
}
if (i > 0) s->erase(0, i);
return i;
}
int TrimStringRight(string* s, const StringPiece& remove) {
int i = s->size(), trimmed = 0;
while (i > 0 && memchr(remove.data(), (*s)[i-1], remove.size())) {
--i;
}
if (i < s->size()) {
trimmed = s->size() - i;
s->erase(i);
}
return trimmed;
}
// ----------------------------------------------------------------------
// Various removal routines
// ----------------------------------------------------------------------
int strrm(char* str, char c) {
char *src, *dest;
for (src = dest = str; *src != '\0'; ++src)
if (*src != c) *(dest++) = *src;
*dest = '\0';
return dest - str;
}
int memrm(char* str, int strlen, char c) {
char *src, *dest;
for (src = dest = str; strlen-- > 0; ++src)
if (*src != c) *(dest++) = *src;
return dest - str;
}
int strrmm(char* str, const char* chars) {
char *src, *dest;
for (src = dest = str; *src != '\0'; ++src) {
bool skip = false;
for (const char* c = chars; *c != '\0'; c++) {
if (*src == *c) {
skip = true;
break;
}
}
if (!skip) *(dest++) = *src;
}
*dest = '\0';
return dest - str;
}
int strrmm(string* str, const string& chars) {
size_t str_len = str->length();
size_t in_index = str->find_first_of(chars);
if (in_index == string::npos)
return str_len;
size_t out_index = in_index++;
while (in_index < str_len) {
char c = (*str)[in_index++];
if (chars.find(c) == string::npos)
(*str)[out_index++] = c;
}
str->resize(out_index);
return out_index;
}
// ----------------------------------------------------------------------
// StripDupCharacters
// Replaces any repeated occurrence of the character 'repeat_char'
// with single occurrence. e.g.,
// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
// Return the number of characters removed
// ----------------------------------------------------------------------
int StripDupCharacters(string* s, char dup_char, int start_pos) {
if (start_pos < 0)
start_pos = 0;
// remove dups by compaction in-place
int input_pos = start_pos; // current reader position
int output_pos = start_pos; // current writer position
const int input_end = s->size();
while (input_pos < input_end) {
// keep current character
const char curr_char = (*s)[input_pos];
if (output_pos != input_pos) // must copy
(*s)[output_pos] = curr_char;
++input_pos;
++output_pos;
if (curr_char == dup_char) { // skip subsequent dups
while ((input_pos < input_end) && ((*s)[input_pos] == dup_char))
++input_pos;
}
}
const int num_deleted = input_pos - output_pos;
s->resize(s->size() - num_deleted);
return num_deleted;
}
// ----------------------------------------------------------------------
// RemoveExtraWhitespace()
// Remove leading, trailing, and duplicate internal whitespace.
// ----------------------------------------------------------------------
void RemoveExtraWhitespace(string* s) {
assert(s != nullptr);
// Empty strings clearly have no whitespace, and this code assumes that
// string length is greater than 0
if (s->empty())
return;
int input_pos = 0; // current reader position
int output_pos = 0; // current writer position
const int input_end = s->size();
// Strip off leading space
while (input_pos < input_end && ascii_isspace((*s)[input_pos])) input_pos++;
while (input_pos < input_end - 1) {
char c = (*s)[input_pos];
char next = (*s)[input_pos + 1];
// Copy each non-whitespace character to the right position.
// For a block of whitespace, print the last one.
if (!ascii_isspace(c) || !ascii_isspace(next)) {
if (output_pos != input_pos) { // only copy if needed
(*s)[output_pos] = c;
}
output_pos++;
}
input_pos++;
}
// Pick up the last character if needed.
char c = (*s)[input_end - 1];
if (!ascii_isspace(c)) (*s)[output_pos++] = c;
s->resize(output_pos);
}
//------------------------------------------------------------------------
// See comment in header file for a complete description.
//------------------------------------------------------------------------
void StripLeadingWhiteSpace(string* str) {
char const* const leading = StripLeadingWhiteSpace(
const_cast<char*>(str->c_str()));
if (leading != nullptr) {
string const tmp(leading);
str->assign(tmp);
} else {
str->assign("");
}
}
void StripTrailingWhitespace(string* const s) {
string::size_type i;
for (i = s->size(); i > 0 && ascii_isspace((*s)[i - 1]); --i) {
}
s->resize(i);
}
// ----------------------------------------------------------------------
// TrimRunsInString
// Removes leading and trailing runs, and collapses middle
// runs of a set of characters into a single character (the
// first one specified in 'remove'). Useful for collapsing
// runs of repeated delimiters, whitespace, etc. E.g.,
// TrimRunsInString(&s, " :,()") removes leading and trailing
// delimiter chars and collapses and converts internal runs
// of delimiters to single ' ' characters, so, for example,
// " a:(b):c " -> "a b c"
// "first,last::(area)phone, ::zip" -> "first last area phone zip"
// ----------------------------------------------------------------------
void TrimRunsInString(string* s, StringPiece remove) {
string::iterator dest = s->begin();
string::iterator src_end = s->end();
for (string::iterator src = s->begin(); src != src_end; ) {
if (remove.find(*src) == StringPiece::npos) {
*(dest++) = *(src++);
} else {
// Skip to the end of this run of chars that are in 'remove'.
for (++src; src != src_end; ++src) {
if (remove.find(*src) == StringPiece::npos) {
if (dest != s->begin()) {
// This is an internal run; collapse it.
*(dest++) = remove[0];
}
*(dest++) = *(src++);
break;
}
}
}
}
s->erase(dest, src_end);
}
// ----------------------------------------------------------------------
// RemoveNullsInString
// Removes any internal \0 characters from the string.
// ----------------------------------------------------------------------
void RemoveNullsInString(string* s) {
s->erase(remove(s->begin(), s->end(), '\0'), s->end());
}