blob: 38e3d018ed384821c828e8349240e45c94a14fbb [file] [log] [blame]
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main two abstractions
// defined in here are
//
// - SplitIterator<>
// - Splitter<>
//
// Everything else is plumbing for those two.
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// strings/split.h.
//
// IWYU pragma: private, include "strings/split.h"
#ifndef STRINGS_SPLIT_INTERNAL_H_
#define STRINGS_SPLIT_INTERNAL_H_
#include <iterator>
using std::back_insert_iterator;
using std::iterator_traits;
#include <map>
using std::map;
using std::multimap;
#include <vector>
using std::vector;
#include "gutil/port.h" // for LANG_CXX11
#include "gutil/strings/stringpiece.h"
#ifdef LANG_CXX11
// This must be included after "base/port.h", which defines LANG_CXX11.
#include <initializer_list>
#endif // LANG_CXX11
namespace strings {
namespace internal {
// The default Predicate object, which doesn't filter out anything.
struct NoFilter {
bool operator()(StringPiece /* ignored */) {
return true;
}
};
// This class splits a string using the given delimiter, returning the split
// substrings via an iterator interface. An optional Predicate functor may be
// supplied, which will be used to filter the split strings: strings for which
// the predicate returns false will be skipped. A Predicate object is any
// functor that takes a StringPiece and returns bool. By default, the NoFilter
// Predicate is used, which does not filter out anything.
//
// This class is NOT part of the public splitting API.
//
// Usage:
//
// using strings::delimiter::Literal;
// Literal d(",");
// for (SplitIterator<Literal> it("a,b,c", d), end(d); it != end; ++it) {
// StringPiece substring = *it;
// DoWork(substring);
// }
//
// The explicit single-argument constructor is used to create an "end" iterator.
// The two-argument constructor is used to split the given text using the given
// delimiter.
template <typename Delimiter, typename Predicate = NoFilter>
class SplitIterator
: public std::iterator<std::input_iterator_tag, StringPiece> {
public:
// Two constructors for "end" iterators.
explicit SplitIterator(Delimiter d)
: delimiter_(std::move(d)), predicate_(), is_end_(true) {}
SplitIterator(Delimiter d, Predicate p)
: delimiter_(std::move(d)), predicate_(std::move(p)), is_end_(true) {}
// Two constructors taking the text to iterator.
SplitIterator(StringPiece text, Delimiter d)
: text_(std::move(text)),
delimiter_(std::move(d)),
predicate_(),
is_end_(false) {
++(*this);
}
SplitIterator(StringPiece text, Delimiter d, Predicate p)
: text_(std::move(text)),
delimiter_(std::move(d)),
predicate_(std::move(p)),
is_end_(false) {
++(*this);
}
StringPiece operator*() { return curr_piece_; }
StringPiece* operator->() { return &curr_piece_; }
SplitIterator& operator++() {
do {
if (text_.end() == curr_piece_.end()) {
// Already consumed all of text_, so we're done.
is_end_ = true;
return *this;
}
StringPiece found_delimiter = delimiter_.Find(text_);
assert(found_delimiter.data() != NULL);
assert(text_.begin() <= found_delimiter.begin());
assert(found_delimiter.end() <= text_.end());
// found_delimiter is allowed to be empty.
// Sets curr_piece_ to all text up to but excluding the delimiter itself.
// Sets text_ to remaining data after the delimiter.
curr_piece_.set(text_.begin(), found_delimiter.begin() - text_.begin());
text_.remove_prefix(found_delimiter.end() - text_.begin());
} while (!predicate_(curr_piece_));
return *this;
}
SplitIterator operator++(int /* postincrement */) {
SplitIterator old(*this);
++(*this);
return old;
}
bool operator==(const SplitIterator& other) const {
// Two "end" iterators are always equal. If the two iterators being compared
// aren't both end iterators, then we fallback to comparing their fields.
// Importantly, the text being split must be equal and the current piece
// within the text being split must also be equal. The delimiter_ and
// predicate_ fields need not be checked here because they're template
// parameters that are already part of the SplitIterator's type.
return (is_end_ && other.is_end_) ||
(is_end_ == other.is_end_ &&
text_ == other.text_ &&
text_.data() == other.text_.data() &&
curr_piece_ == other.curr_piece_ &&
curr_piece_.data() == other.curr_piece_.data());
}
bool operator!=(const SplitIterator& other) const {
return !(*this == other);
}
private:
// The text being split. Modified as delimited pieces are consumed.
StringPiece text_;
Delimiter delimiter_;
Predicate predicate_;
bool is_end_;
// Holds the currently split piece of text. Will always refer to string data
// within text_. This value is returned when the iterator is dereferenced.
StringPiece curr_piece_;
};
// Declares a functor that can convert a StringPiece to another type. This works
// for any type that has a constructor (explicit or not) taking a single
// StringPiece argument. A specialization exists for converting to string
// because the underlying data needs to be copied. In theory, these
// specializations could be extended to work with other types (e.g., int32), but
// then a solution for error reporting would need to be devised.
template <typename To>
struct StringPieceTo {
To operator()(StringPiece from) const {
return To(from);
}
};
// Specialization for converting to string.
template <>
struct StringPieceTo<string> {
string operator()(StringPiece from) const {
return from.ToString();
}
};
// Specialization for converting to *const* string.
template <>
struct StringPieceTo<const string> {
string operator()(StringPiece from) const {
return from.ToString();
}
};
#ifdef LANG_CXX11
// IsNotInitializerList<T>::type exists iff T is not an initializer_list. More
// details below in Splitter<> where this is used.
template <typename T>
struct IsNotInitializerList {
typedef void type;
};
template <typename T>
struct IsNotInitializerList<std::initializer_list<T> > {};
#endif // LANG_CXX11
// This class implements the behavior of the split API by giving callers access
// to the underlying split substrings in various convenient ways, such as
// through iterators or implicit conversion functions. Do not construct this
// class directly, rather use the Split() function instead.
//
// Output containers can be collections of either StringPiece or string objects.
// StringPiece is more efficient because the underlying data will not need to be
// copied; the returned StringPieces will all refer to the data within the
// original input string. If a collection of string objects is used, then each
// substring will be copied.
//
// An optional Predicate functor may be supplied. This predicate will be used to
// filter the split strings: only strings for which the predicate returns true
// will be kept. A Predicate object is any unary functor that takes a
// StringPiece and returns bool. By default, the NoFilter predicate is used,
// which does not filter out anything.
template <typename Delimiter, typename Predicate = NoFilter>
class Splitter {
public:
typedef internal::SplitIterator<Delimiter, Predicate> Iterator;
Splitter(StringPiece text, Delimiter d)
: begin_(text, d), end_(d) {}
Splitter(StringPiece text, Delimiter d, Predicate p)
: begin_(text, d, p), end_(d, p) {}
// Range functions that iterate the split substrings as StringPiece objects.
// These methods enable a Splitter to be used in a range-based for loop in
// C++11, for example:
//
// for (StringPiece sp : my_splitter) {
// DoWork(sp);
// }
const Iterator& begin() const { return begin_; }
const Iterator& end() const { return end_; }
#ifdef LANG_CXX11
// Support for default template arguments for function templates was added in
// C++11, but it is not allowed if compiled in C++98 compatibility mode. Since
// this code is under a LANG_CXX11 guard, we can safely ignore the
// -Wc++98-compat flag and use default template arguments on the implicit
// conversion operator below.
//
// This use of default template arguments on a function template was approved
// by tgs and sanjay on behalf of the c-style-arbiters in email thread
//
// All compiler flags are first saved with a diagnostic push and restored with a
// diagnostic pop below.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
#pragma GCC diagnostic ignored "-Wc++98-compat"
// Uses SFINAE to restrict conversion to container-like types (by testing for
// the presence of a const_iterator member type) and also to disable
// conversion to an initializer_list (which also has a const_iterator).
// Otherwise, code compiled in C++11 will get an error due to ambiguous
// conversion paths (in C++11 vector<T>::operator= is overloaded to take
// either a vector<T> or an initializer_list<T>).
//
// This trick was taken from util/gtl/container_literal.h
template <typename Container,
typename IsNotInitializerListChecker =
typename IsNotInitializerList<Container>::type,
typename ContainerChecker =
typename Container::const_iterator>
operator Container() {
return SelectContainer<Container, is_map<Container>::value>()(this);
}
// Restores diagnostic settings, i.e., removes the "ignore" on -Wpragmas and
// -Wc++98-compat.
#pragma GCC diagnostic pop
#else
// Not under LANG_CXX11
template <typename Container>
operator Container() {
return SelectContainer<Container, is_map<Container>::value>()(this);
}
#endif // LANG_CXX11
template <typename First, typename Second>
operator std::pair<First, Second>() {
return ToPair<First, Second>();
}
private:
// is_map<T>::value is true iff there exists a type T::mapped_type. This is
// used to dispatch to one of the SelectContainer<> functors (below) from the
// implicit conversion operator (above).
template <typename T>
struct is_map {
template <typename U> static base::big_ test(typename U::mapped_type*);
template <typename> static base::small_ test(...);
static const bool value = (sizeof(test<T>(0)) == sizeof(base::big_));
};
// Base template handles splitting to non-map containers
template <typename Container, bool>
struct SelectContainer {
Container operator()(Splitter* splitter) const {
return splitter->template ToContainer<Container>();
}
};
// Partial template specialization for splitting to map-like containers.
template <typename Container>
struct SelectContainer<Container, true> {
Container operator()(Splitter* splitter) const {
return splitter->template ToMap<Container>();
}
};
// Inserts split results into the container. To do this the results are first
// stored in a vector<StringPiece>. This is where the input text is actually
// "parsed". This vector is then used to possibly reserve space in the output
// container, and the StringPieces in "v" are converted as necessary to the
// output container's value type.
//
// The reason to use an intermediate vector of StringPiece is so we can learn
// the needed capacity of the output container. This is needed when the output
// container is a vector<string> in which case resizes can be expensive due to
// copying of the ::string objects.
//
// At some point in the future we might add a C++11 move constructor to
// ::string, in which case the vector resizes are much less expensive and the
// use of this intermediate vector "v" can be removed.
template <typename Container>
Container ToContainer() {
vector<StringPiece> v;
for (Iterator it = begin(); it != end_; ++it) {
v.push_back(*it);
}
typedef typename Container::value_type ToType;
internal::StringPieceTo<ToType> converter;
Container c;
ReserveCapacity(&c, v.size());
std::insert_iterator<Container> inserter(c, c.begin());
for (const auto& sp : v) {
*inserter++ = converter(sp);
}
return c;
}
// The algorithm is to insert a new pair into the map for each even-numbered
// item, with the even-numbered item as the key with a default-constructed
// value. Each odd-numbered item will then be assigned to the last pair's
// value.
template <typename Map>
Map ToMap() {
typedef typename Map::key_type Key;
typedef typename Map::mapped_type Data;
Map m;
StringPieceTo<Key> key_converter;
StringPieceTo<Data> val_converter;
typename Map::iterator curr_pair;
bool is_even = true;
for (Iterator it = begin(); it != end_; ++it) {
if (is_even) {
curr_pair = InsertInMap(std::make_pair(key_converter(*it), Data()), &m);
} else {
curr_pair->second = val_converter(*it);
}
is_even = !is_even;
}
return m;
}
// Returns a pair with its .first and .second members set to the first two
// strings returned by the begin() iterator. Either/both of .first and .second
// will be empty strings if the iterator doesn't have a corresponding value.
template <typename First, typename Second>
std::pair<First, Second> ToPair() {
StringPieceTo<First> first_converter;
StringPieceTo<Second> second_converter;
StringPiece first, second;
Iterator it = begin();
if (it != end()) {
first = *it;
if (++it != end()) {
second = *it;
}
}
return std::make_pair(first_converter(first), second_converter(second));
}
// Overloaded InsertInMap() function. The first overload is the commonly used
// one for most map-like objects. The second overload is a special case for
// multimap, because multimap's insert() member function directly returns an
// iterator, rather than a pair<iterator, bool> like map's.
template <typename Map>
typename Map::iterator InsertInMap(
const typename Map::value_type& value, Map* map) {
return map->insert(value).first;
}
// InsertInMap overload for multimap.
template <typename K, typename T, typename C, typename A>
typename std::multimap<K, T, C, A>::iterator InsertInMap(
const typename std::multimap<K, T, C, A>::value_type& value,
typename std::multimap<K, T, C, A>* map) {
return map->insert(value);
}
// Reserves the given amount of capacity in a vector<string>
template <typename A>
void ReserveCapacity(vector<string, A>* v, size_t size) {
v->reserve(size);
}
void ReserveCapacity(...) {}
const Iterator begin_;
const Iterator end_;
};
} // namespace internal
} // namespace strings
#endif // STRINGS_SPLIT_INTERNAL_H_