blob: 3d81058c0c74c3405477f6aeb4c0fddb5566c085 [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#include "net/instaweb/util/public/string_buffer.h"
#include <algorithm> // for std::min
#include "base/logging.h"
#include "net/instaweb/util/public/writer.h"
namespace net_instaweb {
// Our strategy is to reduce memory fragmentation when accumulating
// large amounts of resource content by appending new bytes into
// new strings in a vector, rather than resizing the existing strings.
//
// Consider a multi-megabyte image. We'll read it into our system,
// most likely, via a low-level call to read() with a buffer that's
// on the order of 10k. (see net/instaweb/util/stack_buffer.h).
// As we accumulate 100 of these, we don't want to keep appending
// them onto one std::string, requiring megabytes of contiguous
// memory.
//
// However, if a caller is accumulating a few bytes at a time, it
// seems likely that the vector would grow very large, so let's
// grow each element until it hits about 1k. 1000 is picked
// rather than 1024 to leave room below a power-of-two for malloc
// overhead.
//
// Another option would be to have the strings be the same size.
// The advantage of that would be random-access and some code
// simplifications. The disadvantage is that small StringBuffers
// would take more memory. Another idea would be to have all but
// the back() string* be the same size.
size_t StringBuffer::kMinStringSize = 1000;
size_t StringBuffer::kReadBufferSize = 32000;
size_t StringBuffer::npos = std::string::npos;
void StringBuffer::Clear() {
for (int i = 0, n = strings_.size(); i < n; ++i) {
delete strings_[i];
}
strings_.resize(0);
size_ = 0;
}
void StringBuffer::Append(const StringPiece& string_piece) {
// If the last string we appended is a reasonable size,
// or this is a new buffer, then append a new string.
// TODO(jmarantz): this behavior should be observed and tweaked.
size_t piece_size = string_piece.size();
size_t back_size = strings_.empty() ? 0 : strings_.back()->size();
if (strings_.empty() ||
(back_size >= kMinStringSize) ||
(piece_size >= kMinStringSize)) {
strings_.push_back(new std::string(string_piece.data(), piece_size));
} else {
strings_.back()->append(string_piece.data(), piece_size);
}
size_ += piece_size;
}
bool StringBuffer::Write(Writer* writer, MessageHandler* message_handler)
const {
bool ret = true;
for (int i = 0, n = strings_.size(); ret && (i < n); ++i) {
ret = writer->Write(*strings_[i], message_handler);
}
return ret;
}
std::string StringBuffer::ToString() const {
std::string buffer;
buffer.reserve(size_);
for (int i = 0, n = strings_.size(); i < n; ++i) {
buffer.append(*strings_[i]);
}
return buffer;
}
bool StringBuffer::operator==(const StringBuffer& that) const {
if (size_ != that.size_) {
return false;
}
size_t this_vector_index = 0, this_char_index = 0;
size_t that_vector_index = 0, that_char_index = 0;
size_t compare_size = 0;
for (int char_index = 0; char_index < size_; char_index += compare_size) {
const std::string& this_string = *strings_[this_vector_index];
const std::string& that_string = *that.strings_[that_vector_index];
int this_remaining = this_string.size() - this_char_index;
int that_remaining = that_string.size() - that_char_index;
compare_size = std::min(this_remaining, that_remaining);
StringPiece this_piece(this_string.data() + this_char_index, compare_size);
StringPiece that_piece(that_string.data() + that_char_index, compare_size);
if (this_piece != that_piece) {
return false;
}
this_char_index += compare_size;
that_char_index += compare_size;
if (this_char_index == this_string.size()) {
this_char_index = 0;
++this_vector_index;
}
if (that_char_index == that_string.size()) {
that_char_index = 0;
++that_vector_index;
}
}
return true;
}
void StringBuffer::CopyFrom(const StringBuffer& src) {
Clear();
for (int i = 0, n = src.strings_.size(); i < n; ++i) {
strings_.push_back(new std::string(*src.strings_[i]));
}
size_ = src.size_;
}
char* StringBuffer::AllocReadBuffer() {
std::string* buffer = new std::string;
buffer->resize(kReadBufferSize);
strings_.push_back(buffer);
// buffer->data() is a const method, and may return a
// pointer to shared storage, however, taking the address
// of a the first character cannot: the string implementation
// must assume that the returned non-const char*& will be written.
return &((*buffer)[0]);
}
void StringBuffer::CommitReadBuffer(char* read_buffer, int size) {
std::string* buffer = strings_.back();
CHECK(buffer->data() == read_buffer); // pointer comparison
CHECK(size <= static_cast<int>(kReadBufferSize));
buffer->resize(size);
size_ += size;
}
void StringBuffer::AbandonReadBuffer(char* read_buffer) {
std::string* buffer = strings_.back();
CHECK(buffer->data() == read_buffer); // pointer comparison
delete buffer;
strings_.pop_back();
}
std::string StringBuffer::SubString(size_t pos, size_t size) const {
CHECK(pos <= static_cast<size_t>(size_));
if ((size == npos) || (size + pos > static_cast<size_t>(size_))) {
size = size_ - pos;
}
std::string ret;
ret.reserve(size);
// TODO(jmarantz): could binary_search here if we want this to go faster,
// or switch to fixed size buffers, which will also make file-reading faster.
size_t char_index = 0;
for (size_t i = 0; (i < strings_.size()) && (size > 0); ++i) {
std::string* str = strings_[i];
if ((pos >= char_index) && (pos < (char_index + str->size()))) {
size_t pos_in_string = pos - char_index;
size_t copy_size = std::min(size, str->size() - pos_in_string);
ret.append(str->data() + pos_in_string, copy_size);
pos += copy_size;
size -= copy_size;
}
char_index += str->size();
}
return ret;
}
} // namespace net_instaweb