/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
#ifndef PAGESPEED_KERNEL_BASE_VECTOR_DEQUE_H_
#define PAGESPEED_KERNEL_BASE_VECTOR_DEQUE_H_
#include <cstddef> // for size_t
#include <cstring> // for memcpy
#include "pagespeed/kernel/base/basictypes.h"
namespace net_instaweb {
// Simple implementation of a deque using a vector that we double in
// capacity whenever we need to make room. This alternative to
// std::deque is perhaps a little harder on memory allocators (more
// fragmentation), but will frequently allocate much less overall memory.
//
// In particular, I found, using top, that std::deque allocates 688
// bytes to construct a deque containing 4 pointers on a 64-bit
// system. In this implementation the cost is 64 bytes plus malloc
// overhead: 3 size_t integers and a pointer to an allocated array,
// plus the 4 pointers in the allocated array.
//
// This implementation lacks iterators, many std::deque methods, and the
// ability to work with value-semantics for the contained object. These
// could all be added without changing the design.
//
// Please do not instantiate this class with a type that cannot be copied
// with memcpy. Pointers, integers, and floats are fine, as well as simple
// structs of those.
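//
// A minimal usage sketch (illustrative only; the variable and pointer names
// below are hypothetical):
//
//   VectorDeque<int*> queue;
//   queue.push_back(p1);            // logical order: [ p1 ]
//   queue.push_front(p0);           // logical order: [ p0 p1 ]
//   int* first = queue.front();     // p0
//   queue.pop_front();              // logical order: [ p1 ]
//   bool is_empty = queue.empty();  // false; one element remains.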
template<class T> class VectorDeque {
public:
// Constructor provides a small initial allocation, rather than constructing
// with zero capacity, based on expected usage patterns.
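// The deque starts logically empty: size_minus_1_ is initialized to
// static_cast<size_t>(-1), so size() reports 0 and empty() reports true.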
VectorDeque()
: start_position_(0),
size_minus_1_(static_cast<size_t>(-1)),
capacity_minus_1_(initial_capacity() - 1),
data_(new T[initial_capacity()]) {
}
~VectorDeque() {
delete [] data_;
data_ = NULL;
}
static size_t initial_capacity() { return 4; }
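// Appends value at the logical back. Amortized O(1): the backing array is
// reallocated (doubled) only when size() has reached capacity().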
void push_back(T value) {
ExpandIfNecessary();
++size_minus_1_;
*PointerAt(size_minus_1_) = value;
}
// Special faster versions of PointerAt(0) that avoid some math. This
// seems like a clear small win; however, microbenchmarking suggests it is
// a significant performance loss. Investigation is required.
# define SPECIAL_CASE_POINTER_AT_0 1
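// Prepends value at the logical front by stepping start_position_ back one
// slot, modulo capacity.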
void push_front(T value) {
ExpandIfNecessary();
start_position_ = ModCapacity(start_position_ - 1);
# if SPECIAL_CASE_POINTER_AT_0
*PointerAt0() = value;
# else
*PointerAt(0) = value;
# endif
++size_minus_1_;
}
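// Note: as with std::deque, popping from an empty deque is not supported and
// corrupts the size bookkeeping. Removing the last element wraps
// size_minus_1_ back to static_cast<size_t>(-1), the state empty() tests for.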
void pop_back() {
--size_minus_1_;
}
void pop_front() {
start_position_ = ModCapacity(start_position_ + 1);
--size_minus_1_;
}
T back() const {
return *PointerAt(size_minus_1_);
}
T front() const {
# if SPECIAL_CASE_POINTER_AT_0
return *PointerAt0();
# else
return *PointerAt(0);
# endif
}
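// size_minus_1_ holds size() - 1, so an empty deque is represented by
// size_minus_1_ == static_cast<size_t>(-1) and size() wraps back to 0.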
size_t capacity() const { return capacity_minus_1_ + 1; }
size_t size() const { return size_minus_1_ + 1; }
bool empty() const { return size_minus_1_ == static_cast<size_t>(-1); }
private:
// Because capacity is always a power of 2, (index & (capacity - 1)) is
// equivalent to (index % capacity). Benchmarking shows the masking form is
// significantly faster than the modulus on a Dell T3500,
// Intel Xeon(R) CPU X5650 @ 2.67GHz.
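// For example, with capacity() == 8, capacity_minus_1_ == 7 (binary 111),
// so ModCapacity(9) == (9 & 7) == 1, the same result as 9 % 8.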
size_t ModCapacity(size_t index) const { return index & capacity_minus_1_; }
// Returns a pointer to the element at the specified position.
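// The position is a logical index: e.g. with start_position_ == 5 and
// capacity() == 8, logical position 4 maps to physical slot (5 + 4) & 7 == 1.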
T* PointerAt(size_t position) {
return data_ + ModCapacity(start_position_ + position);
}
const T* PointerAt(size_t position) const {
return data_ + ModCapacity(start_position_ + position);
}
# if SPECIAL_CASE_POINTER_AT_0
T* PointerAt0() {
return data_ + start_position_;
}
const T* PointerAt0() const {
return data_ + start_position_;
}
# endif
# undef SPECIAL_CASE_POINTER_AT_0
// Expands the deque to accommodate pushing an element onto the front
// or back.
void ExpandIfNecessary() {
// TODO(jmarantz): consider shrinking if the size goes way down.
if (size_minus_1_ == capacity_minus_1_) {
// Consider a deque with:
// start_position_ == 5
// size() == 7
// capacity() == 8
// logical order: [ 0 1 2 3 4 5 6 ]
// physical order: [ 3 4 5 6 _ 0 1 2 ]
// The first time we push, either to the beginning or end, we don't
// need to expand. Let's say we push -1 to the beginning; we'll
// decrement start_position_ to 4 and have:
// start_position_ == 4
// size() == 8
// capacity() == 8
// logical order: [ -1 0 1 2 3 4 5 6 ]
// physical order: [ 3 4 5 6 -1 0 1 2 ]
// Now we will need to expand before we push another element, getting:
// start_position_ == 12
// size() == 8
// capacity() == 16
// logical order: [ -1 0 1 2 3 4 5 6 ]
// physical order: [ 3 4 5 6 _ _ _ _ _ _ _ _ -1 0 1 2 ]
// Now we are ready to insert 8 more elements into the gap, whether they
// are pushed to the back or front.
capacity_minus_1_ = 2 * capacity() - 1;
T* old_data = data_;
data_ = new T[capacity()];
size_t sz = size();
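// If the old array starts at slot 0 it is already in logical order and a
// single copy suffices. Otherwise the chunk in old slots [0, start_position_)
// (the logical back) is copied to the start of the new array, and the chunk
// in old slots [start_position_, sz) (the logical front) is copied to the
// end, leaving the new free slots as one contiguous gap in the middle.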
if (start_position_ == 0) {
memcpy(data_, old_data, sz * sizeof(*data_));
} else {
// TODO(jmarantz): jmaessen suggests rearranging the entries to
// be contiguous on an expansion.
memcpy(data_, old_data, start_position_ * sizeof(*data_));
size_t size_of_right_chunk = sz - start_position_;
size_t new_start_position = start_position_ + sz;
memcpy(data_ + new_start_position,
old_data + start_position_,
size_of_right_chunk * sizeof(*data_));
start_position_ = new_start_position;
}
delete [] old_data;
}
}
size_t start_position_;
size_t size_minus_1_;
size_t capacity_minus_1_; // capacity is constrained to a power of 2.
T* data_;
DISALLOW_COPY_AND_ASSIGN(VectorDeque);
};
} // namespace net_instaweb
#endif // PAGESPEED_KERNEL_BASE_VECTOR_DEQUE_H_