| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| #ifndef KUDU_UTIL_FASTSTRING_H |
| #define KUDU_UTIL_FASTSTRING_H |
| |
| #include <cstdint> |
| #include <cstring> |
| #include <string> |
| #include <utility> |
| |
| #include <glog/logging.h> |
| |
| #include "kudu/gutil/dynamic_annotations.h" |
| #include "kudu/gutil/macros.h" |
| #include "kudu/gutil/port.h" |
| #include "kudu/gutil/strings/fastmem.h" |
| |
| namespace kudu { |
| |
| // A faststring is similar to a std::string, except that it is faster for many |
| // common use cases (in particular, resize() will fill with uninitialized data |
| // instead of memsetting to \0) |
| class faststring { |
| public: |
| enum { |
| kInitialCapacity = 32 |
| }; |
| |
| faststring() : |
| data_(initial_data_), |
| len_(0), |
| capacity_(kInitialCapacity) { |
| } |
| |
| // Construct a string with the given capacity, in bytes. |
| explicit faststring(size_t capacity) |
| : data_(initial_data_), |
| len_(0), |
| capacity_(kInitialCapacity) { |
| if (capacity > capacity_) { |
| data_ = new uint8_t[capacity]; |
| capacity_ = capacity; |
| } |
| ASAN_POISON_MEMORY_REGION(data_, capacity_); |
| } |
| |
| faststring(faststring&& other) noexcept |
| : faststring() { |
| *this = std::move(other); |
| } |
| |
| faststring& operator=(faststring&& other) noexcept { |
| if (this == &other) return *this; |
| |
| if (other.data_ == other.initial_data_) { |
| assign_copy(other.data(), other.size()); |
| other.clear(); |
| } else { |
| len_ = other.len_; |
| capacity_ = other.capacity_; |
| data_ = other.release(); |
| } |
| return *this; |
| } |
| |
| ~faststring() { |
| ASAN_UNPOISON_MEMORY_REGION(initial_data_, arraysize(initial_data_)); |
| if (data_ != initial_data_) { |
| delete[] data_; |
| } |
| } |
| |
| // Reset the valid length of the string to 0. |
| // |
| // This does not free up any memory. The capacity of the string remains unchanged. |
| void clear() { |
| resize(0); |
| ASAN_POISON_MEMORY_REGION(data_, capacity_); |
| } |
| |
| // Resize the string to the given length. |
| // If the new length is larger than the old length, the capacity is expanded as necessary. |
| // |
| // NOTE: in contrast to std::string's implementation, Any newly "exposed" bytes of data are |
| // not cleared. |
| void resize(size_t newsize) { |
| if (newsize > capacity_) { |
| reserve(newsize); |
| } |
| len_ = newsize; |
| ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_); |
| ASAN_UNPOISON_MEMORY_REGION(data_, len_); |
| } |
| |
| // Resize to 'newsize'. In contrast to 'resize()', if this requires allocating a new |
| // backing array, the new capacity is rounded up in the same manner as if data had been |
| // appended to the buffer. |
| void resize_with_extra_capacity(size_t newsize) { |
| if (newsize > capacity_) { |
| GrowToAtLeast(newsize); |
| } |
| resize(newsize); |
| } |
| |
| // Releases the underlying array; after this, the buffer is left empty. |
| // |
| // NOTE: the data pointer returned by release() always points to dynamically |
| // allocated memory and the caller must be responsible for releasing it. |
| uint8_t *release() WARN_UNUSED_RESULT { |
| uint8_t *ret = data_; |
| if (ret == initial_data_) { |
| ret = new uint8_t[len_]; |
| memcpy(ret, data_, len_); |
| } |
| len_ = 0; |
| capacity_ = kInitialCapacity; |
| data_ = initial_data_; |
| ASAN_POISON_MEMORY_REGION(data_, capacity_); |
| return ret; |
| } |
| |
| // Reserve space for the given total amount of data. If the current capacity is already |
| // larger than the newly requested capacity, this is a no-op (i.e. it does not ever free memory). |
| // |
| // NOTE: even though the new capacity is reserved, it is illegal to begin writing into that memory |
| // directly using pointers. If ASAN is enabled, this is ensured using manual memory poisoning. |
| void reserve(size_t newcapacity) { |
| if (PREDICT_TRUE(newcapacity <= capacity_)) return; |
| GrowArray(newcapacity); |
| } |
| |
| // Append the given data to the string, resizing capacity as necessary. |
| void append(const void *src_v, size_t count) { |
| const uint8_t *src = reinterpret_cast<const uint8_t *>(src_v); |
| EnsureRoomForAppend(count); |
| ASAN_UNPOISON_MEMORY_REGION(data_ + len_, count); |
| |
| // appending short values is common enough that this |
| // actually helps, according to benchmarks. In theory |
| // memcpy_inlined should already be just as good, but this |
| // was ~20% faster for reading a large prefix-coded string file |
| // where each string was only a few chars different |
| if (count <= 4) { |
| uint8_t *p = &data_[len_]; |
| for (int i = 0; i < count; i++) { |
| *p++ = *src++; |
| } |
| } else { |
| strings::memcpy_inlined(&data_[len_], src, count); |
| } |
| len_ += count; |
| } |
| |
| // Append the given string to this string. |
| void append(const std::string &str) { |
| append(str.data(), str.size()); |
| } |
| |
| // Append the given character to this string. |
| void push_back(const char byte) { |
| EnsureRoomForAppend(1); |
| ASAN_UNPOISON_MEMORY_REGION(data_ + len_, 1); |
| data_[len_] = byte; |
| len_++; |
| } |
| |
| // Return the valid length of this string. |
| size_t length() const { |
| return len_; |
| } |
| |
| // Return the valid length of this string (identical to length()) |
| size_t size() const { |
| return len_; |
| } |
| |
| // Return the allocated capacity of this string. |
| size_t capacity() const { |
| return capacity_; |
| } |
| |
| // Return a pointer to the data in this string. Note that this pointer |
| // may be invalidated by any later non-const operation. |
| const uint8_t *data() const { |
| return &data_[0]; |
| } |
| |
| // Return a pointer to the data in this string. Note that this pointer |
| // may be invalidated by any later non-const operation. |
| uint8_t *data() { |
| return &data_[0]; |
| } |
| |
| // Return the given element of this string. Note that this does not perform |
| // any bounds checking. |
| const uint8_t &at(size_t i) const { |
| return data_[i]; |
| } |
| |
| // Return the given element of this string. Note that this does not perform |
| // any bounds checking. |
| const uint8_t &operator[](size_t i) const { |
| return data_[i]; |
| } |
| |
| // Return the given element of this string. Note that this does not perform |
| // any bounds checking. |
| uint8_t &operator[](size_t i) { |
| return data_[i]; |
| } |
| |
| // Reset the contents of this string by copying 'len' bytes from 'src'. |
| void assign_copy(const uint8_t *src, size_t len) { |
| // Reset length so that the first resize doesn't need to copy the current |
| // contents of the array. |
| len_ = 0; |
| resize(len); |
| memcpy(data(), src, len); |
| } |
| |
| // Reset the contents of this string by copying from the given std::string. |
| void assign_copy(const std::string &str) { |
| assign_copy(reinterpret_cast<const uint8_t *>(str.c_str()), |
| str.size()); |
| } |
| |
| // Reallocates the internal storage to fit only the current data. |
| // |
| // This may revert to using internal storage if the current length is shorter than |
| // kInitialCapacity. In that case, after this call, capacity() will go down to |
| // kInitialCapacity. |
| // |
| // Any pointers within this instance may be invalidated. |
| void shrink_to_fit() { |
| if (data_ == initial_data_ || capacity_ == len_) return; |
| ShrinkToFitInternal(); |
| } |
| |
| // Return a copy of this string as a std::string. |
| std::string ToString() const { |
| return std::string(reinterpret_cast<const char *>(data()), |
| len_); |
| } |
| |
| // Check various internal invariants. Used by tests. |
| void CheckInvariants() { |
| CHECK_LE(len_, capacity_); |
| if (data_ == initial_data_) { |
| CHECK_EQ(capacity_, kInitialCapacity); |
| } |
| } |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(faststring); |
| |
| // If necessary, expand the buffer to fit at least 'count' more bytes. |
| // If the array has to be grown, it is grown by at least 50%. |
| void EnsureRoomForAppend(size_t count) { |
| if (PREDICT_TRUE(len_ + count <= capacity_)) { |
| return; |
| } |
| |
| // Call the non-inline slow path - this reduces the number of instructions |
| // on the hot path. |
| GrowToAtLeast(len_ + count); |
| } |
| |
| // The slow path of EnsureRoomForAppend. Grows the buffer by either |
| // 'count' bytes, or 50%, whichever is more. |
| void GrowToAtLeast(size_t newcapacity); |
| |
| // Grow the array to the given capacity, which must be more than |
| // the current capacity. |
| void GrowArray(size_t newcapacity); |
| |
| void ShrinkToFitInternal(); |
| |
| uint8_t* data_; |
| uint8_t initial_data_[kInitialCapacity]; |
| size_t len_; |
| // NOTE: we will make a initial buffer as part of the object, so the smallest |
| // possible value of capacity_ is kInitialCapacity. |
| size_t capacity_; |
| }; |
| |
| } // namespace kudu |
| |
| #endif |