| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // This file is copied from |
| // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/PODArray.h |
| // and modified by Doris |
| |
| #pragma once |
| |
| #include <common/compiler_util.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <sys/types.h> |
| |
| #include <algorithm> |
| #include <boost/core/noncopyable.hpp> |
| #include <cassert> |
| #include <cstddef> |
| #include <initializer_list> |
| #include <utility> |
| |
| #include "common/compiler_util.h" // IWYU pragma: keep |
| #include "runtime/thread_context.h" |
| #include "vec/common/allocator.h" // IWYU pragma: keep |
| #include "vec/common/memcpy_small.h" |
| |
| #ifndef NDEBUG |
| #include <sys/mman.h> |
| #endif |
| |
| #include "vec/common/pod_array_fwd.h" |
| |
| namespace doris::vectorized { |
| #include "common/compile_check_avoid_begin.h" |
| /** For a zero argument, the result is zero. |
|  * For arguments with the most significant bit set, the result is zero. |
|  * For other arguments, returns the value rounded up to the nearest power of two. |
| */ |
| inline size_t round_up_to_power_of_two_or_zero(size_t n) { |
| --n; |
| n |= n >> 1; |
| n |= n >> 2; |
| n |= n >> 4; |
| n |= n >> 8; |
| n |= n >> 16; |
| n |= n >> 32; |
| ++n; |
| |
| return n; |
| } |
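| |
| /// Illustrative values (assuming a 64-bit size_t), matching the contract above: |
| ///   round_up_to_power_of_two_or_zero(0) == 0    (--n wraps to all ones, ++n wraps back) |
| ///   round_up_to_power_of_two_or_zero(5) == 8 |
| ///   round_up_to_power_of_two_or_zero(8) == 8    (powers of two map to themselves) |
| ///   round_up_to_power_of_two_or_zero((1ULL << 63) + 1) == 0    (most significant bit set) |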
| |
| /** A dynamic array for POD types. |
| * Designed for a small number of large arrays (rather than a lot of small ones). |
| * To be more precise - for use in ColumnVector. |
| * It differs from std::vector in that it does not initialize the elements. |
| * |
|  * Made noncopyable so that there are no accidental copies. You can copy the data using the `assign` method. |
| * |
| * Only part of the std::vector interface is supported. |
| * |
| * The default constructor creates an empty object that does not allocate memory. |
|  * When memory is first allocated, at least initial_bytes bytes are allocated. |
| * |
|  * If you insert elements with push_back without calling `reserve` first, then PODArray is about 2.5 times faster than std::vector. |
| * |
|  * The template parameter `pad_right` - always allocate that many unused bytes at the end of the array. |
|  * Can be used for optimistic reads, writes, and copies with unaligned SIMD instructions. |
|  * |
|  * The template parameter `pad_left` - always allocate memory before the 0th element of the array (rounded up to a whole number of elements) |
|  * and zero-initialize the -1th element. This allows using the -1th element, which will always have the value 0. |
|  * This gives performance benefits when converting an array of offsets to an array of sizes. |
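|  *   For example (a sketch; `offsets` and `sizes` are illustrative names, and the |
|  *   PaddedPODArray alias is assumed from pod_array_fwd.h): |
|  *     PaddedPODArray<UInt64> offsets;   /// cumulative end offsets, pad_left > 0 |
|  *     for (size_t i = 0; i < offsets.size(); ++i) |
|  *         sizes[i] = offsets[i] - offsets[i - 1];   /// offsets[-1] reads the zeroed padding |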
| * |
|  * If 4096 bytes are reserved, 512 bytes are used, pad_left = 16, and pad_right = 15, the structure of the PODArray is as follows: |
| * |
| * 16 bytes 512 bytes 3553 bytes 15 bytes |
| * pad_left ----- c_start -------------c_end ---------------------------- c_end_of_storage ------------- pad_right |
| * ^ ^ ^ |
| * | | | |
| * | c_res_mem (usually > c_end && < c_end + PRE_GROWTH_SIZE) | |
| * | | |
| * +-------------------------------------- allocated_bytes (4096 bytes) -----------------------------------+ |
| * |
|  * Some methods that use the allocator take TAllocatorParams variadic arguments. |
|  * These arguments are passed to the corresponding methods of TAllocator. |
|  * Example: a pointer to an Arena that is used for allocations. |
|  * |
|  * Why is the Allocator not passed through the constructor, as is done in the C++ standard library? |
|  * Because sometimes there are many small objects that share the same allocator with the same parameters, |
|  * and we must avoid a larger object size due to storing the same parameters in each object. |
|  * This is required for states of aggregate functions. |
|  * |
|  * PODArray does not memset the memory to 0 when allocating, so the query mem tracker counts virtual memory, |
|  * which can cause query memory statistics to be higher than the actual physical memory used. |
| * |
| * TODO Pass alignment to Allocator. |
| * TODO Allow greater alignment than alignof(T). Example: array of char aligned to page size. |
| */ |
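| |
| /// A minimal usage sketch (illustrative only; assumes the default template |
| /// arguments supplied via pod_array_fwd.h): |
| /// |
| ///   doris::vectorized::PODArray<uint32_t> arr;  /// empty, no allocation yet |
| ///   arr.reserve(1024);                          /// one up-front allocation |
| ///   for (uint32_t i = 0; i < 1024; ++i) |
| ///       arr.push_back(i);                       /// elements are not zero-initialized |
| ///   arr.resize_fill(2048);                      /// grow, zero-filling the new tail |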
| static constexpr size_t EmptyPODArraySize = 1024; |
| static constexpr size_t PRE_GROWTH_SIZE = (1ULL << 20); // 1M |
| extern const char empty_pod_array[EmptyPODArraySize]; |
| |
| /** Base class that depends only on the size of the element, not on the element type itself. |
|  * You can static_cast to this class if you want to insert some data regardless of the actual type T. |
| */ |
| template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_, |
| size_t pad_left_> |
| class PODArrayBase : private boost::noncopyable, |
| private TAllocator /// empty base optimization |
| { |
| protected: |
|     /// Round padding up to a whole number of elements to simplify arithmetic. |
| static constexpr size_t pad_right = integerRoundUp(pad_right_, ELEMENT_SIZE); |
| /// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory. |
| static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16); |
|     /// An empty array will point to this static memory as padding. |
| static constexpr char* null = const_cast<char*>(empty_pod_array) + pad_left; |
| |
|     static_assert(pad_left <= EmptyPODArraySize, |
|                   "Left Padding exceeds EmptyPODArraySize. Is the element size too large?"); |
| |
| char* c_start = null; /// Does not include pad_left. |
| char* c_end = null; |
| char* c_end_of_storage = null; /// Does not include pad_right. |
| char* c_res_mem = null; |
| |
|     /// The amount of memory occupied by num_elements elements. |
| static size_t byte_size(size_t num_elements) { |
| #ifndef NDEBUG |
| size_t amount; |
| if (__builtin_mul_overflow(num_elements, ELEMENT_SIZE, &amount)) { |
| DCHECK(false) |
| << "Amount of memory requested to allocate is more than allowed, num_elements " |
| << num_elements << ", ELEMENT_SIZE " << ELEMENT_SIZE; |
| } |
| return amount; |
| #else |
| return num_elements * ELEMENT_SIZE; |
| #endif |
| } |
| |
| /// Minimum amount of memory to allocate for num_elements, including padding. |
| static size_t minimum_memory_for_elements(size_t num_elements) { |
| return byte_size(num_elements) + pad_right + pad_left; |
| } |
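| |
|     /// For example (illustrative; ELEMENT_SIZE = 4, pad_right_ = 15, pad_left_ = 16): |
|     ///   pad_right = integerRoundUp(15, 4) = 16, pad_left = integerRoundUp(16, 16) = 16, |
|     ///   so minimum_memory_for_elements(100) = 100 * 4 + 16 + 16 = 432 bytes. |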
| |
| inline void check_memory(int64_t size) { |
| std::string err_msg; |
| if (TAllocator::sys_memory_exceed(size, &err_msg) || |
| TAllocator::memory_tracker_exceed(size, &err_msg)) { |
| err_msg = fmt::format("PODArray reserve memory failed, {}.", err_msg); |
| if (doris::enable_thread_catch_bad_alloc) { |
| LOG(WARNING) << err_msg; |
| throw doris::Exception(doris::ErrorCode::MEM_ALLOC_FAILED, err_msg); |
| } else { |
| LOG_EVERY_N(WARNING, 1024) << err_msg; |
| } |
| } |
| } |
| |
| inline void reset_resident_memory(const char* c_end_new) { |
| static_assert(!TAllocator::need_check_and_tracking_memory(), |
| "TAllocator should specify `NoTrackingDefaultMemoryAllocator`"); |
| if (UNLIKELY(c_end_new - c_res_mem > 0)) { |
| // - allocated_bytes = c_end_of_storage - c_start = 4 MB; |
| // - used_bytes = c_end_new - c_start = 2.1 MB; |
| // - last tracking_res_memory = c_res_mem - c_start = 1 MB; |
|             // - res_mem_growth = min(allocated_bytes, integerRoundUp(used_bytes, PRE_GROWTH_SIZE)) - last_tracking_res_memory = 3 - 1 = 2 MB; |
| // - update tracking_res_memory = 1 + 2 = 3 MB; |
| // so after each reset_resident_memory, tracking_res_memory >= used_bytes; |
| int64_t res_mem_growth = |
| std::min(static_cast<size_t>(c_end_of_storage - c_start), |
| integerRoundUp(c_end_new - c_start, PRE_GROWTH_SIZE)) - |
| (c_res_mem - c_start); |
| check_memory(res_mem_growth); |
| CONSUME_THREAD_MEM_TRACKER(res_mem_growth); |
| c_res_mem = c_res_mem + res_mem_growth; |
| } |
| } |
| |
| inline void reset_resident_memory() { reset_resident_memory(c_end); } |
| |
| void alloc_for_num_elements(size_t num_elements) { |
| alloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(num_elements))); |
| } |
| |
| template <typename... TAllocatorParams> |
| void alloc(size_t bytes, TAllocatorParams&&... allocator_params) { |
| char* allocated = reinterpret_cast<char*>( |
| TAllocator::alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...)); |
| |
| c_start = allocated + pad_left; |
| c_end = c_start; |
| c_res_mem = c_start; |
| c_end_of_storage = allocated + bytes - pad_right; |
| |
| if (pad_left) memset(c_start - ELEMENT_SIZE, 0, ELEMENT_SIZE); |
| } |
| |
| void dealloc() { |
| if (c_start == null) return; |
| unprotect(); |
| RELEASE_THREAD_MEM_TRACKER((c_res_mem - c_start)); |
| TAllocator::free(c_start - pad_left, allocated_bytes()); |
| } |
| |
| template <typename... TAllocatorParams> |
| void realloc(size_t bytes, TAllocatorParams&&... allocator_params) { |
| if (c_start == null) { |
| alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...); |
| return; |
| } |
| |
| unprotect(); |
| |
| ptrdiff_t end_diff = c_end - c_start; |
| ptrdiff_t res_mem_diff = c_res_mem - c_start; |
| |
|         // Realloc can do 2 possible things: |
|         // - expand the existing memory region |
|         // - allocate a new memory block and free the old one |
|         // Because we don't know which option will be picked, we need to make sure there is |
|         // enough memory for both options. |
| check_memory(res_mem_diff); |
| char* allocated = reinterpret_cast<char*>( |
| TAllocator::realloc(c_start - pad_left, allocated_bytes(), bytes, |
| std::forward<TAllocatorParams>(allocator_params)...)); |
| |
| c_start = allocated + pad_left; |
| c_end = c_start + end_diff; |
| c_res_mem = c_start + res_mem_diff; |
| c_end_of_storage = allocated + bytes - pad_right; |
| } |
| |
| bool is_initialized() const { |
| return (c_start != null) && (c_end != null) && (c_end_of_storage != null); |
| } |
| |
| bool is_allocated_from_stack() const { |
| constexpr size_t stack_threshold = TAllocator::get_stack_threshold(); |
| return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold); |
| } |
| |
| template <typename... TAllocatorParams> |
| void reserve_for_next_size(TAllocatorParams&&... allocator_params) { |
| if (size() == 0) { |
|             // The allocated memory should be a multiple of ELEMENT_SIZE to hold the element; otherwise, |
|             // memory issues such as corruption could appear in edge cases. |
| realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), |
| minimum_memory_for_elements(1)), |
| std::forward<TAllocatorParams>(allocator_params)...); |
| } else |
| realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...); |
| } |
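| |
|     /// Note on growth: each overflow roughly doubles the allocation (illustrative, |
|     /// assuming zero padding: initial_bytes, 2 * initial_bytes, 4 * initial_bytes, ...), |
|     /// which keeps repeated push_back amortized O(1). |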
| |
| #ifndef NDEBUG |
| /// Make memory region readonly with mprotect if it is large enough. |
| /// The operation is slow and performed only for debug builds. |
| void protect_impl(int prot) { |
| static constexpr size_t PROTECT_PAGE_SIZE = 4096; |
| |
| char* left_rounded_up = reinterpret_cast<char*>( |
| (reinterpret_cast<intptr_t>(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) / |
| PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE); |
| char* right_rounded_down = |
| reinterpret_cast<char*>((reinterpret_cast<intptr_t>(c_end_of_storage) + pad_right) / |
| PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE); |
| |
| if (right_rounded_down > left_rounded_up) { |
| size_t length = right_rounded_down - left_rounded_up; |
| if (0 != mprotect(left_rounded_up, length, prot)) throw std::exception(); |
| } |
| } |
| |
| /// Restore memory protection in destructor or realloc for further reuse by allocator. |
| bool mprotected = false; |
| #endif |
| |
| public: |
| bool empty() const { return c_end == c_start; } |
| size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; } |
| size_t capacity() const { return (c_end_of_storage - c_start) / ELEMENT_SIZE; } |
| |
| /// This method is safe to use only for information about memory usage. |
| size_t allocated_bytes() const { |
| if (c_end_of_storage == null) { |
| return 0; |
| } |
| return c_end_of_storage - c_start + pad_right + pad_left; |
| } |
| |
| void clear() { c_end = c_start; } |
| |
| template <typename... TAllocatorParams> |
| void reserve(size_t n, TAllocatorParams&&... allocator_params) { |
| if (n > capacity()) |
| realloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(n)), |
| std::forward<TAllocatorParams>(allocator_params)...); |
| } |
| |
| template <typename... TAllocatorParams> |
| void resize(size_t n, TAllocatorParams&&... allocator_params) { |
| reserve(n, std::forward<TAllocatorParams>(allocator_params)...); |
| resize_assume_reserved(n); |
| } |
| |
| void resize_assume_reserved(const size_t n) { |
| c_end = c_start + byte_size(n); |
| reset_resident_memory(); |
| } |
| |
| const char* raw_data() const { return c_start; } |
| |
| template <typename... TAllocatorParams> |
| void push_back_raw(const char* ptr, TAllocatorParams&&... allocator_params) { |
| if (UNLIKELY(c_end == c_end_of_storage)) |
| reserve_for_next_size(std::forward<TAllocatorParams>(allocator_params)...); |
| |
| reset_resident_memory(c_end + byte_size(1)); |
| memcpy(c_end, ptr, ELEMENT_SIZE); |
| c_end += byte_size(1); |
| } |
| |
| void protect() { |
| #ifndef NDEBUG |
| protect_impl(PROT_READ); |
| mprotected = true; |
| #endif |
| } |
| |
| void unprotect() { |
| #ifndef NDEBUG |
| if (mprotected) protect_impl(PROT_WRITE); |
| mprotected = false; |
| #endif |
| } |
| |
| template <typename It1, typename It2> |
| void assert_not_intersects(It1 from_begin [[maybe_unused]], It2 from_end [[maybe_unused]]) { |
| #ifndef NDEBUG |
| const char* ptr_begin = reinterpret_cast<const char*>(&*from_begin); |
| const char* ptr_end = reinterpret_cast<const char*>(&*from_end); |
| |
| /// Also it's safe if the range is empty. |
| assert(!((ptr_begin >= c_start && ptr_begin < c_end) || |
| (ptr_end > c_start && ptr_end <= c_end)) || |
| (ptr_begin == ptr_end)); |
| #endif |
| } |
| |
| ~PODArrayBase() { dealloc(); } |
| }; |
| |
| template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, |
| size_t pad_left_> |
| class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_> { |
| protected: |
| using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>; |
| |
| T* t_start() { return reinterpret_cast<T*>(this->c_start); } |
| T* t_end() { return reinterpret_cast<T*>(this->c_end); } |
| |
| const T* t_start() const { return reinterpret_cast<const T*>(this->c_start); } |
| const T* t_end() const { return reinterpret_cast<const T*>(this->c_end); } |
| |
| public: |
| using value_type = T; |
| |
| /// We cannot use boost::iterator_adaptor, because it defeats loop vectorization, |
| /// see https://github.com/ClickHouse/ClickHouse/pull/9442 |
| |
| using iterator = T*; |
| using const_iterator = const T*; |
| |
| PODArray() = default; |
| |
| PODArray(size_t n) { |
| this->alloc_for_num_elements(n); |
| this->c_end += this->byte_size(n); |
| this->reset_resident_memory(); |
| } |
| |
| PODArray(size_t n, const T& x) { |
| this->alloc_for_num_elements(n); |
| assign(n, x); |
| } |
| |
| PODArray(const_iterator from_begin, const_iterator from_end) { |
| this->alloc_for_num_elements(from_end - from_begin); |
| insert(from_begin, from_end); |
| } |
| |
| PODArray(std::initializer_list<T> il) : PODArray(std::begin(il), std::end(il)) {} |
| |
| PODArray(PODArray&& other) { this->swap(other); } |
| |
| PODArray& operator=(PODArray&& other) { |
| this->swap(other); |
| return *this; |
| } |
| |
| T* data() { return t_start(); } |
| const T* data() const { return t_start(); } |
| |
|     /// The index is signed to access the -1th element without pointer overflow. |
| T& operator[](ssize_t n) { |
| /// <= size, because taking address of one element past memory range is Ok in C++ (expression like &arr[arr.size()] is perfectly valid). |
| DCHECK_GE(n, (static_cast<ssize_t>(pad_left_) ? -1 : 0)); |
| DCHECK_LE(n, static_cast<ssize_t>(this->size())); |
| return t_start()[n]; |
| } |
| |
| const T& operator[](ssize_t n) const { |
| DCHECK_GE(n, (static_cast<ssize_t>(pad_left_) ? -1 : 0)); |
| DCHECK_LE(n, static_cast<ssize_t>(this->size())); |
| return t_start()[n]; |
| } |
| |
| T& front() { return t_start()[0]; } |
| T& back() { return t_end()[-1]; } |
| const T& front() const { return t_start()[0]; } |
| const T& back() const { return t_end()[-1]; } |
| |
| iterator begin() { return t_start(); } |
| iterator end() { return t_end(); } |
| const_iterator begin() const { return t_start(); } |
| const_iterator end() const { return t_end(); } |
| const_iterator cbegin() const { return t_start(); } |
| const_iterator cend() const { return t_end(); } |
| |
| void* get_end_ptr() const { return this->c_end; } |
| void set_end_ptr(void* ptr) { |
| this->c_end = (char*)ptr; |
| this->reset_resident_memory(); |
| } |
| |
| /// Same as resize, but zeroes new elements. |
| void resize_fill(size_t n) { |
| size_t old_size = this->size(); |
| const auto new_size = this->byte_size(n); |
| if (n > old_size) { |
| this->reserve(n); |
| this->reset_resident_memory(this->c_start + new_size); |
| memset(this->c_end, 0, this->byte_size(n - old_size)); |
| } else { |
| this->reset_resident_memory(this->c_start + new_size); |
| } |
| this->c_end = this->c_start + new_size; |
| } |
| |
|     /// Resize the array. |
|     /// Fill the new additional elements with the given value. |
| void resize_fill(size_t n, const T& value) { |
| size_t old_size = this->size(); |
| const auto new_size = this->byte_size(n); |
| if (n > old_size) { |
| this->reserve(n); |
| this->reset_resident_memory(this->c_start + new_size); |
| std::fill(t_end(), t_end() + n - old_size, value); |
| } else { |
| this->reset_resident_memory(this->c_start + new_size); |
| } |
| this->c_end = this->c_start + new_size; |
| } |
| |
| template <typename U, typename... TAllocatorParams> |
| void push_back(U&& x, TAllocatorParams&&... allocator_params) { |
| if (UNLIKELY(this->c_end + sizeof(T) > this->c_end_of_storage)) { |
| this->reserve_for_next_size(std::forward<TAllocatorParams>(allocator_params)...); |
| } |
| |
| this->reset_resident_memory(this->c_end + this->byte_size(1)); |
| new (t_end()) T(std::forward<U>(x)); |
| this->c_end += this->byte_size(1); |
| } |
| |
|     /** |
|      * You must make sure to reserve the PODArray before calling this method. |
|      * Removing the capacity-check branch can improve performance. |
|      */ |
| template <typename U, typename... TAllocatorParams> |
| void push_back_without_reserve(U&& x, TAllocatorParams&&... allocator_params) { |
| this->reset_resident_memory(this->c_end + this->byte_size(1)); |
| new (t_end()) T(std::forward<U>(x)); |
| this->c_end += this->byte_size(1); |
| } |
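| |
|     /// Typical pattern for push_back_without_reserve (a sketch; `src` and `n` are |
|     /// illustrative): |
|     ///   arr.reserve(n); |
|     ///   for (size_t i = 0; i < n; ++i) |
|     ///       arr.push_back_without_reserve(src[i]); |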
| |
|     /** This method doesn't allow passing parameters to the Allocator, |
|      * so it can't be used if the Allocator requires custom parameters. |
| */ |
| template <typename... Args> |
| void emplace_back(Args&&... args) { |
| if (UNLIKELY(this->c_end + sizeof(T) > this->c_end_of_storage)) { |
| this->reserve_for_next_size(); |
| } |
| |
| this->reset_resident_memory(this->c_end + this->byte_size(1)); |
| new (t_end()) T(std::forward<Args>(args)...); |
| this->c_end += this->byte_size(1); |
| } |
| |
| void pop_back() { this->c_end -= this->byte_size(1); } |
| |
|     /// Do not insert a piece of the array into itself, because the resize can invalidate iterators into the array. |
| template <typename It1, typename It2, typename... TAllocatorParams> |
| void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) { |
| this->assert_not_intersects(from_begin, from_end); |
| size_t required_capacity = this->size() + (from_end - from_begin); |
| if (required_capacity > this->capacity()) |
| this->reserve(round_up_to_power_of_two_or_zero(required_capacity), |
| std::forward<TAllocatorParams>(allocator_params)...); |
| } |
| |
|     /// Do not insert a piece of the array into itself, because the resize can invalidate iterators into the array. |
| template <typename It1, typename It2, typename... TAllocatorParams> |
| void insert(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) { |
| insert_prepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...); |
| |
| insert_assume_reserved(from_begin, from_end); |
| } |
| |
|     /// Works under the assumption that it's possible to read up to 15 excess bytes after `from_end` and that this PODArray is padded. |
| template <typename It1, typename It2, typename... TAllocatorParams> |
| void insert_small_allow_read_write_overflow15(It1 from_begin, It2 from_end, |
| TAllocatorParams&&... allocator_params) { |
| static_assert(pad_right_ >= 15); |
| insert_prepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...); |
| size_t bytes_to_copy = this->byte_size(from_end - from_begin); |
| this->reset_resident_memory(this->c_end + bytes_to_copy); |
| memcpy_small_allow_read_write_overflow15( |
| this->c_end, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy); |
| this->c_end += bytes_to_copy; |
| } |
| |
| template <typename It1, typename It2> |
| void insert(iterator it, It1 from_begin, It2 from_end) { |
| size_t bytes_to_copy = this->byte_size(from_end - from_begin); |
| if (!bytes_to_copy) { |
| return; |
| } |
| size_t bytes_to_move = this->byte_size(end() - it); |
| insert_prepare(from_begin, from_end); |
| this->reset_resident_memory(this->c_end + bytes_to_copy); |
| |
| if (UNLIKELY(bytes_to_move)) { |
| memmove(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move, |
| bytes_to_move); |
| } |
| |
| memcpy(this->c_end - bytes_to_move, reinterpret_cast<const void*>(&*from_begin), |
| bytes_to_copy); |
| this->c_end += bytes_to_copy; |
| } |
| |
| template <typename It1, typename It2> |
| void insert_assume_reserved(It1 from_begin, It2 from_end) { |
| this->assert_not_intersects(from_begin, from_end); |
| size_t bytes_to_copy = this->byte_size(from_end - from_begin); |
| this->reset_resident_memory(this->c_end + bytes_to_copy); |
| memcpy(this->c_end, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy); |
| this->c_end += bytes_to_copy; |
| } |
| |
| template <typename It1, typename It2> |
| void insert_assume_reserved_and_allow_overflow(It1 from_begin, It2 from_end) { |
| size_t bytes_to_copy = this->byte_size(from_end - from_begin); |
| this->reset_resident_memory(this->c_end + bytes_to_copy); |
| memcpy_small_allow_read_write_overflow15( |
| this->c_end, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy); |
| this->c_end += bytes_to_copy; |
| } |
| |
| void swap(PODArray& rhs) { |
| DCHECK(this->pad_left == rhs.pad_left && this->pad_right == rhs.pad_right) |
| << ", this.pad_left: " << this->pad_left << ", rhs.pad_left: " << rhs.pad_left |
| << ", this.pad_right: " << this->pad_right << ", rhs.pad_right: " << rhs.pad_right; |
| #ifndef NDEBUG |
| this->unprotect(); |
| rhs.unprotect(); |
| #endif |
| |
|         /// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions: |
|         /// - The elements of arr1 are stored on the stack. |
|         /// - The elements of arr2 are stored on the heap. |
| auto swap_stack_heap = [this](PODArray& arr1, PODArray& arr2) { |
| size_t stack_size = arr1.size(); |
| size_t stack_allocated = arr1.allocated_bytes(); |
| size_t stack_res_mem_used = arr1.c_res_mem - arr1.c_start; |
| |
| size_t heap_size = arr2.size(); |
| size_t heap_allocated = arr2.allocated_bytes(); |
| size_t heap_res_mem_used = arr2.c_res_mem - arr2.c_start; |
| |
| /// Keep track of the stack content we have to copy. |
| char* stack_c_start = arr1.c_start; |
| |
| /// arr1 takes ownership of the heap memory of arr2. |
| arr1.c_start = arr2.c_start; |
| arr1.c_end_of_storage = arr1.c_start + heap_allocated - arr2.pad_right - arr2.pad_left; |
| arr1.c_end = arr1.c_start + this->byte_size(heap_size); |
| arr1.c_res_mem = arr1.c_start + heap_res_mem_used; |
| |
| /// Allocate stack space for arr2. |
| arr2.alloc(stack_allocated); |
| /// Copy the stack content. |
| memcpy(arr2.c_start, stack_c_start, this->byte_size(stack_size)); |
| arr2.c_end = arr2.c_start + this->byte_size(stack_size); |
| arr2.c_res_mem = arr2.c_start + stack_res_mem_used; |
| }; |
| |
| auto do_move = [this](PODArray& src, PODArray& dest) { |
| if (src.is_allocated_from_stack()) { |
| dest.dealloc(); |
| dest.alloc(src.allocated_bytes()); |
| memcpy(dest.c_start, src.c_start, this->byte_size(src.size())); |
| dest.c_end = dest.c_start + this->byte_size(src.size()); |
| dest.c_res_mem = dest.c_start + (src.c_res_mem - src.c_start); |
| |
| src.c_start = Base::null; |
| src.c_end = Base::null; |
| src.c_end_of_storage = Base::null; |
| src.c_res_mem = Base::null; |
| } else { |
| std::swap(dest.c_start, src.c_start); |
| std::swap(dest.c_end, src.c_end); |
| std::swap(dest.c_end_of_storage, src.c_end_of_storage); |
| std::swap(dest.c_res_mem, src.c_res_mem); |
| } |
| }; |
| |
| if (!this->is_initialized() && !rhs.is_initialized()) { |
| return; |
| } else if (!this->is_initialized() && rhs.is_initialized()) { |
| do_move(rhs, *this); |
| return; |
| } else if (this->is_initialized() && !rhs.is_initialized()) { |
| do_move(*this, rhs); |
| return; |
| } |
| |
| if (this->is_allocated_from_stack() && rhs.is_allocated_from_stack()) { |
| size_t min_size = std::min(this->size(), rhs.size()); |
| size_t max_size = std::max(this->size(), rhs.size()); |
| |
| for (size_t i = 0; i < min_size; ++i) std::swap(this->operator[](i), rhs[i]); |
| |
| if (this->size() == max_size) { |
| for (size_t i = min_size; i < max_size; ++i) rhs[i] = this->operator[](i); |
| } else { |
| for (size_t i = min_size; i < max_size; ++i) this->operator[](i) = rhs[i]; |
| } |
| |
| size_t lhs_size = this->size(); |
| size_t lhs_allocated = this->allocated_bytes(); |
| size_t lhs_res_mem_used = this->c_res_mem - this->c_start; |
| |
| size_t rhs_size = rhs.size(); |
| size_t rhs_allocated = rhs.allocated_bytes(); |
| size_t rhs_res_mem_used = rhs.c_res_mem - rhs.c_start; |
| |
| this->c_end_of_storage = |
| this->c_start + rhs_allocated - Base::pad_right - Base::pad_left; |
| rhs.c_end_of_storage = rhs.c_start + lhs_allocated - Base::pad_right - Base::pad_left; |
| |
| this->c_end = this->c_start + this->byte_size(rhs_size); |
| rhs.c_end = rhs.c_start + this->byte_size(lhs_size); |
| |
| this->c_res_mem = this->c_start + rhs_res_mem_used; |
| rhs.c_res_mem = rhs.c_start + lhs_res_mem_used; |
| } else if (this->is_allocated_from_stack() && !rhs.is_allocated_from_stack()) { |
| swap_stack_heap(*this, rhs); |
| } else if (!this->is_allocated_from_stack() && rhs.is_allocated_from_stack()) { |
| swap_stack_heap(rhs, *this); |
| } else { |
| std::swap(this->c_start, rhs.c_start); |
| std::swap(this->c_end, rhs.c_end); |
| std::swap(this->c_end_of_storage, rhs.c_end_of_storage); |
| std::swap(this->c_res_mem, rhs.c_res_mem); |
| } |
| } |
| |
|     /// Resize the array. |
|     /// Replace all elements with the given value. |
| void assign(size_t n, const T& x) { |
| this->resize(n); |
| std::fill(begin(), end(), x); |
| } |
| |
| template <typename It1, typename It2> |
| void assign(It1 from_begin, It2 from_end) { |
| this->assert_not_intersects(from_begin, from_end); |
| size_t required_capacity = from_end - from_begin; |
| if (required_capacity > this->capacity()) |
| this->reserve(round_up_to_power_of_two_or_zero(required_capacity)); |
| |
| size_t bytes_to_copy = this->byte_size(required_capacity); |
| this->reset_resident_memory(this->c_start + bytes_to_copy); |
| memcpy(this->c_start, reinterpret_cast<const void*>(&*from_begin), bytes_to_copy); |
| this->c_end = this->c_start + bytes_to_copy; |
| } |
| |
| void assign(const PODArray& from) { assign(from.begin(), from.end()); } |
| |
| void erase(const_iterator first, const_iterator last) { |
| auto first_no_const = const_cast<iterator>(first); |
| auto last_no_const = const_cast<iterator>(last); |
| |
| size_t items_to_move = end() - last; |
| |
| while (items_to_move != 0) { |
| *first_no_const = *last_no_const; |
| |
| ++first_no_const; |
| ++last_no_const; |
| |
| --items_to_move; |
| } |
| |
| this->c_end = reinterpret_cast<char*>(first_no_const); |
| } |
| |
| void erase(const_iterator pos) { this->erase(pos, pos + 1); } |
| |
| bool operator==(const PODArray& rhs) const { |
| if (this->size() != rhs.size()) { |
| return false; |
| } |
| |
| const_iterator lhs_it = begin(); |
| const_iterator rhs_it = rhs.begin(); |
| |
| while (lhs_it != end()) { |
| if (*lhs_it != *rhs_it) { |
| return false; |
| } |
| |
| ++lhs_it; |
| ++rhs_it; |
| } |
| |
| return true; |
| } |
| |
| bool operator!=(const PODArray& rhs) const { return !operator==(rhs); } |
| }; |
| |
| template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, |
| size_t pad_left_> |
| void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_>& lhs, |
| PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_>& rhs) { |
| lhs.swap(rhs); |
| } |
| |
| } // namespace doris::vectorized |
| #include "common/compile_check_avoid_end.h" |