blob: b64ee29ed98ecc97b6faae1b960c22d7b8fb572c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Initially imported from Apache Impala on 2016-02-23, and has been modified
// since for parquet-cpp
#ifndef PARQUET_UTIL_MEM_POOL_H
#define PARQUET_UTIL_MEM_POOL_H
#include <stdio.h>
#include <algorithm>
#include <cstdint>
#include <vector>
#include <string>
#include "parquet/util/logging.h"
#include "parquet/util/bit-util.h"
#include "parquet/util/mem-allocator.h"
namespace parquet {
/// A MemPool maintains a list of memory chunks from which it allocates memory
/// in response to Allocate() calls;
/// Chunks stay around for the lifetime of the mempool or until they are passed on to
/// another mempool.
//
/// An Allocate() call will attempt to allocate memory from the chunk that was most
/// recently added; if that chunk doesn't have enough memory to
/// satisfy the allocation request, the free chunks are searched for one that is
/// big enough otherwise a new chunk is added to the list.
/// The current_chunk_idx_ always points to the last chunk with allocated memory.
/// In order to keep allocation overhead low, chunk sizes double with each new one
/// added, until they hit a maximum size.
//
/// Example:
/// MemPool* p = new MemPool();
/// for (int i = 0; i < 1024; ++i) {
/// returns 8-byte aligned memory (effectively 24 bytes):
/// .. = p->Allocate(17);
/// }
/// at this point, 17K have been handed out in response to Allocate() calls and
/// 28K of chunks have been allocated (chunk sizes: 4K, 8K, 16K)
/// We track total and peak allocated bytes. At this point they would be the same:
/// 28k bytes. A call to Clear will return the allocated memory so
/// total_allocate_bytes_
/// becomes 0 while peak_allocate_bytes_ remains at 28k.
/// p->Clear();
/// the entire 1st chunk is returned:
/// .. = p->Allocate(4 * 1024);
/// 4K of the 2nd chunk are returned:
/// .. = p->Allocate(4 * 1024);
/// a new 20K chunk is created
/// .. = p->Allocate(20 * 1024);
//
/// MemPool* p2 = new MemPool();
/// the new mempool receives all chunks containing data from p
/// p2->AcquireData(p, false);
/// At this point p.total_allocated_bytes_ would be 0 while p.peak_allocated_bytes_
/// remains unchanged.
/// The one remaining (empty) chunk is released:
/// delete p;
class MemPool {
public:
explicit MemPool(MemoryAllocator* allocator = default_allocator());
/// Frees all chunks of memory and subtracts the total allocated bytes
/// from the registered limits.
~MemPool();
/// Allocates 8-byte aligned section of memory of 'size' bytes at the end
/// of the the current chunk. Creates a new chunk if there aren't any chunks
/// with enough capacity.
uint8_t* Allocate(int size) { return Allocate<false>(size); }
/// Returns 'byte_size' to the current chunk back to the mem pool. This can
/// only be used to return either all or part of the previous allocation returned
/// by Allocate().
void ReturnPartialAllocation(int byte_size) {
DCHECK_GE(byte_size, 0);
DCHECK(current_chunk_idx_ != -1);
ChunkInfo& info = chunks_[current_chunk_idx_];
DCHECK_GE(info.allocated_bytes, byte_size);
info.allocated_bytes -= byte_size;
total_allocated_bytes_ -= byte_size;
}
/// Makes all allocated chunks available for re-use, but doesn't delete any chunks.
void Clear();
/// Deletes all allocated chunks. FreeAll() or AcquireData() must be called for
/// each mem pool
void FreeAll();
/// Absorb all chunks that hold data from src. If keep_current is true, let src hold on
/// to its last allocated chunk that contains data.
/// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid.
void AcquireData(MemPool* src, bool keep_current);
std::string DebugString();
int64_t total_allocated_bytes() const { return total_allocated_bytes_; }
int64_t peak_allocated_bytes() const { return peak_allocated_bytes_; }
int64_t total_reserved_bytes() const { return total_reserved_bytes_; }
/// Return sum of chunk_sizes_.
int64_t GetTotalChunkSizes() const;
private:
friend class MemPoolTest;
static const int INITIAL_CHUNK_SIZE = 4 * 1024;
/// The maximum size of chunk that should be allocated. Allocations larger than this
/// size will get their own individual chunk.
static const int MAX_CHUNK_SIZE = 1024 * 1024;
struct ChunkInfo {
uint8_t* data; // Owned by the ChunkInfo.
int64_t size; // in bytes
/// bytes allocated via Allocate() in this chunk
int64_t allocated_bytes;
explicit ChunkInfo(int64_t size, uint8_t* buf);
ChunkInfo() : data(NULL), size(0), allocated_bytes(0) {}
};
/// chunk from which we served the last Allocate() call;
/// always points to the last chunk that contains allocated data;
/// chunks 0..current_chunk_idx_ are guaranteed to contain data
/// (chunks_[i].allocated_bytes > 0 for i: 0..current_chunk_idx_);
/// -1 if no chunks present
int current_chunk_idx_;
/// The size of the next chunk to allocate.
int64_t next_chunk_size_;
/// sum of allocated_bytes_
int64_t total_allocated_bytes_;
/// Maximum number of bytes allocated from this pool at one time.
int64_t peak_allocated_bytes_;
/// sum of all bytes allocated in chunks_
int64_t total_reserved_bytes_;
std::vector<ChunkInfo> chunks_;
MemoryAllocator* allocator_;
/// Find or allocated a chunk with at least min_size spare capacity and update
/// current_chunk_idx_. Also updates chunks_, chunk_sizes_ and allocated_bytes_
/// if a new chunk needs to be created.
bool FindChunk(int64_t min_size);
/// Check integrity of the supporting data structures; always returns true but DCHECKs
/// all invariants.
/// If 'current_chunk_empty' is false, checks that the current chunk contains data.
bool CheckIntegrity(bool current_chunk_empty);
/// Return offset to unoccpied space in current chunk.
int GetFreeOffset() const {
if (current_chunk_idx_ == -1) return 0;
return chunks_[current_chunk_idx_].allocated_bytes;
}
template <bool CHECK_LIMIT_FIRST>
uint8_t* Allocate(int size) {
if (size == 0) return NULL;
int64_t num_bytes = BitUtil::RoundUp(size, 8);
if (current_chunk_idx_ == -1 ||
num_bytes + chunks_[current_chunk_idx_].allocated_bytes >
chunks_[current_chunk_idx_].size) {
// If we couldn't allocate a new chunk, return NULL.
if (UNLIKELY(!FindChunk(num_bytes))) return NULL;
}
ChunkInfo& info = chunks_[current_chunk_idx_];
uint8_t* result = info.data + info.allocated_bytes;
DCHECK_LE(info.allocated_bytes + num_bytes, info.size);
info.allocated_bytes += num_bytes;
total_allocated_bytes_ += num_bytes;
DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1);
peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
return result;
}
};
} // namespace parquet
#endif // PARQUET_UTIL_MEM_POOL_H