src/parquet/util/mem-pool.h - parquet-cpp - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 // Initially imported from Apache Impala on 2016-02-23, and has been modified
 // since for parquet-cpp

 #ifndef PARQUET_UTIL_MEM_POOL_H
 #define PARQUET_UTIL_MEM_POOL_H

 #include <stdio.h>
 #include <algorithm>
 #include <cstdint>
 #include <vector>
 #include <string>

 #include "parquet/util/logging.h"
 #include "parquet/util/bit-util.h"
 #include "parquet/util/mem-allocator.h"

 namespace parquet {

 /// A MemPool maintains a list of memory chunks from which it allocates memory
 /// in response to Allocate() calls;
 /// Chunks stay around for the lifetime of the mempool or until they are passed on to
 /// another mempool.
 //
 /// An Allocate() call will attempt to allocate memory from the chunk that was most
 /// recently added; if that chunk doesn't have enough memory to
 /// satisfy the allocation request, the free chunks are searched for one that is
 /// big enough otherwise a new chunk is added to the list.
 /// The current_chunk_idx_ always points to the last chunk with allocated memory.
 /// In order to keep allocation overhead low, chunk sizes double with each new one
 /// added, until they hit a maximum size.
 //
 ///     Example:
 ///     MemPool* p = new MemPool();
 ///     for (int i = 0; i < 1024; ++i) {
 /// returns 8-byte aligned memory (effectively 24 bytes):
 ///       .. = p->Allocate(17);
 ///     }
 /// at this point, 17K have been handed out in response to Allocate() calls and
 /// 28K of chunks have been allocated (chunk sizes: 4K, 8K, 16K)
 /// We track total and peak allocated bytes. At this point they would be the same:
 /// 28k bytes.  A call to Clear will return the allocated memory so
 /// total_allocate_bytes_
 /// becomes 0 while peak_allocate_bytes_ remains at 28k.
 ///     p->Clear();
 /// the entire 1st chunk is returned:
 ///     .. = p->Allocate(4 * 1024);
 /// 4K of the 2nd chunk are returned:
 ///     .. = p->Allocate(4 * 1024);
 /// a new 20K chunk is created
 ///     .. = p->Allocate(20 * 1024);
 //
 ///      MemPool* p2 = new MemPool();
 /// the new mempool receives all chunks containing data from p
 ///      p2->AcquireData(p, false);
 /// At this point p.total_allocated_bytes_ would be 0 while p.peak_allocated_bytes_
 /// remains unchanged.
 /// The one remaining (empty) chunk is released:
 ///    delete p;

 class MemPool {
  public:
   explicit MemPool(MemoryAllocator* allocator = default_allocator());

   /// Frees all chunks of memory and subtracts the total allocated bytes
   /// from the registered limits.
   ~MemPool();

   /// Allocates 8-byte aligned section of memory of 'size' bytes at the end
   /// of the the current chunk. Creates a new chunk if there aren't any chunks
   /// with enough capacity.
   uint8_t* Allocate(int size) { return Allocate<false>(size); }

   /// Returns 'byte_size' to the current chunk back to the mem pool. This can
   /// only be used to return either all or part of the previous allocation returned
   /// by Allocate().
   void ReturnPartialAllocation(int byte_size) {
     DCHECK_GE(byte_size, 0);
     DCHECK(current_chunk_idx_ != -1);
     ChunkInfo& info = chunks_[current_chunk_idx_];
     DCHECK_GE(info.allocated_bytes, byte_size);
     info.allocated_bytes -= byte_size;
     total_allocated_bytes_ -= byte_size;
   }

   /// Makes all allocated chunks available for re-use, but doesn't delete any chunks.
   void Clear();

   /// Deletes all allocated chunks. FreeAll() or AcquireData() must be called for
   /// each mem pool
   void FreeAll();

   /// Absorb all chunks that hold data from src. If keep_current is true, let src hold on
   /// to its last allocated chunk that contains data.
   /// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid.
   void AcquireData(MemPool* src, bool keep_current);

   std::string DebugString();

   int64_t total_allocated_bytes() const { return total_allocated_bytes_; }
   int64_t peak_allocated_bytes() const { return peak_allocated_bytes_; }
   int64_t total_reserved_bytes() const { return total_reserved_bytes_; }

   /// Return sum of chunk_sizes_.
   int64_t GetTotalChunkSizes() const;

  private:
   friend class MemPoolTest;
   static const int INITIAL_CHUNK_SIZE = 4 * 1024;

   /// The maximum size of chunk that should be allocated. Allocations larger than this
   /// size will get their own individual chunk.
   static const int MAX_CHUNK_SIZE = 1024 * 1024;

   struct ChunkInfo {
     uint8_t* data;  // Owned by the ChunkInfo.
     int64_t size;   // in bytes

     /// bytes allocated via Allocate() in this chunk
     int64_t allocated_bytes;

     explicit ChunkInfo(int64_t size, uint8_t* buf);

     ChunkInfo() : data(NULL), size(0), allocated_bytes(0) {}
   };

   /// chunk from which we served the last Allocate() call;
   /// always points to the last chunk that contains allocated data;
   /// chunks 0..current_chunk_idx_ are guaranteed to contain data
   /// (chunks_[i].allocated_bytes > 0 for i: 0..current_chunk_idx_);
   /// -1 if no chunks present
   int current_chunk_idx_;

   /// The size of the next chunk to allocate.
   int64_t next_chunk_size_;

   /// sum of allocated_bytes_
   int64_t total_allocated_bytes_;

   /// Maximum number of bytes allocated from this pool at one time.
   int64_t peak_allocated_bytes_;

   /// sum of all bytes allocated in chunks_
   int64_t total_reserved_bytes_;

   std::vector<ChunkInfo> chunks_;

   MemoryAllocator* allocator_;

   /// Find or allocated a chunk with at least min_size spare capacity and update
   /// current_chunk_idx_. Also updates chunks_, chunk_sizes_ and allocated_bytes_
   /// if a new chunk needs to be created.
   bool FindChunk(int64_t min_size);

   /// Check integrity of the supporting data structures; always returns true but DCHECKs
   /// all invariants.
   /// If 'current_chunk_empty' is false, checks that the current chunk contains data.
   bool CheckIntegrity(bool current_chunk_empty);

   /// Return offset to unoccpied space in current chunk.
   int GetFreeOffset() const {
     if (current_chunk_idx_ == -1) return 0;
     return chunks_[current_chunk_idx_].allocated_bytes;
   }

   template <bool CHECK_LIMIT_FIRST>
   uint8_t* Allocate(int size) {
     if (size == 0) return NULL;

     int64_t num_bytes = BitUtil::RoundUp(size, 8);
     if (current_chunk_idx_ == -1 ||
         num_bytes + chunks_[current_chunk_idx_].allocated_bytes >
             chunks_[current_chunk_idx_].size) {
       // If we couldn't allocate a new chunk, return NULL.
       if (UNLIKELY(!FindChunk(num_bytes))) return NULL;
     }
     ChunkInfo& info = chunks_[current_chunk_idx_];
     uint8_t* result = info.data + info.allocated_bytes;
     DCHECK_LE(info.allocated_bytes + num_bytes, info.size);
     info.allocated_bytes += num_bytes;
     total_allocated_bytes_ += num_bytes;
     DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1);
     peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
     return result;
   }
 };

 }  // namespace parquet

 #endif  // PARQUET_UTIL_MEM_POOL_H
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	// Initially imported from Apache Impala on 2016-02-23, and has been modified
	// since for parquet-cpp

	#ifndef PARQUET_UTIL_MEM_POOL_H
	#define PARQUET_UTIL_MEM_POOL_H

	#include <stdio.h>
	#include <algorithm>
	#include <cstdint>
	#include <vector>
	#include <string>

	#include "parquet/util/logging.h"
	#include "parquet/util/bit-util.h"
	#include "parquet/util/mem-allocator.h"

	namespace parquet {

	/// A MemPool maintains a list of memory chunks from which it allocates memory
	/// in response to Allocate() calls;
	/// Chunks stay around for the lifetime of the mempool or until they are passed on to
	/// another mempool.
	//
	/// An Allocate() call will attempt to allocate memory from the chunk that was most
	/// recently added; if that chunk doesn't have enough memory to
	/// satisfy the allocation request, the free chunks are searched for one that is
	/// big enough otherwise a new chunk is added to the list.
	/// The current_chunk_idx_ always points to the last chunk with allocated memory.
	/// In order to keep allocation overhead low, chunk sizes double with each new one
	/// added, until they hit a maximum size.
	//
	/// Example:
	/// MemPool* p = new MemPool();
	/// for (int i = 0; i < 1024; ++i) {
	/// returns 8-byte aligned memory (effectively 24 bytes):
	/// .. = p->Allocate(17);
	/// }
	/// at this point, 17K have been handed out in response to Allocate() calls and
	/// 28K of chunks have been allocated (chunk sizes: 4K, 8K, 16K)
	/// We track total and peak allocated bytes. At this point they would be the same:
	/// 28k bytes. A call to Clear will return the allocated memory so
	/// total_allocate_bytes_
	/// becomes 0 while peak_allocate_bytes_ remains at 28k.
	/// p->Clear();
	/// the entire 1st chunk is returned:
	/// .. = p->Allocate(4 * 1024);
	/// 4K of the 2nd chunk are returned:
	/// .. = p->Allocate(4 * 1024);
	/// a new 20K chunk is created
	/// .. = p->Allocate(20 * 1024);
	//
	/// MemPool* p2 = new MemPool();
	/// the new mempool receives all chunks containing data from p
	/// p2->AcquireData(p, false);
	/// At this point p.total_allocated_bytes_ would be 0 while p.peak_allocated_bytes_
	/// remains unchanged.
	/// The one remaining (empty) chunk is released:
	/// delete p;

	class MemPool {
	public:
	explicit MemPool(MemoryAllocator* allocator = default_allocator());

	/// Frees all chunks of memory and subtracts the total allocated bytes
	/// from the registered limits.
	~MemPool();

	/// Allocates 8-byte aligned section of memory of 'size' bytes at the end
	/// of the the current chunk. Creates a new chunk if there aren't any chunks
	/// with enough capacity.
	uint8_t* Allocate(int size) { return Allocate<false>(size); }

	/// Returns 'byte_size' to the current chunk back to the mem pool. This can
	/// only be used to return either all or part of the previous allocation returned
	/// by Allocate().
	void ReturnPartialAllocation(int byte_size) {
	DCHECK_GE(byte_size, 0);
	DCHECK(current_chunk_idx_ != -1);
	ChunkInfo& info = chunks_[current_chunk_idx_];
	DCHECK_GE(info.allocated_bytes, byte_size);
	info.allocated_bytes -= byte_size;
	total_allocated_bytes_ -= byte_size;
	}

	/// Makes all allocated chunks available for re-use, but doesn't delete any chunks.
	void Clear();

	/// Deletes all allocated chunks. FreeAll() or AcquireData() must be called for
	/// each mem pool
	void FreeAll();

	/// Absorb all chunks that hold data from src. If keep_current is true, let src hold on
	/// to its last allocated chunk that contains data.
	/// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid.
	void AcquireData(MemPool* src, bool keep_current);

	std::string DebugString();

	int64_t total_allocated_bytes() const { return total_allocated_bytes_; }
	int64_t peak_allocated_bytes() const { return peak_allocated_bytes_; }
	int64_t total_reserved_bytes() const { return total_reserved_bytes_; }

	/// Return sum of chunk_sizes_.
	int64_t GetTotalChunkSizes() const;

	private:
	friend class MemPoolTest;
	static const int INITIAL_CHUNK_SIZE = 4 * 1024;

	/// The maximum size of chunk that should be allocated. Allocations larger than this
	/// size will get their own individual chunk.
	static const int MAX_CHUNK_SIZE = 1024 * 1024;

	struct ChunkInfo {
	uint8_t* data; // Owned by the ChunkInfo.
	int64_t size; // in bytes

	/// bytes allocated via Allocate() in this chunk
	int64_t allocated_bytes;

	explicit ChunkInfo(int64_t size, uint8_t* buf);

	ChunkInfo() : data(NULL), size(0), allocated_bytes(0) {}
	};

	/// chunk from which we served the last Allocate() call;
	/// always points to the last chunk that contains allocated data;
	/// chunks 0..current_chunk_idx_ are guaranteed to contain data
	/// (chunks_[i].allocated_bytes > 0 for i: 0..current_chunk_idx_);
	/// -1 if no chunks present
	int current_chunk_idx_;

	/// The size of the next chunk to allocate.
	int64_t next_chunk_size_;

	/// sum of allocated_bytes_
	int64_t total_allocated_bytes_;

	/// Maximum number of bytes allocated from this pool at one time.
	int64_t peak_allocated_bytes_;

	/// sum of all bytes allocated in chunks_
	int64_t total_reserved_bytes_;

	std::vector<ChunkInfo> chunks_;

	MemoryAllocator* allocator_;

	/// Find or allocated a chunk with at least min_size spare capacity and update
	/// current_chunk_idx_. Also updates chunks_, chunk_sizes_ and allocated_bytes_
	/// if a new chunk needs to be created.
	bool FindChunk(int64_t min_size);

	/// Check integrity of the supporting data structures; always returns true but DCHECKs
	/// all invariants.
	/// If 'current_chunk_empty' is false, checks that the current chunk contains data.
	bool CheckIntegrity(bool current_chunk_empty);

	/// Return offset to unoccpied space in current chunk.
	int GetFreeOffset() const {
	if (current_chunk_idx_ == -1) return 0;
	return chunks_[current_chunk_idx_].allocated_bytes;
	}

	template <bool CHECK_LIMIT_FIRST>
	uint8_t* Allocate(int size) {
	if (size == 0) return NULL;

	int64_t num_bytes = BitUtil::RoundUp(size, 8);
	if (current_chunk_idx_ == -1 \|\|
	num_bytes + chunks_[current_chunk_idx_].allocated_bytes >
	chunks_[current_chunk_idx_].size) {
	// If we couldn't allocate a new chunk, return NULL.
	if (UNLIKELY(!FindChunk(num_bytes))) return NULL;
	}
	ChunkInfo& info = chunks_[current_chunk_idx_];
	uint8_t* result = info.data + info.allocated_bytes;
	DCHECK_LE(info.allocated_bytes + num_bytes, info.size);
	info.allocated_bytes += num_bytes;
	total_allocated_bytes_ += num_bytes;
	DCHECK_LE(current_chunk_idx_, static_cast<int>(chunks_.size()) - 1);
	peak_allocated_bytes_ = std::max(total_allocated_bytes_, peak_allocated_bytes_);
	return result;
	}
	};

	} // namespace parquet

	#endif // PARQUET_UTIL_MEM_POOL_H