// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cstring>
#include <string>
#include "runtime/mem-pool.h"
#include "runtime/mem-tracker.h"
#include "testutil/gtest-util.h"
#include "util/bit-util.h"
#include "common/names.h"
// A large allocation size that exceeds the range of a 32-bit integer.
#define LARGE_ALLOC_SIZE (1LL << 32)
namespace impala {
// Utility class to call private functions on MemPool.
class MemPoolTest {
public:
static bool CheckIntegrity(MemPool* pool, bool current_chunk_empty) {
return pool->CheckIntegrity(current_chunk_empty);
}
static const int INITIAL_CHUNK_SIZE = MemPool::INITIAL_CHUNK_SIZE;
static const int MAX_CHUNK_SIZE = MemPool::MAX_CHUNK_SIZE;
};
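// Out-of-line definitions required for ODR-uses of the static constants above
// (needed prior to C++17).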
const int MemPoolTest::INITIAL_CHUNK_SIZE;
const int MemPoolTest::MAX_CHUNK_SIZE;
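// Exercises basic allocation, alignment, chunk doubling, Clear() and
// AcquireData() semantics.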
TEST(MemPoolTest, Basic) {
MemTracker tracker;
MemPool p(&tracker);
MemPool p2(&tracker);
MemPool p3(&tracker);
uint8_t* ptr = NULL;
for (int iter = 0; iter < 2; ++iter) {
// Make 768 allocations of 25 + 7 bytes each.
for (int i = 0; i < 768; ++i) {
ptr = p.Allocate(25);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(0, reinterpret_cast<uintptr_t>(ptr) % MemPool::DEFAULT_ALIGNMENT)
<< "Allocation should be aligned";
// Allocate the padding to get 32 bytes
ptr = p.TryAllocateAligned(7, 1);
EXPECT_TRUE(ptr != NULL);
}
// We allocated 24K out of 28K of chunks (4K, 8K and 16K).
EXPECT_EQ(24 * 1024, p.total_allocated_bytes());
EXPECT_EQ(28 * 1024, p.GetTotalChunkSizes());
// we pass on the first two chunks, containing 12K of data; we're left with
// one 16K chunk containing 12K of data
p2.AcquireData(&p, true);
EXPECT_EQ(12 * 1024, p.total_allocated_bytes());
EXPECT_EQ(16 * 1024, p.GetTotalChunkSizes());
// we allocate 8K, for which there isn't enough room in the current chunk,
// so another one is allocated (32K)
p.Allocate(8 * 1024);
EXPECT_EQ((16 + 32) * 1024, p.GetTotalChunkSizes());
// we allocate 65K, which doesn't fit into the current chunk or the default
// size of the next allocated chunk (64K)
p.Allocate(65 * 1024);
EXPECT_EQ((12 + 8 + 65) * 1024, p.total_allocated_bytes());
EXPECT_EQ((16 + 32 + 65) * 1024, p.GetTotalChunkSizes());
// Clear() resets allocated data, but doesn't remove any chunks
p.Clear();
EXPECT_EQ(0, p.total_allocated_bytes());
EXPECT_EQ((16 + 32 + 65) * 1024, p.GetTotalChunkSizes());
// next allocation reuses existing chunks
p.Allocate(1024);
EXPECT_EQ(1024, p.total_allocated_bytes());
EXPECT_EQ((16 + 32 + 65) * 1024, p.GetTotalChunkSizes());
// ... unless it doesn't fit into any available chunk
p.Allocate(120 * 1024);
EXPECT_EQ((1 + 120) * 1024, p.total_allocated_bytes());
EXPECT_EQ((130 + 16 + 32 + 65) * 1024, p.GetTotalChunkSizes());
// ... try another allocation that fits into an existing chunk
p.Allocate(33 * 1024);
EXPECT_EQ((1 + 120 + 33) * 1024, p.total_allocated_bytes());
EXPECT_EQ((130 + 16 + 32 + 65) * 1024, p.GetTotalChunkSizes());
// keep_current=false releases all of p's chunks, which get added to p2
p2.AcquireData(&p, false);
EXPECT_EQ(0, p.total_allocated_bytes());
EXPECT_EQ(0, p.GetTotalChunkSizes());
p3.AcquireData(&p2, true); // we're keeping the 65k chunk
EXPECT_EQ(33 * 1024, p2.total_allocated_bytes());
EXPECT_EQ(65 * 1024, p2.GetTotalChunkSizes());
p.FreeAll();
p2.FreeAll();
p3.FreeAll();
}
// Test zero byte allocation.
ptr = p.Allocate(0);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(0, p.GetTotalChunkSizes());
}
// Test that we can keep an allocated chunk and a free chunk.
// This case verifies that when chunks are acquired by another memory pool, the
// remaining chunks stay consistent when there was more than one used chunk and
// some free chunks.
TEST(MemPoolTest, Keep) {
MemTracker tracker;
MemPool p(&tracker);
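// Each allocation exactly fills a fresh chunk under the doubling policy:
// 4K, 8K, then 16K.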
p.Allocate(4*1024);
p.Allocate(8*1024);
p.Allocate(16*1024);
EXPECT_EQ((4 + 8 + 16) * 1024, p.total_allocated_bytes());
EXPECT_EQ((4 + 8 + 16) * 1024, p.GetTotalChunkSizes());
p.Clear();
EXPECT_EQ(0, p.total_allocated_bytes());
EXPECT_EQ((4 + 8 + 16) * 1024, p.GetTotalChunkSizes());
p.Allocate(1*1024);
p.Allocate(4*1024);
EXPECT_EQ((1 + 4) * 1024, p.total_allocated_bytes());
EXPECT_EQ((4 + 8 + 16) * 1024, p.GetTotalChunkSizes());
MemPool p2(&tracker);
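// keep_current=true: p keeps its current 8K chunk (holding the 4K allocation)
// and the free 16K chunk; the 4K chunk with the 1K allocation moves to p2.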
p2.AcquireData(&p, true);
EXPECT_EQ(4 * 1024, p.total_allocated_bytes());
EXPECT_EQ((8 + 16) * 1024, p.GetTotalChunkSizes());
EXPECT_EQ(1 * 1024, p2.total_allocated_bytes());
EXPECT_EQ(4 * 1024, p2.GetTotalChunkSizes());
p.FreeAll();
p2.FreeAll();
}
#ifdef ADDRESS_SANITIZER
/// These tests confirm that ASAN will catch use-after-poison errors: the memory
/// is still valid, just cached (and poisoned) by the MemPool.
TEST(MemPoolTest, UseAfterClear) {
MemTracker tracker;
MemPool p(&tracker);
uint8_t* ptr = p.Allocate(1024);
ptr[10] = 'A';
p.Clear();
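// The chunk is retained by the pool but poisoned; any further access should
// die under ASAN.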
ASSERT_DEATH({ptr[10] = 'B';}, "use-after-poison");
p.FreeAll();
}
TEST(MemPoolTest, UseAfterPartialReturn) {
MemTracker tracker;
MemPool p(&tracker);
uint8_t* ptr = p.Allocate(1024);
ptr[10] = 'A';
p.ReturnPartialAllocation(512);
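// Bytes [0, 512) of the allocation remain valid; bytes [512, 1024) are poisoned.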
ptr[511] = 'B';
ASSERT_DEATH({ptr[512] = 'X';}, "use-after-poison");
p.FreeAll();
}
#endif
// Tests that we can return partial allocations.
TEST(MemPoolTest, ReturnPartial) {
MemTracker tracker;
MemPool p(&tracker);
uint8_t* ptr = p.Allocate(1024);
EXPECT_EQ(1024, p.total_allocated_bytes());
memset(ptr, 0, 1024);
p.ReturnPartialAllocation(1024);
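// Returning the full allocation rewinds the pool, so the next allocation
// reuses the same address.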
uint8_t* ptr2 = p.Allocate(1024);
EXPECT_EQ(1024, p.total_allocated_bytes());
EXPECT_TRUE(ptr == ptr2);
p.ReturnPartialAllocation(1016);
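// 8 bytes of the previous allocation were kept, so the next 1016 bytes start
// at ptr + 8.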
ptr2 = p.Allocate(1016);
EXPECT_EQ(1024, p.total_allocated_bytes());
EXPECT_TRUE(ptr2 == ptr + 8);
p.ReturnPartialAllocation(512);
memset(ptr2, 1, 1016 - 512);
uint8_t* ptr3 = p.Allocate(512);
EXPECT_EQ(1024, p.total_allocated_bytes());
EXPECT_TRUE(ptr3 == ptr + 512);
memset(ptr3, 2, 512);
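// Verify the resulting layout: 8 zero bytes, 504 bytes of 1s, then 512 bytes of 2s.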
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(0, ptr[i]);
}
for (int i = 8; i < 512; ++i) {
EXPECT_EQ(1, ptr[i]);
}
for (int i = 512; i < 1024; ++i) {
EXPECT_EQ(2, ptr[i]);
}
// Try ReturnPartialAllocation() with 64-bit values.
uint8_t* ptr4 = p.Allocate(LARGE_ALLOC_SIZE + 512);
EXPECT_EQ(1024 + LARGE_ALLOC_SIZE + 512, p.total_allocated_bytes());
memset(ptr4, 3, 512 * 2);
p.ReturnPartialAllocation(LARGE_ALLOC_SIZE);
uint8_t* ptr5 = p.Allocate(512);
EXPECT_TRUE(ptr5 == ptr4 + 512);
memset(ptr5, 4, 512);
for (int i = 0; i < 512; ++i) {
EXPECT_EQ(3, ptr4[i]);
}
for (int i = 512; i < 512 * 2; ++i) {
EXPECT_EQ(4, ptr4[i]);
}
p.FreeAll();
}
TEST(MemPoolTest, Limits) {
MemTracker limit3(4 * MemPoolTest::INITIAL_CHUNK_SIZE);
MemTracker limit1(2 * MemPoolTest::INITIAL_CHUNK_SIZE, "", &limit3);
MemTracker limit2(3 * MemPoolTest::INITIAL_CHUNK_SIZE, "", &limit3);
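// limit3 is the shared parent tracker; limit1 and limit2 are children with
// smaller limits of their own.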
MemPool* p1 = new MemPool(&limit1);
EXPECT_FALSE(limit1.AnyLimitExceeded(MemLimit::HARD));
MemPool* p2 = new MemPool(&limit2);
EXPECT_FALSE(limit2.AnyLimitExceeded(MemLimit::HARD));
// p1 exceeds a non-shared limit
p1->Allocate(MemPoolTest::INITIAL_CHUNK_SIZE);
EXPECT_FALSE(limit1.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE, limit1.consumption());
EXPECT_FALSE(limit3.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE, limit3.consumption());
p1->Allocate(MemPoolTest::INITIAL_CHUNK_SIZE);
EXPECT_TRUE(limit1.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 3, limit1.consumption());
EXPECT_FALSE(limit3.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 3, limit3.consumption());
// p2 exceeds a shared limit
p2->Allocate(MemPoolTest::INITIAL_CHUNK_SIZE);
EXPECT_FALSE(limit2.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE, limit2.consumption());
EXPECT_FALSE(limit3.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 4, limit3.consumption());
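// The 1-byte allocation forces a new doubled chunk (2 * INITIAL_CHUNK_SIZE),
// which pushes the shared limit3 over its limit.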
p2->Allocate(1);
EXPECT_FALSE(limit2.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 3, limit2.consumption());
EXPECT_TRUE(limit3.LimitExceeded(MemLimit::HARD));
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 6, limit3.consumption());
// deleting pools reduces consumption
p1->FreeAll();
delete p1;
EXPECT_EQ(0, limit1.consumption());
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 3, limit2.consumption());
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE * 3, limit3.consumption());
// Free all of p2's chunks, then allocate another chunk within the limit.
p2->FreeAll();
EXPECT_FALSE(limit2.LimitExceeded(MemLimit::HARD));
uint8_t* result = p2->TryAllocate(MemPoolTest::INITIAL_CHUNK_SIZE);
ASSERT_TRUE(result != NULL);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(p2, false));
// Try to allocate over the limit; this should fail.
result = p2->TryAllocate(MemPoolTest::INITIAL_CHUNK_SIZE * 4);
ASSERT_TRUE(result == NULL);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(p2, false));
// Try to allocate 20 bytes; this should succeed. TryAllocate() should have left
// the pool in a functional state.
result = p2->TryAllocate(20);
ASSERT_TRUE(result != NULL);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(p2, false));
p2->FreeAll();
delete p2;
}
TEST(MemPoolTest, MaxAllocation) {
int64_t int_max_rounded = BitUtil::RoundUp(LARGE_ALLOC_SIZE, 8);
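// LARGE_ALLOC_SIZE is already a multiple of 8, so the RoundUp() is a no-op here.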
// Allocate a single LARGE_ALLOC_SIZE chunk
MemTracker tracker;
MemPool p1(&tracker);
uint8_t* ptr = p1.Allocate(LARGE_ALLOC_SIZE);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(int_max_rounded, p1.GetTotalChunkSizes());
EXPECT_EQ(int_max_rounded, p1.total_allocated_bytes());
p1.FreeAll();
// Allocate a small chunk (INITIAL_CHUNK_SIZE) followed by a LARGE_ALLOC_SIZE chunk
MemPool p2(&tracker);
p2.Allocate(8);
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE, p2.GetTotalChunkSizes());
EXPECT_EQ(8, p2.total_allocated_bytes());
ptr = p2.Allocate(LARGE_ALLOC_SIZE);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(MemPoolTest::INITIAL_CHUNK_SIZE + int_max_rounded,
p2.GetTotalChunkSizes());
EXPECT_EQ(8LL + int_max_rounded, p2.total_allocated_bytes());
p2.FreeAll();
// Allocate three LARGE_ALLOC_SIZE chunks followed by a small chunk
// followed by another LARGE_ALLOC_SIZE chunk.
MemPool p3(&tracker);
p3.Allocate(LARGE_ALLOC_SIZE);
// Allocates new int_max_rounded chunk
ptr = p3.Allocate(LARGE_ALLOC_SIZE);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(int_max_rounded * 2, p3.GetTotalChunkSizes());
EXPECT_EQ(int_max_rounded * 2, p3.total_allocated_bytes());
// Allocates new int_max_rounded chunk
ptr = p3.Allocate(LARGE_ALLOC_SIZE);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(int_max_rounded * 3, p3.GetTotalChunkSizes());
EXPECT_EQ(int_max_rounded * 3, p3.total_allocated_bytes());
// Allocates new MAX_CHUNK_SIZE chunk
#if !defined (ADDRESS_SANITIZER) || (__clang_major__ >= 3 && __clang_minor__ >= 7)
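// This block is skipped under ASAN with clang older than 3.7, presumably because
// older ASAN builds could not handle allocations of this size.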
ptr = p3.Allocate(8);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(int_max_rounded * 3 + MemPoolTest::MAX_CHUNK_SIZE, p3.GetTotalChunkSizes());
EXPECT_EQ(int_max_rounded * 3 + 8, p3.total_allocated_bytes());
// Allocates new int_max_rounded chunk
ptr = p3.Allocate(LARGE_ALLOC_SIZE);
EXPECT_TRUE(ptr != NULL);
EXPECT_EQ(int_max_rounded * 4 + MemPoolTest::MAX_CHUNK_SIZE, p3.GetTotalChunkSizes());
EXPECT_EQ(int_max_rounded * 4 + 8, p3.total_allocated_bytes());
#endif
p3.FreeAll();
}
// Test for IMPALA-2742: chunk size could grow unbounded when calling AcquireData()
// with keep_current = true.
TEST(MemPoolTest, GrowthWithTransfer) {
MemTracker tracker;
MemPool src(&tracker);
MemPool dst(&tracker);
const int64_t total_to_alloc = 100L * 1024L * 1024L * 1024L;
// Choose size that will straddle chunk boundaries to exercise boundary cases.
const int alloc_size = 100 * 1024 + 12345;
// We should have at most one chunk allocated at a time.
const int max_consumption = MemPoolTest::MAX_CHUNK_SIZE;
for (int i = 0; i < total_to_alloc / alloc_size; ++i) {
uint8_t* mem = src.Allocate(alloc_size);
ASSERT_TRUE(mem != NULL);
dst.AcquireData(&src, true);
dst.FreeAll();
// Check that the chunk size limits memory consumption.
ASSERT_LE(src.GetTotalChunkSizes(), max_consumption);
ASSERT_EQ(0, dst.GetTotalChunkSizes());
ASSERT_LE(tracker.consumption(), max_consumption);
}
dst.AcquireData(&src, false);
dst.FreeAll();
}
// Test that the MemPool overhead is bounded when we make allocations of
// INITIAL_CHUNK_SIZE.
TEST(MemPoolTest, MemoryOverhead) {
MemTracker tracker;
MemPool p(&tracker);
const int alloc_size = MemPoolTest::INITIAL_CHUNK_SIZE;
const int num_allocs = 1000;
int64_t total_allocated = 0;
for (int i = 0; i < num_allocs; ++i) {
uint8_t* mem = p.Allocate(alloc_size);
ASSERT_TRUE(mem != NULL);
total_allocated += alloc_size;
int64_t wasted_memory = p.GetTotalChunkSizes() - total_allocated;
// The initial chunk fits evenly into MAX_CHUNK_SIZE, so there should be at most
// one empty chunk at the end.
EXPECT_LE(wasted_memory, MemPoolTest::MAX_CHUNK_SIZE);
// The chunk doubling algorithm should not allocate chunks larger than the total
// amount of memory already allocated.
EXPECT_LE(wasted_memory, total_allocated);
}
p.FreeAll();
}
// Test that the MemPool overhead is bounded when we make alternating large and small
// allocations.
TEST(MemPoolTest, FragmentationOverhead) {
MemTracker tracker;
MemPool p(&tracker);
const int num_allocs = 100;
int64_t total_allocated = 0;
for (int i = 0; i < num_allocs; ++i) {
int alloc_size = i % 2 == 0 ? 1 : MemPoolTest::MAX_CHUNK_SIZE;
uint8_t* mem = p.Allocate(alloc_size);
ASSERT_TRUE(mem != NULL);
total_allocated += alloc_size;
int64_t wasted_memory = p.GetTotalChunkSizes() - total_allocated;
// Fragmentation should not waste more than half of each completed chunk.
EXPECT_LE(wasted_memory, total_allocated + MemPoolTest::MAX_CHUNK_SIZE);
}
p.FreeAll();
}
/// Test interaction between returning a full allocation and transferring chunks.
TEST(MemPoolTest, ReturnAllocationThenTransfer) {
MemTracker tracker;
MemPool src(&tracker);
MemPool dst(&tracker);
const int64_t alloc_size = 1L * 1024 * 1024;
uint8_t* mem = src.Allocate(alloc_size);
ASSERT_TRUE(mem != NULL);
src.ReturnPartialAllocation(alloc_size);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(&src, true));
// IMPALA-3946: this would hit a DCHECK because the current chunk was empty.
dst.AcquireData(&src, true);
ASSERT_EQ(0, dst.GetTotalChunkSizes()); // Src chunk shouldn't be transferred.
ASSERT_TRUE(MemPoolTest::CheckIntegrity(&dst, true));
ASSERT_TRUE(MemPoolTest::CheckIntegrity(&src, true));
src.FreeAll();
dst.FreeAll();
}
/// Test that making a large allocation that doesn't fit in the current chunk after
/// returning a full allocation works correctly.
TEST(MemPoolTest, ReturnAllocationThenLargeAllocation) {
MemTracker tracker;
MemPool src(&tracker);
const int64_t alloc_size = 1L * 1024 * 1024;
uint8_t* mem = src.Allocate(alloc_size);
ASSERT_TRUE(mem != NULL);
src.ReturnPartialAllocation(alloc_size);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(&src, true));
// IMPALA-3946: chunks ended up in an invalid order because the first chunk was
// empty. This resulted in the integrity check failing.
mem = src.Allocate(alloc_size + 1);
ASSERT_TRUE(mem != NULL);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(&src, false));
src.FreeAll();
}
/// Test that a large allocation that fails, made after returning a full
/// allocation, is handled correctly.
TEST(MemPoolTest, ReturnAllocationThenFailedAllocation) {
const int64_t alloc_size = 1L * 1024 * 1024;
const int64_t mem_limit = alloc_size * 10;
MemTracker tracker(mem_limit);
MemPool src(&tracker);
uint8_t* mem;
// Allocate two chunks then clear them so they are empty.
for (int i = 0; i < 2; ++i) {
mem = src.Allocate(alloc_size);
ASSERT_TRUE(mem != NULL);
}
src.Clear();
// IMPALA-3946: the chunk index wasn't reset to point before the empty chunks when
// an allocation failed. This resulted in the integrity check failing.
mem = src.TryAllocate(mem_limit);
ASSERT_TRUE(mem == NULL);
ASSERT_TRUE(MemPoolTest::CheckIntegrity(&src, false));
src.FreeAll();
}
TEST(MemPoolTest, TryAllocateAligned) {
MemTracker tracker(-1);
MemPool pool(&tracker);
int alignment = 1;
int64_t size = 1;
constexpr int NUM_ALLOCATIONS = 100000;
constexpr int MAX_ALLOCATION_SIZE = 113;
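// Sizes cycle modulo MAX_ALLOCATION_SIZE (including zero-byte allocations);
// alignments double up to alignof(std::max_align_t), then wrap back to 1.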
for (int i = 0; i < NUM_ALLOCATIONS; ++i) {
uint8_t* ptr = pool.TryAllocateAligned(size, alignment);
ASSERT_TRUE(ptr != NULL) << "size=" << size << " alignment=" << alignment;
ASSERT_EQ(0, reinterpret_cast<uintptr_t>(ptr) % alignment)
<< "size=" << size << " alignment=" << alignment;
alignment = alignment == alignof(std::max_align_t) ? 1 : alignment * 2;
size = (size + 1) % MAX_ALLOCATION_SIZE;
}
pool.FreeAll();
}
}