/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <Python.h>
#include <qpid/dispatch/alloc.h>
#include <qpid/dispatch/ctools.h>
#include <qpid/dispatch/log.h>
#include <memory.h>
#include <inttypes.h>
#include <stdio.h>
#include "entity.h"
#include "entity_cache.h"
#include "config.h"
const char *QD_ALLOCATOR_TYPE = "allocator";
typedef struct qd_alloc_type_t qd_alloc_type_t;
typedef struct qd_alloc_item_t qd_alloc_item_t;
typedef struct qd_alloc_chunk_t qd_alloc_chunk_t;
typedef struct qd_alloc_linked_stack_t qd_alloc_linked_stack_t;
struct qd_alloc_type_t {
DEQ_LINKS(qd_alloc_type_t);
qd_alloc_type_desc_t *desc;
};
DEQ_DECLARE(qd_alloc_type_t, qd_alloc_type_list_t);
#define PATTERN_FRONT 0xdeadbeef
#define PATTERN_BACK 0xbabecafe
struct qd_alloc_item_t {
uintmax_t sequence; // uintmax_t ensures proper alignment of following data
#ifdef QD_MEMORY_DEBUG
qd_alloc_type_desc_t *desc;
uint32_t header;
#endif
};
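/*
 * Layout of an allocated item: the qd_alloc_item_t header is immediately
 * followed by the user data (total_size bytes).  qd_alloc() returns &item[1]
 * and qd_dealloc() recovers the header by stepping back one header size.
 *
 *   +-----------------+---------------------------+--------------+
 *   | qd_alloc_item_t | user data (total_size)    | PATTERN_BACK |
 *   +-----------------+---------------------------+--------------+
 *                     ^ pointer returned to the caller
 *
 * The PATTERN_BACK trailer is present only in QD_MEMORY_DEBUG builds.
 */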
//CHUNK_SIZE is the number of item pointers held per chunk.  A 128-byte
//items array has been chosen because many CPU architectures use an
//adjacent-line prefetching optimization that loads 2 cache lines
//(128 bytes) in one batch.
#define CHUNK_SIZE (128/sizeof(void*))
struct qd_alloc_chunk_t {
qd_alloc_chunk_t *prev; //do not use DEQ_LINKS here: field position could affect access cost
qd_alloc_item_t *items[CHUNK_SIZE];
qd_alloc_chunk_t *next;
};
struct qd_alloc_linked_stack_t {
qd_alloc_chunk_t *top_chunk; //chunk holding the current top of the stack
uint32_t top; //used slots in top_chunk; when top > 0 the top item is top_chunk->items[top - 1]
uint64_t size; //total number of items on the stack
qd_alloc_chunk_t base_chunk; //embedded first chunk: an empty stack needs no heap allocation
};
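/*
 * The free list is a LIFO stack of item pointers stored in a doubly-linked
 * list of fixed-size chunks.  base_chunk is embedded in the stack itself, so
 * a freshly initialized stack holds up to CHUNK_SIZE items with no heap
 * allocation; additional chunks are allocated on demand by next_chunk_stack()
 * and stay linked for reuse until free_stack_chunks() releases them.
 */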
static inline void init_stack(qd_alloc_linked_stack_t *stack)
{
stack->top_chunk = &stack->base_chunk;
stack->top_chunk->next = NULL;
stack->top = 0;
stack->size = 0;
}
static inline void prev_chunk_stack(qd_alloc_linked_stack_t *const stack)
{
const uint32_t chunk_size = CHUNK_SIZE;
assert(stack->top == 0);
assert(stack->size != 0);
assert(stack->top_chunk != &stack->base_chunk);
qd_alloc_chunk_t *prev = stack->top_chunk->prev;
//TODO(franz): stack->top_chunk could be passed to the caller, which could
// walk its next pointers to recycle the trailing chunk.
// Just need to pay attention to null out released_chunk->prev->next
// to make the released chunk unreachable from the stack
stack->top_chunk = prev;
stack->top = chunk_size;
}
static inline qd_alloc_item_t *pop_stack(qd_alloc_linked_stack_t *const stack)
{
if (stack->top == 0) {
if (stack->size == 0) {
assert(stack->top_chunk == &stack->base_chunk);
return NULL;
}
prev_chunk_stack(stack);
}
assert(stack->top > 0);
stack->top--;
assert(stack->top < CHUNK_SIZE);
assert(stack->size > 0);
stack->size--;
qd_alloc_item_t *item = stack->top_chunk->items[stack->top];
assert(item != NULL);
return item;
}
static inline void free_stack_chunks(qd_alloc_linked_stack_t *stack)
{
assert(stack->size == 0);
//the assumption here is that next is always correctly set
qd_alloc_chunk_t *chunk = stack->base_chunk.next;
while (chunk != NULL) {
qd_alloc_chunk_t *next = chunk->next;
free(chunk);
chunk = next;
}
}
static inline bool next_chunk_stack(qd_alloc_linked_stack_t *const stack)
{
assert(stack->top == CHUNK_SIZE);
qd_alloc_chunk_t *top = stack->top_chunk->next;
if (top == NULL) {
top = NEW(qd_alloc_chunk_t);
if (top == NULL) {
return false;
}
stack->top_chunk->next = top;
top->prev = stack->top_chunk;
top->next = NULL;
}
assert(top->prev == stack->top_chunk);
assert(stack->top_chunk->next == top);
stack->top_chunk = top;
stack->top = 0;
return true;
}
static inline bool push_stack(qd_alloc_linked_stack_t *stack, qd_alloc_item_t *item)
{
const uint32_t chunk_size = CHUNK_SIZE;
if (stack->top == chunk_size) {
if (!next_chunk_stack(stack)) {
return false;
}
}
assert(stack->top < chunk_size);
stack->size++;
stack->top_chunk->items[stack->top] = item;
stack->top++;
return true;
}
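/*
 * Sketch of the stack discipline (illustrative only; item_a and item_b stand
 * for any qd_alloc_item_t pointers):
 *
 *   qd_alloc_linked_stack_t stack;
 *   init_stack(&stack);
 *   push_stack(&stack, item_a);
 *   push_stack(&stack, item_b);
 *   assert(pop_stack(&stack) == item_b);  //LIFO order
 *   assert(pop_stack(&stack) == item_a);
 *   assert(pop_stack(&stack) == NULL);    //an empty stack yields NULL
 *   free_stack_chunks(&stack);
 */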
static inline int unordered_move_stack(qd_alloc_linked_stack_t *from, qd_alloc_linked_stack_t *to, uint32_t length)
{
length = from->size < length ? from->size : length;
if (length == 0) {
return 0;
}
uint32_t remaining = length;
const uint32_t chunk_size = CHUNK_SIZE;
while (remaining > 0) {
//from->top tells us how many items can be copied out of the current source chunk
uint32_t to_copy = remaining;
if (from->top == 0) {
prev_chunk_stack(from);
}
to_copy = from->top < to_copy ? from->top : to_copy;
if (to->top == chunk_size) {
if (!next_chunk_stack(to)) {
return length - remaining;
}
}
uint32_t remaining_to = chunk_size - to->top;
to_copy = remaining_to < to_copy ? remaining_to : to_copy;
from->top -= to_copy;
memcpy(&to->top_chunk->items[to->top], &from->top_chunk->items[from->top], to_copy * sizeof(qd_alloc_item_t *));
to->top += to_copy;
to->size += to_copy;
from->size -= to_copy;
remaining -= to_copy;
}
return length;
}
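/*
 * The move is "unordered" because items are copied between the stacks in
 * chunk-sized memcpy batches, so their relative LIFO order is not preserved;
 * free-list items are interchangeable, so this is harmless.  The return value
 * is the number of items actually moved: the requested length clamped to the
 * source size, or less only if a new destination chunk cannot be allocated.
 */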
struct qd_alloc_pool_t {
DEQ_LINKS(qd_alloc_pool_t);
qd_alloc_linked_stack_t free_list;
};
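/*
 * Default configurations, expressed as {transfer_batch_size,
 * local_free_list_max, global_free_list_max} (see qd_alloc_config_t in
 * alloc.h).  A global_free_list_max of 0 means the global free list is
 * unbounded.  Types larger than BIG_THRESHOLD bytes get the "big" config,
 * which caches fewer items per thread.
 */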
qd_alloc_config_t qd_alloc_default_config_big = {16, 32, 0};
qd_alloc_config_t qd_alloc_default_config_small = {64, 128, 0};
#define BIG_THRESHOLD 2000
static sys_mutex_t *init_lock = 0;
static qd_alloc_type_list_t type_list;
static char *debug_dump = 0;
static void qd_alloc_init(qd_alloc_type_desc_t *desc)
{
sys_mutex_lock(init_lock);
if (!desc->global_pool) {
desc->total_size = desc->type_size;
if (desc->additional_size)
desc->total_size += *desc->additional_size;
if (desc->config == 0)
desc->config = desc->total_size > BIG_THRESHOLD ?
&qd_alloc_default_config_big : &qd_alloc_default_config_small;
assert (desc->config->local_free_list_max >= desc->config->transfer_batch_size);
desc->global_pool = NEW(qd_alloc_pool_t);
DEQ_ITEM_INIT(desc->global_pool);
init_stack(&desc->global_pool->free_list);
desc->lock = sys_mutex();
DEQ_INIT(desc->tpool_list);
#if QD_MEMORY_STATS
desc->stats = NEW(qd_alloc_stats_t);
ZERO(desc->stats);
#endif
qd_alloc_type_t *type_item = NEW(qd_alloc_type_t);
DEQ_ITEM_INIT(type_item);
type_item->desc = desc;
DEQ_INSERT_TAIL(type_list, type_item);
desc->header = PATTERN_FRONT;
desc->trailer = PATTERN_BACK;
qd_entity_cache_add(QD_ALLOCATOR_TYPE, type_item);
}
sys_mutex_unlock(init_lock);
}
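/*
 * Initialization is lazy: qd_alloc() detects an uninitialized descriptor by
 * its missing PATTERN_FRONT header and calls qd_alloc_init() under init_lock.
 * Each initialized type is registered with the entity cache under
 * QD_ALLOCATOR_TYPE, making it visible to management through
 * qd_entity_refresh_allocator() below.
 */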
/* coverity[+alloc] */
void *qd_alloc(qd_alloc_type_desc_t *desc, qd_alloc_pool_t **tpool)
{
int idx;
//
// If the descriptor is not initialized, set it up now.
//
if (desc->header != PATTERN_FRONT)
qd_alloc_init(desc);
//
// If this is the thread's first pass through here, allocate the
// thread-local pool for this type.
//
if (*tpool == 0) {
NEW_CACHE_ALIGNED(qd_alloc_pool_t, *tpool);
DEQ_ITEM_INIT(*tpool);
init_stack(&(*tpool)->free_list);
sys_mutex_lock(desc->lock);
DEQ_INSERT_TAIL(desc->tpool_list, *tpool);
sys_mutex_unlock(desc->lock);
}
qd_alloc_pool_t *pool = *tpool;
//
// Fast case: If there's an item on the local free list, take it off the
// list and return it. Since everything we've touched is thread-local,
// there is no need to acquire a lock.
//
qd_alloc_item_t *item = pop_stack(&pool->free_list);
if (item) {
#ifdef QD_MEMORY_DEBUG
item->desc = desc;
item->header = PATTERN_FRONT;
*((uint32_t*) ((char*) &item[1] + desc->total_size)) = PATTERN_BACK;
QD_MEMORY_FILL(&item[1], QD_MEMORY_INIT, desc->total_size);
#endif
return &item[1];
}
//
// The local free list is empty, we need to either rebalance a batch
// of items from the global list or go to the heap to get new memory.
//
sys_mutex_lock(desc->lock);
if (DEQ_SIZE(desc->global_pool->free_list) >= desc->config->transfer_batch_size) {
//
// Rebalance a full batch from the global free list to the thread list.
//
const int moved = unordered_move_stack(&desc->global_pool->free_list, &pool->free_list,
desc->config->transfer_batch_size);
assert(moved == desc->config->transfer_batch_size);
#if QD_MEMORY_STATS
desc->stats->batches_rebalanced_to_threads++;
desc->stats->held_by_threads += moved;
#endif
} else {
//
// Allocate a full batch from the heap and put it on the thread list.
//
//TODO(franz):
// - it would be better to allocate in batches of transfer_batch_size items
// and embed a small ref_count (of sizeof(transfer_batch_size) bytes) to help the final free
// - could it be beneficial to delink a whole chunk directly?
for (idx = 0; idx < desc->config->transfer_batch_size; idx++) {
size_t size = sizeof(qd_alloc_item_t) + desc->total_size
#ifdef QD_MEMORY_DEBUG
+ sizeof(uint32_t)
#endif
;
ALLOC_CACHE_ALIGNED(size, item);
if (item == 0)
break;
if (!push_stack(&pool->free_list, item)) {
free(item);
break;
}
item->sequence = 0;
#if QD_MEMORY_STATS
desc->stats->held_by_threads++;
desc->stats->total_alloc_from_heap++;
#endif
}
}
sys_mutex_unlock(desc->lock);
item = pop_stack(&pool->free_list);
if (item) {
#ifdef QD_MEMORY_DEBUG
item->desc = desc;
item->header = PATTERN_FRONT;
*((uint32_t*) ((char*) &item[1] + desc->total_size)) = PATTERN_BACK;
QD_MEMORY_FILL(&item[1], QD_MEMORY_INIT, desc->total_size);
#endif
return &item[1];
}
return 0;
}
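/*
 * Callers normally do not invoke qd_alloc()/qd_dealloc() directly; they
 * declare a pooled type with the ALLOC_DECLARE/ALLOC_DEFINE macros from
 * alloc.h, which generate typed wrappers.  Hypothetical illustration (the
 * type my_thing_t is invented for this example):
 *
 *   typedef struct my_thing_t { int value; } my_thing_t;
 *   ALLOC_DECLARE(my_thing_t);  //declares new_my_thing_t()/free_my_thing_t()
 *   ALLOC_DEFINE(my_thing_t);
 *
 *   my_thing_t *t = new_my_thing_t();
 *   //... use t ...
 *   free_my_thing_t(t);
 */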
/* coverity[+free : arg-2] */
void qd_dealloc(qd_alloc_type_desc_t *desc, qd_alloc_pool_t **tpool, char *p)
{
if (!p) return;
qd_alloc_item_t *item = ((qd_alloc_item_t*) p) - 1;
#ifdef QD_MEMORY_DEBUG
assert (desc->header == PATTERN_FRONT);
assert (desc->trailer == PATTERN_BACK);
assert (item->header == PATTERN_FRONT);
assert (*((uint32_t*) (p + desc->total_size)) == PATTERN_BACK);
assert (item->desc == desc); // Check for double-free
item->desc = 0;
QD_MEMORY_FILL(p, QD_MEMORY_FREE, desc->total_size);
#endif
//
// If this is the thread's first pass through here, allocate the
// thread-local pool for this type.
//
if (*tpool == 0) {
*tpool = NEW(qd_alloc_pool_t);
DEQ_ITEM_INIT(*tpool);
init_stack(&(*tpool)->free_list);
sys_mutex_lock(desc->lock);
DEQ_INSERT_TAIL(desc->tpool_list, *tpool);
sys_mutex_unlock(desc->lock);
}
qd_alloc_pool_t *pool = *tpool;
item->sequence++;
if (!push_stack(&pool->free_list, item)) {
free(item);
}
if (DEQ_SIZE(pool->free_list) < desc->config->local_free_list_max)
return;
//
// We've exceeded the maximum size of the local free list. A batch must be
// rebalanced back to the global list.
//
sys_mutex_lock(desc->lock);
const int moved = unordered_move_stack(&pool->free_list, &desc->global_pool->free_list,
desc->config->transfer_batch_size);
assert(moved == desc->config->transfer_batch_size);
#if QD_MEMORY_STATS
desc->stats->batches_rebalanced_to_global++;
desc->stats->held_by_threads -= moved;
#endif
//
// If there's a global_free_list size limit, remove items until the limit is
// not exceeded.
//
if (desc->config->global_free_list_max != 0) {
while (DEQ_SIZE(desc->global_pool->free_list) > desc->config->global_free_list_max) {
item = pop_stack(&desc->global_pool->free_list);
free(item);
#if QD_MEMORY_STATS
desc->stats->total_free_to_heap++;
#endif
}
}
sys_mutex_unlock(desc->lock);
}
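/*
 * The sequence counter is incremented by qd_dealloc() each time an item
 * returns to the pool, so a caller that records a pointer together with its
 * qd_alloc_sequence() can later detect that the item was freed and possibly
 * reused.
 */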
uint32_t qd_alloc_sequence(void *p)
{
if (!p)
return 0;
qd_alloc_item_t *item = ((qd_alloc_item_t*) p) - 1;
return (uint32_t) item->sequence; //sequence is uintmax_t; the public API exposes only 32 bits
}
void qd_alloc_initialize(void)
{
init_lock = sys_mutex();
DEQ_INIT(type_list);
}
void qd_alloc_finalize(void)
{
//
// Note that the logging facility is already finalized by the time this is called.
// We will dump debugging information into debug_dump if specified.
//
// The assumption coming into this finalizer is that all allocations have been
// released. Any non-released objects shall be flagged.
//
//
// Note: By the time we get here, the server threads have been joined and there is
// only the main thread remaining. There is therefore no reason to be
// concerned about locking.
//
qd_alloc_item_t *item;
qd_alloc_type_t *type_item = DEQ_HEAD(type_list);
FILE *dump_file = 0;
if (debug_dump) {
dump_file = fopen(debug_dump, "w");
free(debug_dump);
}
while (type_item) {
qd_entity_cache_remove(QD_ALLOCATOR_TYPE, type_item);
qd_alloc_type_desc_t *desc = type_item->desc;
//
// Reclaim the items on the global free pool
//
item = pop_stack(&desc->global_pool->free_list);
while (item) {
free(item);
#if QD_MEMORY_STATS
desc->stats->total_free_to_heap++;
#endif
item = pop_stack(&desc->global_pool->free_list);
}
free_stack_chunks(&desc->global_pool->free_list);
free(desc->global_pool);
desc->global_pool = 0;
//
// Reclaim the items on thread pools
//
qd_alloc_pool_t *tpool = DEQ_HEAD(desc->tpool_list);
while (tpool) {
item = pop_stack(&tpool->free_list);
while (item) {
free(item);
#if QD_MEMORY_STATS
desc->stats->total_free_to_heap++;
#endif
item = pop_stack(&tpool->free_list);
}
DEQ_REMOVE_HEAD(desc->tpool_list);
free_stack_chunks(&tpool->free_list);
free(tpool);
tpool = DEQ_HEAD(desc->tpool_list);
}
//
// Check the stats for lost items
//
#if QD_MEMORY_STATS
if (dump_file && desc->stats->total_free_to_heap < desc->stats->total_alloc_from_heap)
fprintf(dump_file,
"alloc.c: Items of type '%s' remain allocated at shutdown: %"PRId64"\n",
desc->type_name,
desc->stats->total_alloc_from_heap - desc->stats->total_free_to_heap);
#endif
//
// Reclaim the descriptor components
//
#if QD_MEMORY_STATS
free(desc->stats);
#endif
sys_mutex_free(desc->lock);
desc->lock = 0;
desc->trailer = 0;
DEQ_REMOVE_HEAD(type_list);
free(type_item);
type_item = DEQ_HEAD(type_list);
}
sys_mutex_free(init_lock);
if (dump_file) fclose(dump_file);
}
qd_error_t qd_entity_refresh_allocator(qd_entity_t* entity, void *impl) {
qd_alloc_type_t *alloc_type = (qd_alloc_type_t*) impl;
if (qd_entity_set_string(entity, "typeName", alloc_type->desc->type_name) == 0 &&
qd_entity_set_long(entity, "typeSize", alloc_type->desc->total_size) == 0 &&
qd_entity_set_long(entity, "transferBatchSize", alloc_type->desc->config->transfer_batch_size) == 0 &&
qd_entity_set_long(entity, "localFreeListMax", alloc_type->desc->config->local_free_list_max) == 0 &&
qd_entity_set_long(entity, "globalFreeListMax", alloc_type->desc->config->global_free_list_max) == 0
#if QD_MEMORY_STATS
&& qd_entity_set_long(entity, "totalAllocFromHeap", alloc_type->desc->stats->total_alloc_from_heap) == 0 &&
qd_entity_set_long(entity, "totalFreeToHeap", alloc_type->desc->stats->total_free_to_heap) == 0 &&
qd_entity_set_long(entity, "heldByThreads", alloc_type->desc->stats->held_by_threads) == 0 &&
qd_entity_set_long(entity, "batchesRebalancedToThreads", alloc_type->desc->stats->batches_rebalanced_to_threads) == 0 &&
qd_entity_set_long(entity, "batchesRebalancedToGlobal", alloc_type->desc->stats->batches_rebalanced_to_global) == 0
#endif
)
return QD_ERROR_NONE;
return qd_error_code();
}
void qd_alloc_debug_dump(const char *file) {
debug_dump = file ? strdup(file) : 0;
}