/** @file
Reclaimable freelist implementation
@section license License
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/***********************************************************************
Reclaimable Freelist Implementation
***********************************************************************/
#include "ink_config.h"
#include <assert.h>
#include <errno.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/mman.h>
#include "ink_thread.h"
#include "ink_atomic.h"
#include "ink_queue.h"
#include "ink_memory.h"
#include "ink_error.h"
#include "ink_assert.h"
#include "ink_resource.h"
#include "ink_stack_trace.h"
#include "ink_queue_ext.h"
#if TS_USE_RECLAIMABLE_FREELIST
#define CEIL(x,y) (((x) + (y) - 1L) / (y))
#define ROUND(x,l) (((x) + ((l) - 1L)) & ~((l) - 1L))
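/*
 * For example: CEIL(10, 4) == 3, and ROUND(5000, 4096) == 8192
 * (ROUND requires l to be a power of two).
 */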
#define ITEM_MAGIC 0xFF
#define MAX_NUM_FREELIST 1024
/*
* Configurable Variables
*/
float cfg_reclaim_factor = 0.3;
int64_t cfg_max_overage = 10;
int64_t cfg_enable_reclaim = 0;
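/*
 * Their meanings, as used below:
 * - cfg_reclaim_factor: weight of the latest nr_free sample in the
 *   moving average maintained by refresh_average_info();
 * - cfg_max_overage: number of consecutive over-average checks that
 *   need_to_reclaim() tolerates before requesting a reclaim;
 * - cfg_enable_reclaim: master switch, reclaiming is disabled when 0.
 */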
/*
* Debug filter bit mask:
* bit 0: reclaim in ink_freelist_new
* bit 1: reclaim in ink_freelist_free
* bit 2: fetch memory from thread cache
*/
int64_t cfg_debug_filter;
static uint32_t nr_freelist;
static uint64_t total_mem_in_byte;
static __thread InkThreadCache *ThreadCaches[MAX_NUM_FREELIST];
#define MAX_CHUNK_BYTE_SIZE (ats_pagesize() << 8)
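/* at most 256 pages per chunk, i.e. 1MB with 4KB pages */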
/*
 * For debugging
 */
#define show_info(tag, f, pCache) \
__show_info(stdout, __FILE__, __LINE__, tag, f, pCache)
#define error_info(tag, f, pCache) \
__show_info(stderr, __FILE__, __LINE__, tag, f, pCache)
static inline void
__show_info(FILE *fp, const char *file, int line,
const char *tag, InkFreeList *f, InkThreadCache *pCache)
{
fprintf(fp, "[%lx:%02u][%s:%05d][%s] %6.2fM t:%-8uf:%-4u m:%-4u avg:%-6.1f"
" M:%-4u csbase:%-4u csize:%-4u tsize:%-6u cbsize:%u\n",
(long)ink_thread_self(), f->thread_cache_idx, file, line, tag,
((double)total_mem_in_byte/1024/1024),
pCache->nr_total,
pCache->nr_free,
pCache->nr_min,
pCache->nr_average,
pCache->nr_malloc,
f->chunk_size_base,
f->chunk_size,
f->type_size,
f->chunk_byte_size);
}
static inline void
memory_alignment_init(InkFreeList *f, uint32_t type_size, uint32_t chunk_size,
uint32_t alignment)
{
uint32_t chunk_byte_size, user_alignment, user_type_size;
f->chunk_size_base = chunk_size;
user_alignment = alignment;
user_type_size = type_size;
chunk_size = 1;
#ifdef DEBUG
/*
   * enlarge type_size to hold an item_magic byte
*/
type_size += sizeof(unsigned char);
#endif
  /*
   * Limit the size of each chunk and resize the alignment:
   * 1) when the chunk size > MAX_CHUNK_BYTE_SIZE:
   *      alignment = page_size;
   * 2) when the chunk size <= MAX_CHUNK_BYTE_SIZE:
   *      alignment = (2^N * page_size), and the alignment
   *      should not be larger than MAX_CHUNK_BYTE_SIZE.
   */
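  /*
   * Worked example (illustrative numbers, assuming 4KB pages, a small
   * InkChunkInfo and a non-DEBUG build): type_size = 64 and
   * chunk_size_base = 128 give
   *   chunk_byte_size = ROUND(64 * 128 + sizeof(InkChunkInfo), 4096) = 12288,
   * and alignment doubles from 4096 until it reaches 16384, the first
   * power of two >= 12288.
   */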
alignment = ats_pagesize();
chunk_byte_size = ROUND(type_size + sizeof(InkChunkInfo), ats_pagesize());
if (chunk_byte_size <= MAX_CHUNK_BYTE_SIZE) {
chunk_byte_size = ROUND(type_size * f->chunk_size_base
+ sizeof(InkChunkInfo), ats_pagesize());
if (chunk_byte_size > MAX_CHUNK_BYTE_SIZE) {
chunk_size = (MAX_CHUNK_BYTE_SIZE - sizeof(InkChunkInfo)) / type_size;
chunk_byte_size = ROUND(type_size * chunk_size + sizeof(InkChunkInfo),
ats_pagesize());
} else
chunk_size = (chunk_byte_size - sizeof(InkChunkInfo)) / type_size;
if (chunk_size > 1) {
/* make alignment to be (2^N * page_size),
* but not larger than MAX_CHUNK_BYTE_SIZE */
while (alignment < chunk_byte_size)
alignment <<= 1;
}
}
if (user_alignment > alignment) {
alignment = ats_pagesize();
while (alignment < user_alignment)
alignment <<= 1;
}
ink_release_assert(alignment <= MAX_CHUNK_BYTE_SIZE);
f->alignment = alignment;
f->type_size = user_type_size;
f->chunk_size = chunk_size;
f->chunk_addr_mask = ~((uintptr_t)(alignment - 1));
f->chunk_byte_size = chunk_byte_size;
return;
}
/*
 * mmap_align allocates _size_ bytes and returns a pointer to the
 * allocated memory, whose address is a multiple of _alignment_:
 * 1) _size_ must be a multiple of page_size;
 * 2) _alignment_ must be a power-of-two multiple of page_size.
 */
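/*
 * Example with illustrative addresses (assuming 4KB pages): for
 * size = 8192 and alignment = 16384 (0x4000), extra = 16384 - 4096 =
 * 12288. If mmap() returns 0x7f0000003000, then
 * adjust = 0x4000 - 0x3000 = 0x1000; the 0x1000-byte prefix and the
 * (extra - adjust) = 8192-byte suffix are returned to the system, and
 * the aligned pointer 0x7f0000004000 is the result.
 */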
static void*
mmap_align(size_t size, size_t alignment) {
uintptr_t ptr;
size_t adjust, extra = 0;
ink_assert(size % ats_pagesize() == 0);
/* ask for extra memory if alignment > page_size */
if (alignment > ats_pagesize()) {
extra = alignment - ats_pagesize();
}
void* result = mmap(NULL, size + extra,
PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON,
-1, 0);
if (result == MAP_FAILED) {
ink_stack_trace_dump();
    const char *err_str = "Out of memory, or the process's maximum number of "
                          "mappings would have been exceeded (if so, you can "
                          "enlarge 'vm.max_map_count' via sysctl on Linux).";
ink_fatal(1, "Failed to mmap %zu bytes, %s", size,
(errno == ENOMEM) ? err_str : strerror(errno));
}
/* adjust the return memory so it is aligned */
adjust = 0;
ptr = (uintptr_t)result;
if ((ptr & (alignment - 1)) != 0) {
adjust = alignment - (ptr & (alignment - 1));
}
/* return the unused memory to the system */
if (adjust > 0) {
munmap((void*)ptr, adjust);
}
if (adjust < extra) {
munmap((void*)(ptr + adjust + size), extra - adjust);
}
ptr += adjust;
ink_assert((ptr & (alignment -1)) == 0);
return (void*)ptr;
}
#ifdef DEBUG
static inline uint32_t
get_chunk_item_magic_idx(InkFreeList *f, void *item, InkChunkInfo **ppChunk,
bool do_check = false)
{
uint32_t idx;
uintptr_t chunk_addr;
if (f->chunk_size > 1)
chunk_addr = (uintptr_t)item & f->chunk_addr_mask;
else
chunk_addr = (uintptr_t)item;
if (*ppChunk == NULL)
*ppChunk = (InkChunkInfo *)(chunk_addr + f->type_size * f->chunk_size);
idx = ((uintptr_t)item - chunk_addr) / f->type_size;
if (do_check && (idx >= f->chunk_size
|| ((uintptr_t)item - chunk_addr) % f->type_size)) {
ink_stack_trace_dump();
ink_fatal(1, "Invalid address:%p, chunk_addr:%p, type_size:%d, chunk_size:%u, idx:%u",
item, (void *)chunk_addr, f->type_size, f->chunk_size, idx);
}
return idx;
}
static inline void
set_chunk_item_magic(InkFreeList *f, InkChunkInfo *pChunk, void *item)
{
uint32_t idx;
idx = get_chunk_item_magic_idx(f, item, &pChunk);
ink_release_assert(pChunk->item_magic[idx] == 0);
pChunk->item_magic[idx] = ITEM_MAGIC;
}
static inline void
clear_chunk_item_magic(InkFreeList *f, InkChunkInfo *pChunk, void *item)
{
uint32_t idx;
idx = get_chunk_item_magic_idx(f, item, &pChunk, true);
ink_release_assert(pChunk->item_magic[idx] == ITEM_MAGIC);
pChunk->item_magic[idx] = 0;
}
#else
#define set_chunk_item_magic(a, b, c)
#define clear_chunk_item_magic(a, b, c)
#endif
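/*
 * Each chunk is laid out as:
 *   [item 0][item 1]...[item chunk_size - 1][InkChunkInfo]
 * so the InkChunkInfo trailer lives at chunk_addr + type_size * chunk_size.
 * When chunk_size > 1, chunks are aligned to a power-of-two boundary no
 * smaller than chunk_byte_size, so chunk_addr can be recovered from any
 * item address by masking with chunk_addr_mask.
 */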
static inline InkChunkInfo *
get_chunk_info_addr(InkFreeList *f, void *item)
{
uintptr_t chunk_addr;
if (f->chunk_size > 1)
chunk_addr = (uintptr_t)item & f->chunk_addr_mask;
else
chunk_addr = (uintptr_t)item;
return (InkChunkInfo *)(chunk_addr + f->type_size * f->chunk_size);
}
static inline InkChunkInfo *
ink_chunk_create(InkFreeList *f, InkThreadCache *pCache)
{
uint32_t i;
uint32_t type_size, chunk_size;
void *chunk_addr, *curr, *next;
InkChunkInfo *pChunk;
chunk_addr = mmap_align(f->chunk_byte_size, f->alignment);
pChunk = (InkChunkInfo *)((char *)chunk_addr + f->type_size * f->chunk_size);
type_size = f->type_size;
chunk_size = f->chunk_size;
pChunk->tid = ink_thread_self();
pChunk->head = chunk_addr;
pChunk->type_size = type_size;
pChunk->chunk_size = chunk_size;
pChunk->length = f->chunk_byte_size;
pChunk->allocated = 0;
pChunk->pThreadCache = pCache;
pChunk->link = Link<InkChunkInfo>();
#ifdef DEBUG
  /*
   * The memory is initialized to zero when mmap() is
   * called with the MAP_ANONYMOUS flag on the Linux
   * platform.
   */
#if !defined(linux)
memset(pChunk->item_magic, 0, chunk_size * sizeof(unsigned char));
#endif
#endif
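  /* thread all items into a singly linked free list: the first
   * word of each free item holds the address of the next one */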
curr = pChunk->head;
pChunk->inner_free_list = curr;
for (i = 1; i < chunk_size; i++) {
next = (void *)((char *)curr + type_size);
*(void **)curr = next;
curr = next;
}
*(void **)curr = NULL;
ink_atomic_increment(&f->count, chunk_size);
ink_atomic_increment(&total_mem_in_byte, f->chunk_byte_size);
pCache->free_chunk_list.push(pChunk);
pCache->nr_free_chunks++;
return pChunk;
}
static inline void
ink_chunk_delete(InkFreeList *f, InkThreadCache *pCache, InkChunkInfo *pChunk)
{
void *chunk_addr = pChunk->head;
ink_assert(pChunk->allocated == 0);
pCache->free_chunk_list.remove(pChunk);
pCache->nr_free_chunks--;
if (unlikely(munmap(chunk_addr, f->chunk_byte_size))) {
ink_stack_trace_dump();
ink_fatal(1, "Failed to munmap %u bytes, %s", f->chunk_byte_size, strerror(errno));
}
ink_atomic_increment((int *)&f->count, -f->chunk_size);
  /*
   * TODO: ink_atomic_increment() was originally used here, but it
   * produced an incorrect value on Linux for reasons that are not yet
   * understood:
   *   ink_atomic_increment((int64_t *)&total_mem_in_byte, -f->chunk_byte_size);
   *
   * As a workaround, a new wrapper, ink_atomic_decrement(), was added
   * to ink_atomic.h; it works well, but the same wrapper still needs
   * to be provided for other platforms.
   */
ink_atomic_decrement(&total_mem_in_byte, f->chunk_byte_size);
}
static inline void *
malloc_whole_chunk(InkFreeList *f, InkThreadCache *pCache, InkChunkInfo *pChunk)
{
uint32_t i;
uint32_t type_size, chunk_size;
void *next, *item;
ink_assert(pChunk->allocated == 0);
type_size = f->type_size;
chunk_size = f->chunk_size;
item = pChunk->head;
for (i = 1; i < chunk_size; i++) {
next = (void *)((char *)item + i * type_size);
ink_atomic_increment(&pCache->nr_free, 1);
ink_atomiclist_push(&pCache->outer_free_list, next);
}
pChunk->allocated += chunk_size;
pChunk->inner_free_list = NULL;
pCache->nr_total += chunk_size;
ink_atomic_increment(&f->allocated, chunk_size);
return item;
}
static inline void *
malloc_from_chunk(InkFreeList *f, InkThreadCache *pCache, InkChunkInfo *pChunk)
{
void *item;
if ((item = pChunk->inner_free_list)) {
pChunk->inner_free_list = *(void **)item;
pChunk->allocated++;
pCache->nr_total++;
ink_atomic_increment(&f->allocated, 1);
}
return item;
}
static inline void
free_to_chunk(InkFreeList *f, InkThreadCache *pCache, void *item)
{
InkChunkInfo *pChunk;
pChunk = get_chunk_info_addr(f, item);
pChunk->allocated--;
ink_atomic_increment((int *)&f->allocated, -1);
pCache->nr_total--;
*(void **)item = pChunk->inner_free_list;
pChunk->inner_free_list = item;
if (pChunk->allocated == 0)
ink_chunk_delete(f, pCache, pChunk);
}
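/*
 * Allocate 'nr' items from this thread's chunks: the last item is
 * returned to the caller, and the other nr - 1 items are pushed onto
 * the lock-free outer_free_list. A new chunk is created if the
 * existing chunks cannot satisfy the request.
 */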
static inline void *
malloc_from_cache(InkFreeList *f, InkThreadCache *pCache, uint32_t nr)
{
void *item;
InkChunkInfo *pChunk;
pChunk = pCache->free_chunk_list.head;
while (pChunk) {
while ((item = malloc_from_chunk(f, pCache, pChunk))) {
if (--nr == 0)
return item;
ink_atomic_increment(&pCache->nr_free, 1);
ink_atomiclist_push(&pCache->outer_free_list, item);
}
pChunk = pChunk->link.next;
}
pChunk = ink_chunk_create(f, pCache);
if (nr == f->chunk_size)
return malloc_whole_chunk(f, pCache, pChunk);
while ((item = malloc_from_chunk(f, pCache, pChunk))) {
if (--nr == 0)
return item;
ink_atomic_increment(&pCache->nr_free, 1);
ink_atomiclist_push(&pCache->outer_free_list, item);
}
ink_assert(0);
return NULL;
}
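/*
 * Flush items back to their chunks: the optional 'item' plus up to
 * 'nr' items popped from the outer_free_list; nr_free is then reduced
 * by the number of items actually popped.
 */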
static inline void
free_to_cache(InkFreeList *f, InkThreadCache *pCache, void *item, uint32_t nr)
{
uint32_t n = nr;
if (item)
free_to_chunk(f, pCache, item);
while (n && (item = ink_atomiclist_pop(&pCache->outer_free_list))) {
free_to_chunk(f, pCache, item);
n--;
}
ink_atomic_increment((int *)&pCache->nr_free, -(nr - n));
}
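/*
 * nr_average is an exponentially weighted moving average of nr_free:
 *   avg' = (1 - R) * avg + R * nr_free, where R = cfg_reclaim_factor.
 * For example, with R = 0.3, avg = 100 and nr_free = 60, the new
 * average is 0.7 * 100 + 0.3 * 60 = 88.
 */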
static inline void
refresh_average_info(InkThreadCache *pCache)
{
uint32_t nr_free;
float nr_average;
nr_free = pCache->nr_free;
nr_average = pCache->nr_average;
if (pCache->status == 1 || nr_free < pCache->nr_min)
pCache->nr_min = nr_free;
pCache->nr_average = (nr_average * (1 - cfg_reclaim_factor)) +
(nr_free * cfg_reclaim_factor);
}
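/*
 * A cache is reclaimed only after it has held at least its average
 * number of free items, with more items in total than chunk_size_base,
 * for more than cfg_max_overage consecutive checks; any check that
 * fails the condition resets the overage counter.
 */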
static inline bool
need_to_reclaim(InkFreeList *f, InkThreadCache *pCache)
{
if (!cfg_enable_reclaim)
return false;
  if (pCache->nr_free >= pCache->nr_average &&
      pCache->nr_total > f->chunk_size_base) {
if (pCache->nr_overage++ >= cfg_max_overage) {
pCache->nr_overage = 0;
return true;
}
return false;
}
pCache->nr_overage = 0;
return false;
}
void
reclaimable_freelist_init(InkFreeList **fl, const char *name,
uint32_t type_size, uint32_t chunk_size,
uint32_t alignment)
{
InkFreeList *f;
ink_freelist_list *fll = freelists;
/* quick test for power of 2 */
ink_assert(!(alignment & (alignment - 1)));
/* NOTE: it's safe to operate on this global list because
* ink_freelist_init() is only called from single-threaded
* initialization code. */
while (fll) {
/* Reuse InkFreeList if it has the same type_size. */
if (fll->fl->type_size == type_size) {
fll->fl->refcnt++;
*fl = fll->fl;
return;
}
fll = fll->next;
}
f = (InkFreeList *)ats_memalign(alignment, sizeof(InkFreeList));
fll = (ink_freelist_list *)ats_memalign(alignment, sizeof(ink_freelist_list));
fll->fl = f;
fll->next = freelists;
freelists = fll;
f->name = name;
f->count = 0;
f->allocated = 0;
f->allocated_base = 0;
f->count_base = 0;
memory_alignment_init(f, type_size, chunk_size, alignment);
f->refcnt = 1;
f->pThreadCache = NULL;
f->nr_thread_cache = 0;
f->thread_cache_idx = nr_freelist++;
ink_assert(f->thread_cache_idx < MAX_NUM_FREELIST);
ink_mutex_init(&f->lock, "InkFreeList Lock");
*fl = f;
}
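/*
 * Allocation proceeds in order:
 * 1) pop an item from this thread's lock-free outer_free_list;
 * 2) failing that, steal an item from a sibling thread cache's
 *    outer_free_list;
 * 3) otherwise, reclaim over-average memory from every cache owned by
 *    this thread, then carve items out of this thread's own chunks,
 *    creating a new chunk if necessary.
 */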
void *
reclaimable_freelist_new(InkFreeList *f)
{
void *ptr;
uint32_t i, nr;
uint32_t old_value;
uint32_t num_to_move;
InkChunkInfo *pChunk = NULL;
InkThreadCache *pCache, *pNextCache;
/* no thread cache, create it */
if (unlikely((pCache = ThreadCaches[f->thread_cache_idx]) == NULL)) {
pCache = (InkThreadCache *) ats_calloc(1, sizeof(InkThreadCache));
pCache->f = f;
pCache->free_chunk_list = DLL<InkChunkInfo>();
    /* this lock is only taken when initializing a
     * thread cache, so it does not hurt performance */
ink_mutex_acquire(&f->lock);
ink_atomiclist_init(&pCache->outer_free_list, f->name, 0);
nr = CEIL(f->chunk_size_base, f->chunk_size);
for (i = 0; i < nr; i++) {
pChunk = ink_chunk_create(f, pCache);
}
pCache->nr_malloc = 1;
ThreadCaches[f->thread_cache_idx] = pCache;
if (f->pThreadCache) {
      /* other threads walk pCache->next without holding the lock,
       * so the order of the following statements is important */
pCache->next = f->pThreadCache;
pCache->prev = f->pThreadCache->prev;
pCache->next->prev = pCache;
pCache->prev->next = pCache;
} else {
pCache->next = pCache;
pCache->prev = pCache;
}
f->pThreadCache = pCache;
f->nr_thread_cache++;
ink_mutex_release(&f->lock);
ptr = malloc_whole_chunk(f, pCache, pChunk);
set_chunk_item_magic(f, pChunk, ptr);
return ptr;
}
pCache->status = 0;
  /* prefer to fetch memory from the outer_free_list */
if ((ptr = ink_atomiclist_pop(&pCache->outer_free_list))) {
old_value = ink_atomic_increment((int *)&pCache->nr_free, -1);
ink_release_assert(old_value > 0);
ink_atomic_increment(&pCache->nr_malloc, 1);
set_chunk_item_magic(f, NULL, ptr);
return ptr;
}
/* try to steal memory from other thread's outer_free_list */
pNextCache = pCache->next;
while (pNextCache != pCache) {
if ((ptr = ink_atomiclist_pop(&pNextCache->outer_free_list))) {
old_value = ink_atomic_increment((int *)&pNextCache->nr_free, -1);
ink_release_assert(old_value > 0);
ink_atomic_increment(&pNextCache->nr_malloc, 1);
set_chunk_item_magic(f, NULL, ptr);
return ptr;
}
pNextCache = pNextCache->next;
}
/* try to reclaim memory from all caches in the same thread */
for (i = 0; i < nr_freelist; i++) {
if ((pNextCache = ThreadCaches[i]) == NULL)
continue;
if (need_to_reclaim(pNextCache->f, pNextCache)) {
if (cfg_debug_filter & 0x1)
show_info("F", pNextCache->f, pNextCache);
num_to_move = MIN(pNextCache->nr_average, pNextCache->nr_free);
free_to_cache(pNextCache->f, pNextCache, NULL, num_to_move);
if (cfg_debug_filter & 0x1)
show_info("-", pNextCache->f, pNextCache);
refresh_average_info(pNextCache);
}
}
/* finally, fetch from thread local cache */
if (cfg_debug_filter & 0x2)
show_info("M", f, pCache);
ptr = malloc_from_cache(f, pCache, f->chunk_size);
if (cfg_debug_filter & 0x2)
show_info("+", f, pCache);
refresh_average_info(pCache);
ink_atomic_increment(&pCache->nr_malloc, 1);
set_chunk_item_magic(f, NULL, ptr);
return ptr;
}
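/*
 * Freeing pushes the item onto the *owning* thread cache's lock-free
 * outer_free_list (the cache recorded in the chunk trailer), so items
 * freed by another thread still return to the thread that allocated
 * their chunk.
 */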
void
reclaimable_freelist_free(InkFreeList *f, void *item)
{
InkChunkInfo *pChunk;
InkThreadCache *pCache;
if (item == NULL)
return;
pChunk = get_chunk_info_addr(f, item);
clear_chunk_item_magic(f, pChunk, item);
pCache = pChunk->pThreadCache;
ink_atomic_increment((int *)&pCache->nr_malloc, -1);
if (ink_atomic_cas((int *)&pCache->status, 0, 1))
refresh_average_info(pCache);
ink_atomic_increment(&pCache->nr_free, 1);
ink_atomiclist_push(&pCache->outer_free_list, item);
}
#endif