/* frame_malloc.c -*-C-*-
 *
 *************************************************************************
 *
 * @copyright
 * Copyright (C) 2009-2013, Intel Corporation
 * All rights reserved.
 *
 * @copyright
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * @copyright
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 **************************************************************************/

#include "frame_malloc.h"
#include "bug.h"
#include "local_state.h"
#include "cilk_malloc.h"

#ifndef __VXWORKS__
#include <memory.h>
#endif
/* #define USE_MMAP 1 */
#if USE_MMAP
#define __USE_MISC 1
#include <sys/mman.h>
#include <errno.h>
#include <assert.h> /* the mmap debugging path below uses assert() */
#endif

// Define to fill the stack frame header with the fill character when pushing
// it on a free list.  Note that this should be #ifdef'd out when checked in!

#ifdef _DEBUG
#define HEADER_FILL_CHAR 0xbf
#endif

// HEADER_FILL_CHAR should not be defined when checked in, so put out a warning
// message if this is a release build

#if defined(NDEBUG) && defined (HEADER_FILL_CHAR)
#pragma message ("Warning: HEADER_FILL_CHAR defined for a release build")
#endif

static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size);

#ifndef _WIN32

const unsigned short __cilkrts_bucket_sizes[FRAME_MALLOC_NBUCKETS] =
{
    64, 128, 256, 512, 1024, 2048
};

#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) __cilkrts_bucket_sizes[bucket]

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE 2048

#else // _WIN32

/* Note that this must match the implementation of framesz_to_bucket in
 * asmilator/layout.ml! */
#define FRAME_MALLOC_BUCKET_TO_SIZE(bucket) ((size_t)(64 << (bucket)))

/* threshold above which we use slow malloc */
#define FRAME_MALLOC_MAX_SIZE \
    FRAME_MALLOC_BUCKET_TO_SIZE(FRAME_MALLOC_NBUCKETS - 1)

#endif // _WIN32
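
/* For example, a 100-byte request rounds up to bucket 1 (128 bytes);
   anything larger than FRAME_MALLOC_MAX_SIZE bypasses the buckets
   entirely and is served by __cilkrts_malloc()/__cilkrts_free(). */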

/* utility procedures */
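/* The free lists are intrusive: each free block's first word holds the
   cdr pointer to the next free block, so no extra memory is needed to
   link them. */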
static void push(struct free_list **b, struct free_list *p)
{
#ifdef HEADER_FILL_CHAR
    memset(p, HEADER_FILL_CHAR, FRAME_MALLOC_BUCKET_TO_SIZE(0));
#endif
    /* cons! onto free list */
    p->cdr = *b;
    *b = p;
}

static struct free_list *pop(struct free_list **b)
{
    struct free_list *p = *b;
    if (p)
        *b = p->cdr;
    return p;
}

/*************************************************************
  global allocator:
*************************************************************/
/* request slightly less than 2^K from the OS, which after malloc
   overhead and alignment should end up filling each VM page almost
   completely.  128 is a guess of the total malloc overhead and cache
   line alignment */
#define FRAME_MALLOC_CHUNK (32 * 1024 - 128)
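
/* For example, 32 * 1024 - 128 = 32640 bytes per chunk: enough for 15 of
   the largest (2048-byte) frames, or 510 of the smallest (64-byte) ones,
   before the pool must be extended again. */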

/** Implements linked list of frames */
struct pool_cons {
    char *p;                /**< This element of the list */
    struct pool_cons *cdr;  /**< Remainder of the list */
};

static void extend_global_pool(global_state_t *g)
{
    /* FIXME: memalign to a cache line? */
    struct pool_cons *c = (struct pool_cons *)__cilkrts_malloc(sizeof(*c));
    g->frame_malloc.pool_begin =
        (char *)__cilkrts_malloc((size_t)FRAME_MALLOC_CHUNK);
    g->frame_malloc.pool_end =
        g->frame_malloc.pool_begin + FRAME_MALLOC_CHUNK;
    g->frame_malloc.allocated_from_os += FRAME_MALLOC_CHUNK;
    c->p = g->frame_malloc.pool_begin;
    c->cdr = g->frame_malloc.pool_list;
    g->frame_malloc.pool_list = c;
}

/* the size is already canonicalized at this point */
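/* Grab one frame for BUCKET: reuse a frame from the global free list if
   one is available, otherwise carve it off the linear pool, extending the
   pool from the OS when the remaining fragment is too small.  Callers in
   this file invoke this while holding g->frame_malloc.lock. */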
static struct free_list *global_alloc(global_state_t *g, int bucket)
{
    struct free_list *mem;
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool += size;

    if (!(mem = pop(&g->frame_malloc.global_free_list[bucket]))) {

        CILK_ASSERT(g->frame_malloc.pool_begin <= g->frame_malloc.pool_end);
        if (g->frame_malloc.pool_begin + size > g->frame_malloc.pool_end) {
            /* We waste the fragment of pool. */
            g->frame_malloc.wasted +=
                g->frame_malloc.pool_end - g->frame_malloc.pool_begin;
            extend_global_pool(g);
        }
        mem = (struct free_list *)g->frame_malloc.pool_begin;
        g->frame_malloc.pool_begin += size;
    }

    return mem;
}

static void global_free(global_state_t *g, void *mem, int bucket)
{
    size_t size;

    CILK_ASSERT(bucket < FRAME_MALLOC_NBUCKETS);
    size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    g->frame_malloc.allocated_from_global_pool -= size;

    push(&g->frame_malloc.global_free_list[bucket], mem);
}

void __cilkrts_frame_malloc_global_init(global_state_t *g)
{
    int i;

    __cilkrts_mutex_init(&g->frame_malloc.lock);
    g->frame_malloc.check_for_leaks = 1;
    g->frame_malloc.pool_list = 0;
    g->frame_malloc.pool_begin = 0;
    g->frame_malloc.pool_end = 0;
    g->frame_malloc.batch_size = 8000;
    g->frame_malloc.potential_limit = 4 * g->frame_malloc.batch_size;
    g->frame_malloc.allocated_from_os = 0;
    g->frame_malloc.allocated_from_global_pool = 0;
    g->frame_malloc.wasted = 0;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        g->frame_malloc.global_free_list[i] = 0;
}

// Counts how many bytes are in the global free list.
static size_t count_memory_in_global_list(global_state_t *g)
{
    // Count the memory remaining in the global free list.
    size_t size_remaining_in_global_list = 0;
    int i;
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        struct free_list *p;
        size_t size_in_bucket = 0;
        p = g->frame_malloc.global_free_list[i];

        while (p) {
            size_in_bucket += FRAME_MALLOC_BUCKET_TO_SIZE(i);
            p = p->cdr;
        }
        size_remaining_in_global_list += size_in_bucket;
    }
    return size_remaining_in_global_list;
}


void __cilkrts_frame_malloc_global_cleanup(global_state_t *g)
{
    struct pool_cons *c;

    if (g->frame_malloc.check_for_leaks) {
        size_t memory_in_global_list = count_memory_in_global_list(g);
        // TBD: This check is weak.  Short of memory corruption, we should
        // never have more memory in the free list than was allocated from
        // the OS.
        // Ideally, we should count the memory in the global free list
        // and check that we have it all.  But I believe the runtime
        // itself also uses some memory, which is not being tracked.
        if (memory_in_global_list > g->frame_malloc.allocated_from_os) {
            __cilkrts_bug("\nError. The Cilk runtime data structures may have been corrupted.\n");
        }
    }

    while ((c = g->frame_malloc.pool_list)) {
        g->frame_malloc.pool_list = c->cdr;
        __cilkrts_free(c->p);
        __cilkrts_free(c);
    }

    __cilkrts_mutex_destroy(0, &g->frame_malloc.lock);

    // Check that all the memory moved from the global pool into
    // workers has been returned to the global pool.
    if (g->frame_malloc.check_for_leaks
        && (g->frame_malloc.allocated_from_global_pool != 0))
    {
        __cilkrts_bug("\n"
                      "---------------------------" "\n"
                      "  MEMORY LEAK DETECTED!!!  " "\n"
                      "---------------------------" "\n"
                      "\n"
            );
    }
}

/*************************************************************
  per-worker allocator
*************************************************************/
/* allocate a batch of frames of size SIZE from the global pool and
   store them in the worker's free list */
static void allocate_batch(__cilkrts_worker *w, int bucket, size_t size)
{
    global_state_t *g = w->g;

    __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
#if USE_MMAP
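        /* Debugging path (off by default): map three pages, make the first
           and last inaccessible, and place a single frame flush against the
           trailing guard page, so any overrun past the end of the frame
           faults immediately. */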
        char *p = mmap(0, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED)
            __cilkrts_bug("mmap failed %d", errno);
        assert(size < 4096);
        assert(p != MAP_FAILED);
        mprotect(p, 4096, PROT_NONE);
        mprotect(p + 8192, 4096, PROT_NONE);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], (struct free_list *)(p + 8192 - size));
#else
        size_t bytes_allocated = 0;
        do {
            w->l->bucket_potential[bucket] += size;
            bytes_allocated += size;
            push(&w->l->free_list[bucket], global_alloc(g, bucket));
        } while (bytes_allocated < g->frame_malloc.batch_size);
#endif
    } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);

}

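/* Trim the worker's free list for BUCKET, keeping roughly half of its
   potential in bytes and returning the rest of the frames to the global
   pool.  The bucket potential grows as frames are batch-allocated or freed
   onto the local list, and is reset here to the number of bytes kept. */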
static void gc_bucket(__cilkrts_worker *w, int bucket, size_t size)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    size_t pot = w->l->bucket_potential[bucket];
    size_t newpot;

    /* Keep up to POT/2 elements in the free list.  The cost of
       counting up to POT/2 is amortized against POT. */
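    /* For example, with a potential of 32768 bytes in the 2048-byte bucket,
       roughly 16384 bytes' worth of frames stay on the local list and the
       remainder is handed back. */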
    for (newpot = 0, p = w->l->free_list[bucket]; p && 2 * newpot < pot;
         p = p->cdr, newpot += size)
        ;
    w->l->bucket_potential[bucket] = newpot;

    if (p) {
        /* free the rest of the list.  The cost of grabbing the lock
           is amortized against POT/2; the cost of traversing the rest
           of the list is amortized against the free operation that
           puts the element on the list. */
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p->cdr)))
#if USE_MMAP
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }
}

// Free all the memory in this bucket for the specified worker,
// returning it to the global pool's free list.
static void move_bucket_to_global_free_list(__cilkrts_worker *w,
                                            int bucket)
{
    struct free_list *p, *q;
    global_state_t *g = w->g;
    p = w->l->free_list[bucket];

    if (p) {
        __cilkrts_mutex_lock(w, &g->frame_malloc.lock); {
            while ((q = pop(&p))) {
#if USE_MMAP
                size_t size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
                munmap((char *)q + size - 8192, 12288);
#else
                global_free(g, q, bucket);
#endif
            }
        } __cilkrts_mutex_unlock(w, &g->frame_malloc.lock);
    }

    // Resetting the potential is probably unnecessary, since the worker
    // is about to be destroyed, but it costs nothing and keeps the
    // bucket's bookkeeping consistent.
    w->l->bucket_potential[bucket] = 0;
}

static int bucket_of_size(size_t size)
{
    int i;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i)
        if (size <= FRAME_MALLOC_BUCKET_TO_SIZE(i))
            return i;

    CILK_ASSERT(0 /* can't happen */);
    return -1;
}

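/* Round SIZE up to the size of the bucket that would actually be used to
   satisfy it, so callers know how much usable space they really get.
   Sizes above FRAME_MALLOC_MAX_SIZE are returned unchanged. */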
size_t __cilkrts_frame_malloc_roundup(size_t size)
{
    if (size > FRAME_MALLOC_MAX_SIZE) {
        /* nothing, leave it alone */
    } else {
        int bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
    }
    return size;
}

size_t __cilkrts_size_of_bucket(int bucket)
{
    CILK_ASSERT(bucket >= 0 && bucket < FRAME_MALLOC_NBUCKETS);
    return FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
}

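/* Allocate a frame of at least SIZE bytes on behalf of worker W.  Small
   requests come from the worker's per-bucket free list, which is refilled
   in batches from the global pool when it runs dry; oversized requests, or
   requests made with no worker, fall back to __cilkrts_malloc(). */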
void *__cilkrts_frame_malloc(__cilkrts_worker *w, size_t size)
{
    int bucket;
    void *mem;

    /* if too large, or if no worker, fall back to __cilkrts_malloc() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_ALLOC_LARGE);
        return __cilkrts_malloc(size);
    }

    START_INTERVAL(w, INTERVAL_FRAME_ALLOC); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);

        while (!(mem = pop(&w->l->free_list[bucket]))) {
            /* get a batch of frames from the global pool */
            START_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL) {
                allocate_batch(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_ALLOC);

    return mem;
}

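/* Return a frame previously obtained from __cilkrts_frame_malloc().  Small
   frames go back onto the worker's free list; once the bucket's potential
   exceeds the global limit, gc_bucket() trims the surplus back to the
   global pool.  Oversized frames, or frees with no worker, go straight to
   __cilkrts_free(). */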
void __cilkrts_frame_free(__cilkrts_worker *w, void *p0, size_t size)
{
    int bucket;
    struct free_list *p = (struct free_list *)p0;

    /* if too large, or if no worker, fall back to __cilkrts_free() */
    if (!w || size > FRAME_MALLOC_MAX_SIZE) {
        NOTE_INTERVAL(w, INTERVAL_FRAME_FREE_LARGE);
        __cilkrts_free(p);
        return;
    }

#if CILK_LIB_DEBUG
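    /* Force a read through the worker pointer so that an invalid pointer
       faults here, in debug builds, rather than somewhere less obvious. */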
    *(volatile long *)w;
#endif

    START_INTERVAL(w, INTERVAL_FRAME_FREE); {
        bucket = bucket_of_size(size);
        size = FRAME_MALLOC_BUCKET_TO_SIZE(bucket);
        w->l->bucket_potential[bucket] += size;
        push(&w->l->free_list[bucket], p);
        if (w->l->bucket_potential[bucket] >
            w->g->frame_malloc.potential_limit) {
            START_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL) {
                gc_bucket(w, bucket, size);
            } STOP_INTERVAL(w, INTERVAL_FRAME_FREE_GLOBAL);
        }
    } STOP_INTERVAL(w, INTERVAL_FRAME_FREE);
}

void __cilkrts_frame_malloc_per_worker_init(__cilkrts_worker *w)
{
    int i;
    local_state *l = w->l;

    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        l->free_list[i] = 0;
        l->bucket_potential[i] = 0;
    }
}

void __cilkrts_frame_malloc_per_worker_cleanup(__cilkrts_worker *w)
{
    int i;
    // Move memory to the global pool.  This operation
    // ensures the memory does not become unreachable / leak
    // when the worker is destroyed.
    for (i = 0; i < FRAME_MALLOC_NBUCKETS; ++i) {
        move_bucket_to_global_free_list(w, i);
    }
}

/*
  Local Variables: **
  c-file-style:"bsd" **
  c-basic-offset:4 **
  indent-tabs-mode:nil **
  End: **
*/