/*
* Copyright (c) 1996
* Silicon Graphics Computer Systems, Inc.
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Silicon Graphics makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*/
#ifndef __PTHREAD_ALLOC_H
#define __PTHREAD_ALLOC_H
// Pthread-specific node allocator.
// This is similar to the default allocator, except that free-list
// information is kept separately for each thread, avoiding locking.
// This should be reasonably fast even in the presence of threads.
// The downside is that storage may not be as well utilized.
// It is not an error to allocate memory in thread A and deallocate
// it in thread B.  But doing so transfers ownership of the memory,
// so that it can subsequently be reallocated only by thread B.  This
// can effectively result in a storage leak if it is done on a regular basis.
// It can also result in frequent sharing of
// cache lines among processors, with potentially serious performance
// consequences.
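//
// A minimal usage sketch (illustrative only; the 40-byte size below is
// an arbitrary example, not part of the interface):
//
//     void* p = pthread_alloc::allocate(40);  // from this thread's free lists
//     /* ... use the 40-byte block ... */
//     pthread_alloc::deallocate(p, 40);       // back to this thread's free lists
//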
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <alloc.h>
#ifndef __RESTRICT
# define __RESTRICT
#endif
// Note that this class has nonstatic members. We instantiate it once
// per thread.
template <bool dummy>
class __pthread_alloc_template {
private:
enum {ALIGN = 8};
enum {MAX_BYTES = 128}; // power of 2
enum {NFREELISTS = MAX_BYTES/ALIGN};
union obj {
union obj * free_list_link;
char client_data[ALIGN]; /* The client sees this. */
};
// Per instance state
obj* volatile free_list[NFREELISTS];
__pthread_alloc_template<dummy>* next; // Link in the free_allocators list of unclaimed instances
static size_t ROUND_UP(size_t bytes) {
return (((bytes) + ALIGN-1) & ~(ALIGN - 1));
}
static size_t FREELIST_INDEX(size_t bytes) {
return (((bytes) + ALIGN-1)/ALIGN - 1);
}
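// For example, with ALIGN == 8, ROUND_UP(10) == 16 and
// FREELIST_INDEX(10) == 1, i.e. a 10-byte request is served from the
// free list of 16-byte objects.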
// Returns an object of size n, and optionally adds to size n free list.
void *refill(size_t n);
// Allocates a chunk for nobjs of size size. nobjs may be reduced
// if it is inconvenient to allocate the requested number.
static char *chunk_alloc(size_t size, int &nobjs);
// Chunk allocation state. And other shared state.
// Protected by chunk_allocator_lock.
static pthread_mutex_t chunk_allocator_lock;
static char *start_free;
static char *end_free;
static size_t heap_size;
static __pthread_alloc_template<dummy>* free_allocators;
// Allocator instances that are currently unclaimed by any thread.
static pthread_key_t key;
// Pthread key under which the calling thread's allocator is stored.
static bool key_initialized;
// True once key has been created.
static void destructor(void *instance);
// Function to be called on thread exit to reclaim allocator
// instance.
static __pthread_alloc_template<dummy> *new_allocator();
// Return a recycled or new allocator instance.
static __pthread_alloc_template<dummy> *get_allocator_instance();
// Ensure that the current thread has an associated
// allocator instance, and return it.
class lock {
public:
lock () { pthread_mutex_lock(&chunk_allocator_lock); }
~lock () { pthread_mutex_unlock(&chunk_allocator_lock); }
};
friend class lock;
public:
__pthread_alloc_template() : next(0)
{
memset((void *)free_list, 0, NFREELISTS * sizeof(obj *));
}
/* n must be > 0 */
static void * allocate(size_t n)
{
obj * volatile * my_free_list;
obj * __RESTRICT result;
__pthread_alloc_template<dummy>* a;
if (n > MAX_BYTES) {
return(malloc(n));
}
if (!key_initialized ||
!(a = (__pthread_alloc_template<dummy>*)
pthread_getspecific(key))) {
a = get_allocator_instance();
}
my_free_list = a -> free_list + FREELIST_INDEX(n);
result = *my_free_list;
if (result == 0) {
void *r = a -> refill(ROUND_UP(n));
return r;
}
*my_free_list = result -> free_list_link;
return (result);
}
/* p may not be 0 */
static void deallocate(void *p, size_t n)
{
obj *q = (obj *)p;
obj * volatile * my_free_list;
__pthread_alloc_template<dummy>* a;
if (n > MAX_BYTES) {
free(p);
return;
}
if (!key_initialized ||
!(a = (__pthread_alloc_template<dummy>*)
pthread_getspecific(key))) {
a = get_allocator_instance();
}
my_free_list = a->free_list + FREELIST_INDEX(n);
q -> free_list_link = *my_free_list;
*my_free_list = q;
}
static void * reallocate(void *p, size_t old_sz, size_t new_sz);
};
typedef __pthread_alloc_template<false> pthread_alloc;
template <bool dummy>
void __pthread_alloc_template<dummy>::destructor(void * instance)
{
__pthread_alloc_template<dummy>* a =
(__pthread_alloc_template<dummy>*)instance;
/*REFERENCED*/
lock lock_instance; // free_allocators is shared between threads; guard it.
a -> next = free_allocators;
free_allocators = a;
}
// Return a recycled or new allocator instance.  The caller must hold
// chunk_allocator_lock, since free_allocators is shared between threads.
template <bool dummy>
__pthread_alloc_template<dummy>*
__pthread_alloc_template<dummy>::new_allocator()
{
if (0 != free_allocators) {
__pthread_alloc_template<dummy>* result = free_allocators;
free_allocators = free_allocators -> next;
return result;
} else {
return new __pthread_alloc_template<dummy>;
}
}
template <bool dummy>
__pthread_alloc_template<dummy>*
__pthread_alloc_template<dummy>::get_allocator_instance()
{
__pthread_alloc_template<dummy>* result;
{
/*REFERENCED*/
lock lock_instance;
if (!key_initialized) {
if (pthread_key_create(&key, destructor)) {
abort(); // failed
}
key_initialized = true;
}
// new_allocator() touches free_allocators, which is shared between
// threads, so it must run while chunk_allocator_lock is still held.
result = new_allocator();
}
if (pthread_setspecific(key, result)) abort();
return result;
}
/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much. */
/* We assume that size is properly aligned. */
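/* For example (a sketch of the common first call, assuming an empty pool): */
/* an initial refill(16) calls chunk_alloc(16, nobjs) with nobjs == 128, i.e. */
/* 2048 bytes.  chunk_alloc then obtains 2 * 2048 bytes from the underlying */
/* allocator (the heap-size based increment is still zero), returns the first */
/* 2048 bytes, and keeps the remaining 2048 in start_free/end_free for later calls. */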
template <bool dummy>
char *__pthread_alloc_template<dummy>
::chunk_alloc(size_t size, int &nobjs)
{
{
char * result;
size_t total_bytes;
size_t bytes_left;
/*REFERENCED*/
lock lock_instance; // Acquire lock for this routine
total_bytes = size * nobjs;
bytes_left = end_free - start_free;
if (bytes_left >= total_bytes) {
result = start_free;
start_free += total_bytes;
return(result);
} else if (bytes_left >= size) {
nobjs = bytes_left/size;
total_bytes = size * nobjs;
result = start_free;
start_free += total_bytes;
return(result);
} else {
size_t bytes_to_get = 2 * total_bytes + ROUND_UP(heap_size >> 4);
// Try to make use of the left-over piece.
if (bytes_left > 0) {
__pthread_alloc_template<dummy>* a =
(__pthread_alloc_template<dummy>*)pthread_getspecific(key);
obj * volatile * my_free_list =
a->free_list + FREELIST_INDEX(bytes_left);
((obj *)start_free) -> free_list_link = *my_free_list;
*my_free_list = (obj *)start_free;
}
# ifdef _SGI_SOURCE
// Try to get memory that's aligned on something like a
// cache line boundary, so as to avoid parceling out
// parts of the same line to different threads and thus
// possibly different processors.
{
const int cache_line_size = 128; // probable upper bound
bytes_to_get &= ~(cache_line_size-1);
start_free = (char *)memalign(cache_line_size, bytes_to_get);
if (0 == start_free) {
start_free = (char *)malloc_alloc::allocate(bytes_to_get);
}
}
# else /* !_SGI_SOURCE */
start_free = (char *)malloc_alloc::allocate(bytes_to_get);
# endif
heap_size += bytes_to_get;
end_free = start_free + bytes_to_get;
}
}
// lock is released here
return(chunk_alloc(size, nobjs));
}
/* Returns an object of size n, and optionally adds to size n free list.*/
/* We assume that n is properly aligned. */
/* The allocation lock is acquired inside chunk_alloc; refill itself    */
/* touches only this thread's free lists.                               */
template <bool dummy>
void *__pthread_alloc_template<dummy>
::refill(size_t n)
{
int nobjs = 128;
char * chunk = chunk_alloc(n, nobjs);
obj * volatile * my_free_list;
obj * result;
obj * current_obj, * next_obj;
int i;
if (1 == nobjs) {
return(chunk);
}
my_free_list = free_list + FREELIST_INDEX(n);
/* Build free list in chunk */
result = (obj *)chunk;
*my_free_list = next_obj = (obj *)(chunk + n);
for (i = 1; ; i++) {
current_obj = next_obj;
next_obj = (obj *)((char *)next_obj + n);
if (nobjs - 1 == i) {
current_obj -> free_list_link = 0;
break;
} else {
current_obj -> free_list_link = next_obj;
}
}
return(result);
}
template <bool dummy>
void *__pthread_alloc_template<dummy>
::reallocate(void *p, size_t old_sz, size_t new_sz)
{
void * result;
size_t copy_sz;
if (old_sz > MAX_BYTES && new_sz > MAX_BYTES) {
return(realloc(p, new_sz));
}
if (ROUND_UP(old_sz) == ROUND_UP(new_sz)) return(p);
result = allocate(new_sz);
copy_sz = new_sz > old_sz? old_sz : new_sz;
memcpy(result, p, copy_sz);
deallocate(p, old_sz);
return(result);
}
template <bool dummy>
__pthread_alloc_template<dummy> *
__pthread_alloc_template<dummy>::free_allocators = 0;
template <bool dummy>
pthread_key_t __pthread_alloc_template<dummy>::key;
template <bool dummy>
bool __pthread_alloc_template<dummy>::key_initialized = false;
template <bool dummy>
pthread_mutex_t __pthread_alloc_template<dummy>::chunk_allocator_lock
= PTHREAD_MUTEX_INITIALIZER;
template <bool dummy>
char *__pthread_alloc_template<dummy>
::start_free = 0;
template <bool dummy>
char *__pthread_alloc_template<dummy>
::end_free = 0;
template <bool dummy>
size_t __pthread_alloc_template<dummy>
::heap_size = 0;
#endif /* __PTHREAD_ALLOC_H */