blob: 1852908095b0a26765a8564b108e0ed09811d21f [file] [log] [blame]
/*
* Copyright (c) 1996
* Silicon Graphics Computer Systems, Inc.
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Silicon Graphics makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*/
#ifndef __SGI_STL_PTHREAD_ALLOC
#define __SGI_STL_PTHREAD_ALLOC
// Pthread-specific node allocator.
// This is similar to the default allocator, except that free-list
// information is kept separately for each thread, avoiding locking.
// This should be reasonably fast even in the presence of threads.
// The down side is that storage may not be well-utilized.
// It is not an error to allocate memory in thread A and deallocate
// it in thread B. But this effectively transfers ownership of the memory,
// so that it can only be reallocated by thread B. Thus this can effectively
// result in a storage leak if it's done on a regular basis.
// It can also result in frequent sharing of
// cache lines among processors, with potentially serious performance
// consequences.
#include <stl_config.h>
#include <stl_alloc.h>
#ifndef __RESTRICT
# define __RESTRICT
#endif
__STL_BEGIN_NAMESPACE
#define __STL_DATA_ALIGNMENT 8
union _Pthread_alloc_obj {
union _Pthread_alloc_obj * __free_list_link;
char __client_data[__STL_DATA_ALIGNMENT]; /* The client sees this. */
};
// Pthread allocators don't appear to the client to have meaningful
// instances. We do in fact need to associate some state with each
// thread. That state is represented by
// _Pthread_alloc_per_thread_state<_Max_size>.
template<size_t _Max_size>
struct _Pthread_alloc_per_thread_state {
typedef _Pthread_alloc_obj __obj;
enum { _S_NFREELISTS = _Max_size/__STL_DATA_ALIGNMENT };
_Pthread_alloc_obj* volatile __free_list[_S_NFREELISTS];
_Pthread_alloc_per_thread_state<_Max_size> * __next;
// Free list link for list of available per thread structures.
// When one of these becomes available for reuse due to thread
// termination, any objects in its free list remain associated
// with it. The whole structure may then be used by a newly
// created thread.
_Pthread_alloc_per_thread_state() : __next(0)
{
memset((void *)__free_list, 0, _S_NFREELISTS * sizeof(__obj *));
}
// Returns an object of size __n, and possibly adds to size n free list.
void *_M_refill(size_t __n);
};
// Pthread-specific allocator.
// The argument specifies the largest object size allocated from per-thread
// free lists. Larger objects are allocated using malloc_alloc.
// Max_size must be a power of 2.
template <size_t _Max_size = 128>
class _Pthread_alloc_template {
public: // but only for internal use:
typedef _Pthread_alloc_obj __obj;
// Allocates a chunk for nobjs of size "size". nobjs may be reduced
// if it is inconvenient to allocate the requested number.
static char *_S_chunk_alloc(size_t __size, int &__nobjs);
enum {_S_ALIGN = __STL_DATA_ALIGNMENT};
static size_t _S_round_up(size_t __bytes) {
return (((__bytes) + _S_ALIGN-1) & ~(_S_ALIGN - 1));
}
static size_t _S_freelist_index(size_t __bytes) {
return (((__bytes) + _S_ALIGN-1)/_S_ALIGN - 1);
}
private:
// Chunk allocation state. And other shared state.
// Protected by _S_chunk_allocator_lock.
static pthread_mutex_t _S_chunk_allocator_lock;
static char *_S_start_free;
static char *_S_end_free;
static size_t _S_heap_size;
static _Pthread_alloc_per_thread_state<_Max_size>* _S_free_per_thread_states;
static pthread_key_t _S_key;
static bool _S_key_initialized;
// Pthread key under which per thread state is stored.
// Allocator instances that are currently unclaimed by any thread.
static void _S_destructor(void *instance);
// Function to be called on thread exit to reclaim per thread
// state.
static _Pthread_alloc_per_thread_state<_Max_size> *_S_new_per_thread_state();
// Return a recycled or new per thread state.
static _Pthread_alloc_per_thread_state<_Max_size> *_S_get_per_thread_state();
// ensure that the current thread has an associated
// per thread state.
friend class _M_lock;
class _M_lock {
public:
_M_lock () { pthread_mutex_lock(&_S_chunk_allocator_lock); }
~_M_lock () { pthread_mutex_unlock(&_S_chunk_allocator_lock); }
};
public:
/* n must be > 0 */
static void * allocate(size_t __n)
{
__obj * volatile * __my_free_list;
__obj * __RESTRICT __result;
_Pthread_alloc_per_thread_state<_Max_size>* __a;
if (__n > _Max_size) {
return(malloc_alloc::allocate(__n));
}
if (!_S_key_initialized ||
!(__a = (_Pthread_alloc_per_thread_state<_Max_size>*)
pthread_getspecific(_S_key))) {
__a = _S_get_per_thread_state();
}
__my_free_list = __a -> __free_list + _S_freelist_index(__n);
__result = *__my_free_list;
if (__result == 0) {
void *__r = __a -> _M_refill(_S_round_up(__n));
return __r;
}
*__my_free_list = __result -> __free_list_link;
return (__result);
};
/* p may not be 0 */
static void deallocate(void *__p, size_t __n)
{
__obj *__q = (__obj *)__p;
__obj * volatile * __my_free_list;
_Pthread_alloc_per_thread_state<_Max_size>* __a;
if (__n > _Max_size) {
malloc_alloc::deallocate(__p, __n);
return;
}
if (!_S_key_initialized ||
!(__a = (_Pthread_alloc_per_thread_state<_Max_size> *)
pthread_getspecific(_S_key))) {
__a = _S_get_per_thread_state();
}
__my_free_list = __a->__free_list + _S_freelist_index(__n);
__q -> __free_list_link = *__my_free_list;
*__my_free_list = __q;
}
static void * reallocate(void *__p, size_t __old_sz, size_t __new_sz);
} ;
typedef _Pthread_alloc_template<> pthread_alloc;
template <size_t _Max_size>
void _Pthread_alloc_template<_Max_size>::_S_destructor(void * __instance)
{
_M_lock __lock_instance; // Need to acquire lock here.
_Pthread_alloc_per_thread_state<_Max_size>* __s =
(_Pthread_alloc_per_thread_state<_Max_size> *)__instance;
__s -> __next = _S_free_per_thread_states;
_S_free_per_thread_states = __s;
}
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_new_per_thread_state()
{
/* lock already held here. */
if (0 != _S_free_per_thread_states) {
_Pthread_alloc_per_thread_state<_Max_size> *__result =
_S_free_per_thread_states;
_S_free_per_thread_states = _S_free_per_thread_states -> __next;
return __result;
} else {
return new _Pthread_alloc_per_thread_state<_Max_size>;
}
}
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_get_per_thread_state()
{
/*REFERENCED*/
_M_lock __lock_instance; // Need to acquire lock here.
_Pthread_alloc_per_thread_state<_Max_size> * __result;
if (!_S_key_initialized) {
if (pthread_key_create(&_S_key, _S_destructor)) {
abort(); // failed
}
_S_key_initialized = true;
}
__result = _S_new_per_thread_state();
if (pthread_setspecific(_S_key, __result)) abort();
return __result;
}
/* We allocate memory in large chunks in order to avoid fragmenting */
/* the malloc heap too much. */
/* We assume that size is properly aligned. */
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_chunk_alloc(size_t __size, int &__nobjs)
{
{
char * __result;
size_t __total_bytes;
size_t __bytes_left;
/*REFERENCED*/
_M_lock __lock_instance; // Acquire lock for this routine
__total_bytes = __size * __nobjs;
__bytes_left = _S_end_free - _S_start_free;
if (__bytes_left >= __total_bytes) {
__result = _S_start_free;
_S_start_free += __total_bytes;
return(__result);
} else if (__bytes_left >= __size) {
__nobjs = __bytes_left/__size;
__total_bytes = __size * __nobjs;
__result = _S_start_free;
_S_start_free += __total_bytes;
return(__result);
} else {
size_t __bytes_to_get =
2 * __total_bytes + _S_round_up(_S_heap_size >> 4);
// Try to make use of the left-over piece.
if (__bytes_left > 0) {
_Pthread_alloc_per_thread_state<_Max_size>* __a =
(_Pthread_alloc_per_thread_state<_Max_size>*)
pthread_getspecific(_S_key);
__obj * volatile * __my_free_list =
__a->__free_list + _S_freelist_index(__bytes_left);
((__obj *)_S_start_free) -> __free_list_link = *__my_free_list;
*__my_free_list = (__obj *)_S_start_free;
}
# ifdef _SGI_SOURCE
// Try to get memory that's aligned on something like a
// cache line boundary, so as to avoid parceling out
// parts of the same line to different threads and thus
// possibly different processors.
{
const int __cache_line_size = 128; // probable upper bound
__bytes_to_get &= ~(__cache_line_size-1);
_S_start_free = (char *)memalign(__cache_line_size, __bytes_to_get);
if (0 == _S_start_free) {
_S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
}
}
# else /* !SGI_SOURCE */
_S_start_free = (char *)malloc_alloc::allocate(__bytes_to_get);
# endif
_S_heap_size += __bytes_to_get;
_S_end_free = _S_start_free + __bytes_to_get;
}
}
// lock is released here
return(_S_chunk_alloc(__size, __nobjs));
}
/* Returns an object of size n, and optionally adds to size n free list.*/
/* We assume that n is properly aligned. */
/* We hold the allocation lock. */
template <size_t _Max_size>
void *_Pthread_alloc_per_thread_state<_Max_size>
::_M_refill(size_t __n)
{
int __nobjs = 128;
char * __chunk =
_Pthread_alloc_template<_Max_size>::_S_chunk_alloc(__n, __nobjs);
__obj * volatile * __my_free_list;
__obj * __result;
__obj * __current_obj, * __next_obj;
int __i;
if (1 == __nobjs) {
return(__chunk);
}
__my_free_list = __free_list
+ _Pthread_alloc_template<_Max_size>::_S_freelist_index(__n);
/* Build free list in chunk */
__result = (__obj *)__chunk;
*__my_free_list = __next_obj = (__obj *)(__chunk + __n);
for (__i = 1; ; __i++) {
__current_obj = __next_obj;
__next_obj = (__obj *)((char *)__next_obj + __n);
if (__nobjs - 1 == __i) {
__current_obj -> __free_list_link = 0;
break;
} else {
__current_obj -> __free_list_link = __next_obj;
}
}
return(__result);
}
template <size_t _Max_size>
void *_Pthread_alloc_template<_Max_size>
::reallocate(void *__p, size_t __old_sz, size_t __new_sz)
{
void * __result;
size_t __copy_sz;
if (__old_sz > _Max_size
&& __new_sz > _Max_size) {
return(realloc(__p, __new_sz));
}
if (_S_round_up(__old_sz) == _S_round_up(__new_sz)) return(__p);
__result = allocate(__new_sz);
__copy_sz = __new_sz > __old_sz? __old_sz : __new_sz;
memcpy(__result, __p, __copy_sz);
deallocate(__p, __old_sz);
return(__result);
}
template <size_t _Max_size>
_Pthread_alloc_per_thread_state<_Max_size> *
_Pthread_alloc_template<_Max_size>::_S_free_per_thread_states = 0;
template <size_t _Max_size>
pthread_key_t _Pthread_alloc_template<_Max_size>::_S_key;
template <size_t _Max_size>
bool _Pthread_alloc_template<_Max_size>::_S_key_initialized = false;
template <size_t _Max_size>
pthread_mutex_t _Pthread_alloc_template<_Max_size>::_S_chunk_allocator_lock
= PTHREAD_MUTEX_INITIALIZER;
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_start_free = 0;
template <size_t _Max_size>
char *_Pthread_alloc_template<_Max_size>
::_S_end_free = 0;
template <size_t _Max_size>
size_t _Pthread_alloc_template<_Max_size>
::_S_heap_size = 0;
#ifdef __STL_USE_STD_ALLOCATORS
template <class _Tp>
class pthread_allocator {
typedef pthread_alloc _S_Alloc; // The underlying allocator.
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef _Tp* pointer;
typedef const _Tp* const_pointer;
typedef _Tp& reference;
typedef const _Tp& const_reference;
typedef _Tp value_type;
template <class _Up> struct rebind {
typedef pthread_allocator<_Up> other;
};
pthread_allocator() __STL_NOTHROW {}
pthread_allocator(const pthread_allocator& a) __STL_NOTHROW {}
template <class _Up> pthread_allocator(const pthread_allocator<_Up>&)
__STL_NOTHROW {}
~pthread_allocator() __STL_NOTHROW {}
pointer address(reference __x) const { return &__x; }
const_pointer address(const_reference __x) const { return &__x; }
// __n is permitted to be 0. The C++ standard says nothing about what
// the return value is when __n == 0.
_Tp* allocate(size_type __n, const void* = 0) {
return __n != 0 ? static_cast<_Tp*>(_S_Alloc::allocate(__n * sizeof(_Tp)))
: 0;
}
// p is not permitted to be a null pointer.
void deallocate(pointer __p, size_type __n)
{ _S_Alloc::deallocate(__p, __n * sizeof(_Tp)); }
size_type max_size() const __STL_NOTHROW
{ return size_t(-1) / sizeof(_Tp); }
void construct(pointer __p, const _Tp& __val) { new(__p) _Tp(__val); }
void destroy(pointer _p) { _p->~_Tp(); }
};
template<>
class pthread_allocator<void> {
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template <class _Up> struct rebind {
typedef pthread_allocator<_Up> other;
};
};
template <size_t _Max_size>
inline bool operator==(const _Pthread_alloc_template<_Max_size>&,
const _Pthread_alloc_template<_Max_size>&)
{
return true;
}
template <class _T1, class _T2>
inline bool operator==(const pthread_allocator<_T1>&,
const pthread_allocator<_T2>& a2)
{
return true;
}
template <class _T1, class _T2>
inline bool operator!=(const pthread_allocator<_T1>&,
const pthread_allocator<_T2>&)
{
return false;
}
template <class _Tp, size_t _Max_size>
struct _Alloc_traits<_Tp, _Pthread_alloc_template<_Max_size> >
{
static const bool _S_instanceless = true;
typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max_size> > _Alloc_type;
typedef __allocator<_Tp, _Pthread_alloc_template<_Max_size> >
allocator_type;
};
template <class _Tp, class _Up, size_t _Max>
struct _Alloc_traits<_Tp, __allocator<_Up, _Pthread_alloc_template<_Max> > >
{
static const bool _S_instanceless = true;
typedef simple_alloc<_Tp, _Pthread_alloc_template<_Max> > _Alloc_type;
typedef __allocator<_Tp, _Pthread_alloc_template<_Max> > allocator_type;
};
template <class _Tp, class _Up>
struct _Alloc_traits<_Tp, pthread_allocator<_Up> >
{
static const bool _S_instanceless = true;
typedef simple_alloc<_Tp, _Pthread_alloc_template<> > _Alloc_type;
typedef pthread_allocator<_Tp> allocator_type;
};
#endif /* __STL_USE_STD_ALLOCATORS */
__STL_END_NAMESPACE
#endif /* __SGI_STL_PTHREAD_ALLOC */
// Local Variables:
// mode:C++
// End: