| /* Copyright (C) 2023-2024 Free Software Foundation, Inc. |
| |
| This file is part of the GNU Offloading and Multi Processing Library |
| (libgomp). |
| |
| Libgomp is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This is a basic "malloc" implementation intended for use with small, |
| low-latency memories. |
| |
   To use this template, define BASIC_ALLOC_PREFIX, and then #include the
   source file; an illustrative instantiation follows this comment. The
   other configuration macros are optional.
| |
| The root heap descriptor is stored in the first bytes of the heap, and each |
| free chunk contains a similar descriptor for the next free chunk in the |
| chain. |
| |
| The descriptor is two values: offset and size, which describe the |
| location of a chunk of memory available for allocation. The offset is |
| relative to the base of the heap. The special offset value 0xffffffff |
   indicates that the heap (free chain) is locked. The offset and size are
   32-bit values, so the base alignment can be 8 bytes.
| |
   Memory is allocated from the first free chunk that is large enough (a
   first-fit policy). The free chain is kept sorted by offset so that
   adjacent free chunks can be coalesced when memory is freed. */
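
/* An illustrative instantiation (the prefix "example" is a placeholder,
   not taken from any existing port):

     #define BASIC_ALLOC_PREFIX example
     #include "basic-allocator.c"

   This defines example_init, example_alloc, example_calloc, example_free,
   and example_realloc. Optionally, define BASIC_ALLOC_YIELD first to a
   spin-wait hint suitable for the target; it defaults to nothing. */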
| |
| #include "libgomp.h" |
| |
| #ifndef BASIC_ALLOC_PREFIX |
| #error "BASIC_ALLOC_PREFIX not defined." |
| #endif |
| |
| #ifndef BASIC_ALLOC_YIELD |
| #define BASIC_ALLOC_YIELD |
| #endif |
| |
| #define ALIGN(VAR) (((VAR) + 7) & ~7) /* 8-byte granularity. */ |
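/* For example: ALIGN (1) == 8, ALIGN (8) == 8, ALIGN (9) == 16. */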
| |
| #define fn1(prefix, name) prefix ## _ ## name |
| #define fn(prefix, name) fn1 (prefix, name) |
| #define basic_alloc_init fn(BASIC_ALLOC_PREFIX,init) |
| #define basic_alloc_alloc fn(BASIC_ALLOC_PREFIX,alloc) |
| #define basic_alloc_calloc fn(BASIC_ALLOC_PREFIX,calloc) |
| #define basic_alloc_free fn(BASIC_ALLOC_PREFIX,free) |
| #define basic_alloc_realloc fn(BASIC_ALLOC_PREFIX,realloc) |
| |
| typedef struct { |
| uint32_t offset; |
| uint32_t size; |
| } heapdesc; |
| |
| void |
| basic_alloc_init (char *heap, size_t limit) |
| { |
| if (heap == NULL) |
| return; |
| |
| /* Initialize the head of the free chain. */ |
| heapdesc *root = (heapdesc *) heap; |
| root->offset = ALIGN(1); |
| root->size = limit - root->offset; |
| |
| /* And terminate the chain. */ |
| heapdesc *next = (heapdesc *) (heap + root->offset); |
| next->offset = 0; |
| next->size = 0; |
| } |
| |
| static void * |
| basic_alloc_alloc (char *heap, size_t size) |
| { |
| if (heap == NULL) |
| return NULL; |
| |
  /* Memory is allocated in 8-byte granularity. */
| size = ALIGN (size); |
| |
| /* Acquire a lock on the low-latency heap. */ |
| heapdesc root, *root_ptr = (heapdesc *) heap; |
| do |
| { |
| root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff, |
| MEMMODEL_ACQUIRE); |
| if (root.offset != 0xffffffff) |
| { |
| root.size = root_ptr->size; |
| break; |
| } |
| /* Spin. */ |
| BASIC_ALLOC_YIELD; |
| } |
| while (1); |
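
  /* The chain is now locked: any other thread that exchanges in 0xffffffff
     will read the sentinel back and spin. Meanwhile, root holds a private
     snapshot of the free-chain head, which is written back on release. */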
| |
| /* Walk the free chain. */ |
| heapdesc chunk = root; |
| heapdesc *prev_chunkptr = NULL; |
| heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset); |
| heapdesc onward_chain = *chunkptr; |
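  /* First-fit search: take the first free chunk that is large enough. Each
     free chunk stores the next chunk's descriptor in its first bytes, so
     onward_chain caches that descriptor before the chunk can be handed out
     and overwritten. */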
| while (chunk.size != 0 && (uint32_t) size > chunk.size) |
| { |
| chunk = onward_chain; |
| prev_chunkptr = chunkptr; |
| chunkptr = (heapdesc *) (heap + chunk.offset); |
| onward_chain = *chunkptr; |
| } |
| |
| void *result = NULL; |
| if (chunk.size != 0) |
| { |
| /* Allocation successful. */ |
| result = chunkptr; |
| |
| /* Update the free chain. */ |
| heapdesc stillfree = chunk; |
| stillfree.offset += size; |
| stillfree.size -= size; |
| heapdesc *stillfreeptr = (heapdesc *) (heap + stillfree.offset); |
| |
| if (stillfree.size == 0) |
| /* The whole chunk was used. */ |
| stillfree = onward_chain; |
| else |
| /* The chunk was split, so restore the onward chain. */ |
| *stillfreeptr = onward_chain; |
| |
| /* The previous free slot or root now points to stillfree. */ |
| if (prev_chunkptr) |
| *prev_chunkptr = stillfree; |
| else |
| root = stillfree; |
| } |
| |
| /* Update the free chain root and release the lock. */ |
| root_ptr->size = root.size; |
| __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE); |
| |
| return result; |
| } |
| |
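/* Note: unlike ISO C calloc, this takes a single total size in bytes rather
   than a count and an element size. */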
| static void * |
| basic_alloc_calloc (char *heap, size_t size) |
| { |
  /* Memory is allocated in 8-byte granularity. */
| size = ALIGN (size); |
| |
| uint64_t *result = basic_alloc_alloc (heap, size); |
| if (result) |
| /* Inline memset in which we know size is a multiple of 8. */ |
| for (unsigned i = 0; i < (unsigned) size / 8; i++) |
| result[i] = 0; |
| |
| return result; |
| } |
| |
| static void |
| basic_alloc_free (char *heap, void *addr, size_t size) |
| { |
  /* Memory is allocated in 8-byte granularity. */
| size = ALIGN (size); |
| |
| /* Acquire a lock on the low-latency heap. */ |
| heapdesc root, *root_ptr = (heapdesc *) heap; |
| do |
| { |
| root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff, |
| MEMMODEL_ACQUIRE); |
| if (root.offset != 0xffffffff) |
| { |
| root.size = root_ptr->size; |
| break; |
| } |
| /* Spin. */ |
| BASIC_ALLOC_YIELD; |
| } |
| while (1); |
| |
| /* Walk the free chain to find where to insert a new entry. */ |
| heapdesc chunk = root, prev_chunk = {0}; |
| heapdesc *prev_chunkptr = NULL, *prevprev_chunkptr = NULL; |
| heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset); |
| heapdesc onward_chain = *chunkptr; |
| while (chunk.size != 0 && addr > (void *) chunkptr) |
| { |
| prev_chunk = chunk; |
| chunk = onward_chain; |
| prevprev_chunkptr = prev_chunkptr; |
| prev_chunkptr = chunkptr; |
| chunkptr = (heapdesc *) (heap + chunk.offset); |
| onward_chain = *chunkptr; |
| } |
| |
| /* Create the new chunk descriptor. */ |
| heapdesc newfreechunk; |
| newfreechunk.offset = (uint32_t) ((uintptr_t) addr - (uintptr_t) heap); |
| newfreechunk.size = (uint32_t) size; |
| |
| /* Coalesce adjacent free chunks. */ |
  if (newfreechunk.offset + newfreechunk.size == chunk.offset)
| { |
| /* Free chunk follows. */ |
| newfreechunk.size += chunk.size; |
| chunk = onward_chain; |
| } |
| if (prev_chunkptr) |
| { |
| if (prev_chunk.offset + prev_chunk.size |
| == newfreechunk.offset) |
| { |
| /* Free chunk precedes. */ |
| newfreechunk.offset = prev_chunk.offset; |
| newfreechunk.size += prev_chunk.size; |
| addr = heap + prev_chunk.offset; |
| prev_chunkptr = prevprev_chunkptr; |
| } |
| } |
| |
| /* Update the free chain in the new and previous chunks. */ |
| *(heapdesc *) addr = chunk; |
| if (prev_chunkptr) |
| *prev_chunkptr = newfreechunk; |
| else |
| root = newfreechunk; |
| |
| /* Update the free chain root and release the lock. */ |
| root_ptr->size = root.size; |
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);
}
| |
| static void * |
| basic_alloc_realloc (char *heap, void *addr, size_t oldsize, |
| size_t size) |
| { |
  /* Memory is allocated in 8-byte granularity. */
| oldsize = ALIGN (oldsize); |
| size = ALIGN (size); |
| |
| if (oldsize == size) |
| return addr; |
| |
| /* Acquire a lock on the low-latency heap. */ |
| heapdesc root, *root_ptr = (heapdesc *) heap; |
| do |
| { |
| root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff, |
| MEMMODEL_ACQUIRE); |
| if (root.offset != 0xffffffff) |
| { |
| root.size = root_ptr->size; |
| break; |
| } |
| /* Spin. */ |
| BASIC_ALLOC_YIELD; |
| } |
| while (1); |
| |
| /* Walk the free chain. */ |
| heapdesc chunk = root; |
| heapdesc *prev_chunkptr = NULL; |
| heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset); |
| heapdesc onward_chain = *chunkptr; |
| while (chunk.size != 0 && (void *) chunkptr < addr) |
| { |
| chunk = onward_chain; |
| prev_chunkptr = chunkptr; |
| chunkptr = (heapdesc *) (heap + chunk.offset); |
| onward_chain = *chunkptr; |
| } |
| |
| void *result = NULL; |
| if (size < oldsize) |
| { |
| /* The new allocation is smaller than the old; we can always |
| shrink an allocation in place. */ |
| result = addr; |
| |
      heapdesc *nowfreeptr = (heapdesc *) ((char *) addr + size);
| |
| /* Update the free chain. */ |
| heapdesc nowfree; |
| nowfree.offset = (char *) nowfreeptr - heap; |
| nowfree.size = oldsize - size; |
| |
      if (nowfree.offset + nowfree.size == chunk.offset)
| { |
| /* Coalesce following free chunk. */ |
| nowfree.size += chunk.size; |
| *nowfreeptr = onward_chain; |
| } |
| else |
| *nowfreeptr = chunk; |
| |
| /* The previous free slot or root now points to nowfree. */ |
| if (prev_chunkptr) |
| *prev_chunkptr = nowfree; |
| else |
| root = nowfree; |
| } |
  else if (chunk.size != 0
           && (char *) addr + oldsize == (char *) chunkptr
           && chunk.size >= size - oldsize)
| { |
| /* The new allocation is larger than the old, and we found a |
| large enough free block right after the existing block, |
| so we extend into that space. */ |
| result = addr; |
| |
      uint32_t delta = size - oldsize;
| |
| /* Update the free chain. */ |
| heapdesc stillfree = chunk; |
| stillfree.offset += delta; |
| stillfree.size -= delta; |
| heapdesc *stillfreeptr = (heapdesc *) (heap + stillfree.offset); |
| |
| if (stillfree.size == 0) |
| /* The whole chunk was used. */ |
| stillfree = onward_chain; |
| else |
| /* The chunk was split, so restore the onward chain. */ |
| *stillfreeptr = onward_chain; |
| |
| /* The previous free slot or root now points to stillfree. */ |
| if (prev_chunkptr) |
| *prev_chunkptr = stillfree; |
| else |
| root = stillfree; |
| } |
| /* Else realloc in-place has failed and result remains NULL. */ |
| |
| /* Update the free chain root and release the lock. */ |
| root_ptr->size = root.size; |
| __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE); |
| |
| if (result == NULL) |
| { |
      /* The allocation could not be extended in place, so we simply
         allocate fresh memory and move the data. If we can't allocate
         from low-latency memory then we leave the original allocation
         intact and return NULL.
         We could fall back to main memory, but we don't know what the
         fall-back trait said to do. */
| result = basic_alloc_alloc (heap, size); |
| if (result != NULL) |
| { |
| /* Inline memcpy in which we know oldsize is a multiple of 8. */ |
| uint64_t *from = addr, *to = result; |
| for (unsigned i = 0; i < (unsigned) oldsize / 8; i++) |
| to[i] = from[i]; |
| |
| basic_alloc_free (heap, addr, oldsize); |
| } |
| } |
| |
| return result; |
| } |
| |
| #undef ALIGN |
| #undef fn1 |
| #undef fn |
| #undef basic_alloc_init |
#undef basic_alloc_alloc
#undef basic_alloc_calloc
| #undef basic_alloc_free |
| #undef basic_alloc_realloc |