/* Copyright (C) 2023-2025 Free Software Foundation, Inc.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is a basic "malloc" implementation intended for use with small,
   low-latency memories.

   To use this template, define BASIC_ALLOC_PREFIX, and then #include the
   source file (see the usage sketch in the comment below).  The other
   configuration macros are optional.

   The root heap descriptor is stored in the first bytes of the heap, and
   each free chunk contains a similar descriptor for the next free chunk in
   the chain.

   The descriptor is two values: offset and size, which describe the
   location of a chunk of memory available for allocation.  The offset is
   relative to the base of the heap.  The special offset value 0xffffffff
   indicates that the heap (free chain) is locked.  The offset and size are
   32-bit values so the base alignment can be 8 bytes.

   Memory is allocated to the first free chunk that fits.  The free chain
   is always stored in order of the offset to assist coalescing adjacent
   chunks.  */
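
/* A minimal usage sketch, with purely illustrative names (the prefix and
   file name below are not taken from any particular target port): a
   wrapper source file for a target would typically contain

       #define BASIC_ALLOC_PREFIX __example_lowlat
       #include "basic-allocator.c"        (i.e. this template file)

   which instantiates __example_lowlat_init, __example_lowlat_alloc,
   __example_lowlat_calloc, __example_lowlat_free and
   __example_lowlat_realloc.  BASIC_ALLOC_YIELD may additionally be defined
   to a target-specific pause/yield expression to execute while spinning on
   the heap lock; by default it expands to nothing.  */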

#include "libgomp.h"

#ifndef BASIC_ALLOC_PREFIX
#error "BASIC_ALLOC_PREFIX not defined."
#endif

#ifndef BASIC_ALLOC_YIELD
#define BASIC_ALLOC_YIELD
#endif

#define ALIGN(VAR) (((VAR) + 7) & ~7)    /* 8-byte granularity.  */
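/* For example, ALIGN (1) == 8 and ALIGN (12) == 16, so every requested
   size is rounded up to a multiple of 8 bytes.  */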

#define fn1(prefix, name) prefix ## _ ## name
#define fn(prefix, name) fn1 (prefix, name)
#define basic_alloc_init fn(BASIC_ALLOC_PREFIX,init)
#define basic_alloc_alloc fn(BASIC_ALLOC_PREFIX,alloc)
#define basic_alloc_calloc fn(BASIC_ALLOC_PREFIX,calloc)
#define basic_alloc_free fn(BASIC_ALLOC_PREFIX,free)
#define basic_alloc_realloc fn(BASIC_ALLOC_PREFIX,realloc)

typedef struct {
  uint32_t offset;
  uint32_t size;
} heapdesc;

void
basic_alloc_init (char *heap, size_t limit)
{
  if (heap == NULL)
    return;

  /* Initialize the head of the free chain.  */
  heapdesc *root = (heapdesc *) heap;
  root->offset = ALIGN (1);
  root->size = limit - root->offset;

  /* And terminate the chain.  */
  heapdesc *next = (heapdesc *) (heap + root->offset);
  next->offset = 0;
  next->size = 0;
}
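
/* For instance, with purely illustrative numbers: after the init function
   runs with LIMIT == 1024, the root descriptor at the start of the heap is
   { offset = 8, size = 1016 } and the terminating descriptor at heap + 8
   is { offset = 0, size = 0 }, i.e. a single free chunk covering
   everything after the root descriptor.  */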

static void *
basic_alloc_alloc (char *heap, size_t size)
{
  if (heap == NULL)
    return NULL;

  /* Memory is allocated in N-byte granularity.  */
  size = ALIGN (size);

  /* Acquire a lock on the low-latency heap.  */
  heapdesc root, *root_ptr = (heapdesc *) heap;
  do
    {
      root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff,
                                         MEMMODEL_ACQUIRE);
      if (root.offset != 0xffffffff)
        {
          root.size = root_ptr->size;
          break;
        }
      /* Spin.  */
      BASIC_ALLOC_YIELD;
    }
  while (1);
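
  /* While the lock is held, the up-to-date root descriptor exists only in
     the local variable ROOT; any other thread performing the exchange above
     reads the 0xffffffff sentinel and keeps spinning until the releasing
     store below publishes the updated offset.  */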

  /* Walk the free chain.  */
  heapdesc chunk = root;
  heapdesc *prev_chunkptr = NULL;
  heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset);
  heapdesc onward_chain = *chunkptr;
  while (chunk.size != 0 && (uint32_t) size > chunk.size)
    {
      chunk = onward_chain;
      prev_chunkptr = chunkptr;
      chunkptr = (heapdesc *) (heap + chunk.offset);
      onward_chain = *chunkptr;
    }

  void *result = NULL;
  if (chunk.size != 0)
    {
      /* Allocation successful.  */
      result = chunkptr;

      /* Update the free chain.  */
      heapdesc stillfree = chunk;
      stillfree.offset += size;
      stillfree.size -= size;
      heapdesc *stillfreeptr = (heapdesc *) (heap + stillfree.offset);

      if (stillfree.size == 0)
        /* The whole chunk was used.  */
        stillfree = onward_chain;
      else
        /* The chunk was split, so restore the onward chain.  */
        *stillfreeptr = onward_chain;

      /* The previous free slot or root now points to stillfree.  */
      if (prev_chunkptr)
        *prev_chunkptr = stillfree;
      else
        root = stillfree;
    }

  /* Update the free chain root and release the lock.  */
  root_ptr->size = root.size;
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);

  return result;
}

static void *
basic_alloc_calloc (char *heap, size_t size)
{
  /* Memory is allocated in N-byte granularity.  */
  size = ALIGN (size);

  uint64_t *result = basic_alloc_alloc (heap, size);
  if (result)
    /* Inline memset in which we know size is a multiple of 8.  */
    for (unsigned i = 0; i < (unsigned) size / 8; i++)
      result[i] = 0;

  return result;
}

static void
basic_alloc_free (char *heap, void *addr, size_t size)
{
  /* Memory is allocated in N-byte granularity.  */
  size = ALIGN (size);

  /* Acquire a lock on the low-latency heap.  */
  heapdesc root, *root_ptr = (heapdesc *) heap;
  do
    {
      root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff,
                                         MEMMODEL_ACQUIRE);
      if (root.offset != 0xffffffff)
        {
          root.size = root_ptr->size;
          break;
        }
      /* Spin.  */
      BASIC_ALLOC_YIELD;
    }
  while (1);

  /* Walk the free chain to find where to insert a new entry.  */
  heapdesc chunk = root, prev_chunk = {0};
  heapdesc *prev_chunkptr = NULL, *prevprev_chunkptr = NULL;
  heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset);
  heapdesc onward_chain = *chunkptr;
  while (chunk.size != 0 && addr > (void *) chunkptr)
    {
      prev_chunk = chunk;
      chunk = onward_chain;
      prevprev_chunkptr = prev_chunkptr;
      prev_chunkptr = chunkptr;
      chunkptr = (heapdesc *) (heap + chunk.offset);
      onward_chain = *chunkptr;
    }

  /* Create the new chunk descriptor.  */
  heapdesc newfreechunk;
  newfreechunk.offset = (uint32_t) ((uintptr_t) addr - (uintptr_t) heap);
  newfreechunk.size = (uint32_t) size;

  /* Coalesce adjacent free chunks.  */
  if (newfreechunk.offset + size == chunk.offset)
    {
      /* Free chunk follows.  */
      newfreechunk.size += chunk.size;
      chunk = onward_chain;
    }
  if (prev_chunkptr)
    {
      if (prev_chunk.offset + prev_chunk.size == newfreechunk.offset)
        {
          /* Free chunk precedes.  */
          newfreechunk.offset = prev_chunk.offset;
          newfreechunk.size += prev_chunk.size;
          addr = heap + prev_chunk.offset;
          prev_chunkptr = prevprev_chunkptr;
        }
    }

  /* Update the free chain in the new and previous chunks.  */
  *(heapdesc *) addr = chunk;
  if (prev_chunkptr)
    *prev_chunkptr = newfreechunk;
  else
    root = newfreechunk;

  /* Update the free chain root and release the lock.  */
  root_ptr->size = root.size;
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);
}

static void *
basic_alloc_realloc (char *heap, void *addr, size_t oldsize,
                     size_t size)
{
  /* Memory is allocated in N-byte granularity.  */
  oldsize = ALIGN (oldsize);
  size = ALIGN (size);

  if (oldsize == size)
    return addr;

  /* Acquire a lock on the low-latency heap.  */
  heapdesc root, *root_ptr = (heapdesc *) heap;
  do
    {
      root.offset = __atomic_exchange_n (&root_ptr->offset, 0xffffffff,
                                         MEMMODEL_ACQUIRE);
      if (root.offset != 0xffffffff)
        {
          root.size = root_ptr->size;
          break;
        }
      /* Spin.  */
      BASIC_ALLOC_YIELD;
    }
  while (1);

  /* Walk the free chain.  */
  heapdesc chunk = root;
  heapdesc *prev_chunkptr = NULL;
  heapdesc *chunkptr = (heapdesc *) (heap + chunk.offset);
  heapdesc onward_chain = *chunkptr;
  while (chunk.size != 0 && (void *) chunkptr < addr)
    {
      chunk = onward_chain;
      prev_chunkptr = chunkptr;
      chunkptr = (heapdesc *) (heap + chunk.offset);
      onward_chain = *chunkptr;
    }

  void *result = NULL;
  if (size < oldsize)
    {
      /* The new allocation is smaller than the old; we can always
         shrink an allocation in place.  */
      result = addr;

      heapdesc *nowfreeptr = (heapdesc *) (addr + size);

      /* Update the free chain.  */
      heapdesc nowfree;
      nowfree.offset = (char *) nowfreeptr - heap;
      nowfree.size = oldsize - size;

      if (nowfree.offset + nowfree.size == chunk.offset)
        {
          /* Coalesce following free chunk.  */
          nowfree.size += chunk.size;
          *nowfreeptr = onward_chain;
        }
      else
        *nowfreeptr = chunk;

      /* The previous free slot or root now points to nowfree.  */
      if (prev_chunkptr)
        *prev_chunkptr = nowfree;
      else
        root = nowfree;
    }
  else if (chunk.size != 0
           && (char *) addr + oldsize == (char *) chunkptr
           && chunk.size >= size - oldsize)
    {
      /* The new allocation is larger than the old, and we found a
         large enough free block right after the existing block,
         so we extend into that space.  */
      result = addr;

      uint32_t delta = size - oldsize;

      /* Update the free chain.  */
      heapdesc stillfree = chunk;
      stillfree.offset += delta;
      stillfree.size -= delta;
      heapdesc *stillfreeptr = (heapdesc *) (heap + stillfree.offset);

      if (stillfree.size == 0)
        /* The whole chunk was used.  */
        stillfree = onward_chain;
      else
        /* The chunk was split, so restore the onward chain.  */
        *stillfreeptr = onward_chain;

      /* The previous free slot or root now points to stillfree.  */
      if (prev_chunkptr)
        *prev_chunkptr = stillfree;
      else
        root = stillfree;
    }
  /* Else realloc in-place has failed and result remains NULL.  */

  /* Update the free chain root and release the lock.  */
  root_ptr->size = root.size;
  __atomic_store_n (&root_ptr->offset, root.offset, MEMMODEL_RELEASE);

  if (result == NULL)
    {
      /* The allocation could not be extended in place, so we simply
         allocate fresh memory and move the data.  If we can't allocate
         from low-latency memory then we leave the original allocation
         intact and return NULL.
         We could do a fall-back to main memory, but we don't know what
         the fall-back trait said to do.  */
      result = basic_alloc_alloc (heap, size);
      if (result != NULL)
        {
          /* Inline memcpy in which we know oldsize is a multiple of 8.  */
          uint64_t *from = addr, *to = result;
          for (unsigned i = 0; i < (unsigned) oldsize / 8; i++)
            to[i] = from[i];

          basic_alloc_free (heap, addr, oldsize);
        }
    }

  return result;
}

#undef ALIGN
#undef fn1
#undef fn
#undef basic_alloc_init
#undef basic_alloc_alloc
#undef basic_alloc_calloc
#undef basic_alloc_free
#undef basic_alloc_realloc