| /* Copyright (C) 2022-2025 Free Software Foundation, Inc. |
| Contributed by Jakub Jelinek <jakub@redhat.com>. |
| |
| This file is part of the GNU Offloading and Multi Processing Library |
| (libgomp). |
| |
| Libgomp is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This file contains wrappers for the system allocation routines. Most |
| places in the OpenMP API do not make any provision for failure, so in |
| general we cannot allow memory allocation to fail. */ |
| |
| #define _GNU_SOURCE |
| #include "libgomp.h" |
| #if defined(PLUGIN_SUPPORT) && defined(LIBGOMP_USE_PTHREADS) |
| #define LIBGOMP_USE_MEMKIND |
| #define LIBGOMP_USE_LIBNUMA |
| #endif |
| |
| /* Implement malloc routines that can handle pinned memory on Linux. |
| |
| Given that pinned memory is typically used to help host <-> device memory |
| transfers, we attempt to allocate such memory using a device (really: |
| libgomp plugin), but fall back to mmap plus mlock if no suitable device is |
| available. |
| |
| It's possible to use mlock on any heap memory, but using munlock is |
| problematic if there are multiple pinned allocations on the same page. |
| Tracking all that manually would be possible, but adds overhead. This may |
| be worth it if there are a lot of small allocations getting pinned, but |
| this seems less likely in an HPC application. |
| |
| Instead we optimize for large pinned allocations, and use mmap to ensure |
| that two pinned allocations don't share the same page. This also means |
| that large allocations don't pin extra pages by being poorly aligned. */ |
| |
| #define _GNU_SOURCE |
| #include <sys/mman.h> |
| #include <unistd.h> |
| #include <string.h> |
| #include <assert.h> |
| #include "libgomp.h" |
| #ifdef HAVE_INTTYPES_H |
| # include <inttypes.h> /* For PRIu64. */ |
| #endif |
| |
| static int using_device_for_page_locked |
| = /* uninitialized */ -1; |
| |
| |
| static gomp_simple_alloc_ctx_p pin_ctx = NULL; |
| static pthread_once_t ctxlock = PTHREAD_ONCE_INIT; |
| |
| static void |
| linux_init_pin_ctx () |
| { |
| pin_ctx = gomp_simple_alloc_init_context (); |
| } |
| |
| static void * |
| linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin, |
| bool init0) |
| { |
| void *addr = NULL; |
| |
| if (memspace == ompx_gnu_managed_mem_space) |
| addr = gomp_managed_alloc (size); |
| else if (pin) |
| { |
| int using_device = __atomic_load_n (&using_device_for_page_locked, |
| MEMMODEL_RELAXED); |
| if (using_device != 0) |
| { |
| using_device = gomp_page_locked_host_alloc (&addr, size); |
| int using_device_old |
| = __atomic_exchange_n (&using_device_for_page_locked, |
| using_device, MEMMODEL_RELAXED); |
| assert (using_device_old == -1 |
| /* We shouldn't have concurrently changed our mind. */ |
| || using_device_old == using_device); |
| } |
| if (using_device == 0) |
| { |
| static int pagesize = 0; |
| static void *addrhint = NULL; |
| |
| if (!pagesize) |
| pagesize = sysconf(_SC_PAGE_SIZE); |
| |
| while (1) |
| { |
| addr = gomp_simple_alloc (pin_ctx, size); |
| if (addr) |
| break; |
| |
| /* Round up to a whole page. */ |
| size_t misalignment = size % pagesize; |
| size_t mmap_size = (misalignment > 0 |
| ? size + pagesize - misalignment |
| : size); |
| void *newpage = mmap (addrhint, mmap_size, PROT_READ | PROT_WRITE, |
| MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
| if (newpage == MAP_FAILED) |
| break; |
| else |
| { |
| if (mlock (newpage, size)) |
| { |
| #ifdef HAVE_INTTYPES_H |
| gomp_debug (0, "libgomp: failed to pin %"PRIu64" bytes" |
| " of memory (ulimit too low?)\n", |
| (uint64_t) size); |
| #else |
| gomp_debug (0, "libgomp: failed to pin %lu bytes of" |
| " memory (ulimit too low?)\n", |
| (unsigned long) size); |
| #endif |
| munmap (newpage, size); |
| break; |
| } |
| |
| addrhint = newpage + mmap_size; |
| |
| pthread_once (&ctxlock, linux_init_pin_ctx); |
| gomp_simple_alloc_register_memory (pin_ctx, newpage, |
| mmap_size); |
| } |
| } |
| } |
| } |
| else |
| addr = malloc (size); |
| |
| if (addr && init0) |
| memset (addr, 0, size); |
| |
| return addr; |
| } |
| |
| static void * |
| linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin) |
| { |
| if (memspace == ompx_gnu_managed_mem_space) |
| { |
| void *ret = gomp_managed_alloc (size); |
| if (!ret) |
| return NULL; |
| memset (ret, 0, size); |
| return ret; |
| } |
| else if (pin) |
| return linux_memspace_alloc (memspace, size, pin, true); |
| else |
| return calloc (1, size); |
| } |
| |
| static void |
| linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size, |
| int pin) |
| { |
| if (memspace == ompx_gnu_managed_mem_space) |
| gomp_managed_free (addr); |
| else if (pin) |
| { |
| int using_device |
| = __atomic_load_n (&using_device_for_page_locked, |
| MEMMODEL_RELAXED); |
| if (using_device == 1) |
| gomp_page_locked_host_free (addr); |
| else |
| /* The "simple" allocator does not (currently) munmap locked pages |
| (meaning that the number of locked pages never decreases), but it |
| can reuse the freed memory in subsequent gomp_simple_alloc calls. */ |
| gomp_simple_free (pin_ctx, addr); |
| } |
| else |
| free (addr); |
| } |
| |
| static void * |
| linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr, |
| size_t oldsize, size_t size, int oldpin, int pin) |
| { |
| if (memspace == ompx_gnu_managed_mem_space) |
| /* Realloc is not implemented for device Managed Memory. */ |
| ; |
| else if (oldpin && pin) |
| { |
| int using_device |
| = __atomic_load_n (&using_device_for_page_locked, |
| MEMMODEL_RELAXED); |
| /* The device plugin API does not support realloc, |
| but the gomp_simple_alloc allocator does. */ |
| if (using_device == 0) |
| { |
| /* This can fail if there is insufficient pinned memory free. */ |
| void *newaddr = gomp_simple_realloc (pin_ctx, addr, size); |
| if (newaddr) |
| return newaddr; |
| } |
| } |
| else if (oldpin || pin) |
| /* Moving from pinned to unpinned memory cannot be done in-place. */ |
| ; |
| else |
| return realloc (addr, size); |
| |
| /* In-place reallocation failed. Fall back to copy. */ |
| void *newaddr = linux_memspace_alloc (memspace, size, pin, false); |
| if (newaddr) |
| { |
| memcpy (newaddr, addr, oldsize < size ? oldsize : size); |
| linux_memspace_free (memspace, addr, oldsize, oldpin); |
| } |
| |
| return newaddr; |
| } |
| |
| static int |
| linux_memspace_validate (omp_memspace_handle_t, unsigned, int) |
| { |
| /* Everything should be accepted on Linux, including pinning and |
| non-standard memspaces. */ |
| return 1; |
| } |
| |
/* Map the MEMSPACE_* hooks onto the Linux implementations defined in this
   file (presumably consumed by the allocator source included right after
   these definitions).  */

/* Allocation without zero-initialization, and zero-initialized
   allocation.  */
#define MEMSPACE_ALLOC(MEMSPACE, SIZE, PIN) \
  linux_memspace_alloc (MEMSPACE, SIZE, PIN, false)
#define MEMSPACE_CALLOC(MEMSPACE, SIZE, PIN) \
  linux_memspace_calloc (MEMSPACE, SIZE, PIN)

/* Resizing and releasing.  */
#define MEMSPACE_REALLOC(MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN) \
  linux_memspace_realloc (MEMSPACE, ADDR, OLDSIZE, SIZE, OLDPIN, PIN)
#define MEMSPACE_FREE(MEMSPACE, ADDR, SIZE, PIN) \
  linux_memspace_free (MEMSPACE, ADDR, SIZE, PIN)

/* Feasibility check for a memspace/access/pin combination.  */
#define MEMSPACE_VALIDATE(MEMSPACE, ACCESS, PIN) \
  linux_memspace_validate (MEMSPACE, ACCESS, PIN)
| |
| #include "../../allocator.c" |