| /* Copyright (C) 2013-2022 Free Software Foundation, Inc. |
| Contributed by Jakub Jelinek <jakub@redhat.com>. |
| |
| This file is part of the GNU Offloading and Multi Processing Library |
| (libgomp). |
| |
| Libgomp is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This file contains the support of offloading. */ |
| |
| #include "libgomp.h" |
| #include "oacc-plugin.h" |
| #include "oacc-int.h" |
| #include "gomp-constants.h" |
| #include <limits.h> |
| #include <stdbool.h> |
| #include <stdlib.h> |
| #ifdef HAVE_INTTYPES_H |
| # include <inttypes.h> /* For PRIu64. */ |
| #endif |
| #include <string.h> |
| #include <stdio.h> /* For snprintf. */ |
| #include <assert.h> |
| #include <errno.h> |
| |
| #ifdef PLUGIN_SUPPORT |
| #include <dlfcn.h> |
| #include "plugin-suffix.h" |
| #endif |
| |
| typedef uintptr_t *hash_entry_type; |
| static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } |
| static inline void htab_free (void *ptr) { free (ptr); } |
| #include "hashtab.h" |
| |
| ialias_redirect (GOMP_task) |
| |
| static inline hashval_t |
| htab_hash (hash_entry_type element) |
| { |
| return hash_pointer ((void *) element); |
| } |
| |
| static inline bool |
| htab_eq (hash_entry_type x, hash_entry_type y) |
| { |
| return x == y; |
| } |
| |
| #define FIELD_TGT_EMPTY (~(size_t) 0) |
| |
| static void gomp_target_init (void); |
| |
| /* The whole initialization code for offloading plugins is only run one. */ |
| static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; |
| |
| /* Mutex for offload image registration. */ |
| static gomp_mutex_t register_lock; |
| |
| /* This structure describes an offload image. |
| It contains type of the target device, pointer to host table descriptor, and |
| pointer to target data. */ |
| struct offload_image_descr { |
| unsigned version; |
| enum offload_target_type type; |
| const void *host_table; |
| const void *target_data; |
| }; |
| |
| /* Array of descriptors of offload images. */ |
| static struct offload_image_descr *offload_images; |
| |
| /* Total number of offload images. */ |
| static int num_offload_images; |
| |
| /* Array of descriptors for all available devices. */ |
| static struct gomp_device_descr *devices; |
| |
| /* Total number of available devices. */ |
| static int num_devices; |
| |
| /* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ |
| static int num_devices_openmp; |
| |
| /* OpenMP requires mask. */ |
| static int omp_requires_mask; |
| |
| /* Similar to gomp_realloc, but release register_lock before gomp_fatal. */ |
| |
| static void * |
| gomp_realloc_unlock (void *old, size_t size) |
| { |
| void *ret = realloc (old, size); |
| if (ret == NULL) |
| { |
| gomp_mutex_unlock (®ister_lock); |
| gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); |
| } |
| return ret; |
| } |
| |
| attribute_hidden void |
| gomp_init_targets_once (void) |
| { |
| (void) pthread_once (&gomp_is_initialized, gomp_target_init); |
| } |
| |
| attribute_hidden int |
| gomp_get_num_devices (void) |
| { |
| gomp_init_targets_once (); |
| return num_devices_openmp; |
| } |
| |
| static struct gomp_device_descr * |
| resolve_device (int device_id, bool remapped) |
| { |
| if (remapped && device_id == GOMP_DEVICE_ICV) |
| { |
| struct gomp_task_icv *icv = gomp_icv (false); |
| device_id = icv->default_device_var; |
| remapped = false; |
| } |
| |
| if (device_id < 0) |
| { |
| if (device_id == (remapped ? GOMP_DEVICE_HOST_FALLBACK |
| : omp_initial_device)) |
| return NULL; |
| if (device_id == omp_invalid_device) |
| gomp_fatal ("omp_invalid_device encountered"); |
| else if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) |
| gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " |
| "but device not found"); |
| |
| return NULL; |
| } |
| else if (device_id >= gomp_get_num_devices ()) |
| { |
| if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY |
| && device_id != num_devices_openmp) |
| gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " |
| "but device not found"); |
| |
| return NULL; |
| } |
| |
| gomp_mutex_lock (&devices[device_id].lock); |
| if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) |
| gomp_init_device (&devices[device_id]); |
| else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) |
| { |
| gomp_mutex_unlock (&devices[device_id].lock); |
| |
| if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) |
| gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " |
| "but device is finalized"); |
| |
| return NULL; |
| } |
| gomp_mutex_unlock (&devices[device_id].lock); |
| |
| return &devices[device_id]; |
| } |
| |
| |
| static inline splay_tree_key |
| gomp_map_lookup (splay_tree mem_map, splay_tree_key key) |
| { |
| if (key->host_start != key->host_end) |
| return splay_tree_lookup (mem_map, key); |
| |
| key->host_end++; |
| splay_tree_key n = splay_tree_lookup (mem_map, key); |
| key->host_end--; |
| if (n) |
| return n; |
| key->host_start--; |
| n = splay_tree_lookup (mem_map, key); |
| key->host_start++; |
| if (n) |
| return n; |
| return splay_tree_lookup (mem_map, key); |
| } |
| |
| static inline splay_tree_key |
| gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) |
| { |
| if (key->host_start != key->host_end) |
| return splay_tree_lookup (mem_map, key); |
| |
| key->host_end++; |
| splay_tree_key n = splay_tree_lookup (mem_map, key); |
| key->host_end--; |
| return n; |
| } |
| |
| static inline void |
| gomp_device_copy (struct gomp_device_descr *devicep, |
| bool (*copy_func) (int, void *, const void *, size_t), |
| const char *dst, void *dstaddr, |
| const char *src, const void *srcaddr, |
| size_t size) |
| { |
| if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", |
| src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); |
| } |
| } |
| |
| static inline void |
| goacc_device_copy_async (struct gomp_device_descr *devicep, |
| bool (*copy_func) (int, void *, const void *, size_t, |
| struct goacc_asyncqueue *), |
| const char *dst, void *dstaddr, |
| const char *src, const void *srcaddr, |
| const void *srcaddr_orig, |
| size_t size, struct goacc_asyncqueue *aq) |
| { |
| if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| if (srcaddr_orig && srcaddr_orig != srcaddr) |
| gomp_fatal ("Copying of %s object [%p..%p)" |
| " via buffer %s object [%p..%p)" |
| " to %s object [%p..%p) failed", |
| src, srcaddr_orig, srcaddr_orig + size, |
| src, srcaddr, srcaddr + size, |
| dst, dstaddr, dstaddr + size); |
| else |
| gomp_fatal ("Copying of %s object [%p..%p)" |
| " to %s object [%p..%p) failed", |
| src, srcaddr, srcaddr + size, |
| dst, dstaddr, dstaddr + size); |
| } |
| } |
| |
| /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) |
| host to device memory transfers. */ |
| |
| struct gomp_coalesce_chunk |
| { |
| /* The starting and ending point of a coalesced chunk of memory. */ |
| size_t start, end; |
| }; |
| |
| struct gomp_coalesce_buf |
| { |
| /* Buffer into which gomp_copy_host2dev will memcpy data and from which |
| it will be copied to the device. */ |
| void *buf; |
| struct target_mem_desc *tgt; |
| /* Array with offsets, chunks[i].start is the starting offset and |
| chunks[i].end ending offset relative to tgt->tgt_start device address |
| of chunks which are to be copied to buf and later copied to device. */ |
| struct gomp_coalesce_chunk *chunks; |
| /* Number of chunks in chunks array, or -1 if coalesce buffering should not |
| be performed. */ |
| long chunk_cnt; |
| /* During construction of chunks array, how many memory regions are within |
| the last chunk. If there is just one memory region for a chunk, we copy |
| it directly to device rather than going through buf. */ |
| long use_cnt; |
| }; |
| |
| /* Maximum size of memory region considered for coalescing. Larger copies |
| are performed directly. */ |
| #define MAX_COALESCE_BUF_SIZE (32 * 1024) |
| |
| /* Maximum size of a gap in between regions to consider them being copied |
| within the same chunk. All the device offsets considered are within |
| newly allocated device memory, so it isn't fatal if we copy some padding |
| in between from host to device. The gaps come either from alignment |
| padding or from memory regions which are not supposed to be copied from |
| host to device (e.g. map(alloc:), map(from:) etc.). */ |
| #define MAX_COALESCE_BUF_GAP (4 * 1024) |
| |
| /* Add region with device tgt_start relative offset and length to CBUF. |
| |
| This must not be used for asynchronous copies, because the host data might |
| not be computed yet (by an earlier asynchronous compute region, for |
| example). |
| TODO ... but we could allow CBUF usage for EPHEMERAL data? (Open question: |
| is it more performant to use libgomp CBUF buffering or individual device |
| asyncronous copying?) */ |
| |
| static inline void |
| gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) |
| { |
| if (len > MAX_COALESCE_BUF_SIZE || len == 0) |
| return; |
| if (cbuf->chunk_cnt) |
| { |
| if (cbuf->chunk_cnt < 0) |
| return; |
| if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end) |
| { |
| cbuf->chunk_cnt = -1; |
| return; |
| } |
| if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP) |
| { |
| cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len; |
| cbuf->use_cnt++; |
| return; |
| } |
| /* If the last chunk is only used by one mapping, discard it, |
| as it will be one host to device copy anyway and |
| memcpying it around will only waste cycles. */ |
| if (cbuf->use_cnt == 1) |
| cbuf->chunk_cnt--; |
| } |
| cbuf->chunks[cbuf->chunk_cnt].start = start; |
| cbuf->chunks[cbuf->chunk_cnt].end = start + len; |
| cbuf->chunk_cnt++; |
| cbuf->use_cnt = 1; |
| } |
| |
| /* Return true for mapping kinds which need to copy data from the |
| host to device for regions that weren't previously mapped. */ |
| |
| static inline bool |
| gomp_to_device_kind_p (int kind) |
| { |
| switch (kind) |
| { |
| case GOMP_MAP_ALLOC: |
| case GOMP_MAP_FROM: |
| case GOMP_MAP_FORCE_ALLOC: |
| case GOMP_MAP_FORCE_FROM: |
| case GOMP_MAP_ALWAYS_FROM: |
| return false; |
| default: |
| return true; |
| } |
| } |
| |
| /* Copy host memory to an offload device. In asynchronous mode (if AQ is |
| non-NULL), when the source data is stack or may otherwise be deallocated |
| before the asynchronous copy takes place, EPHEMERAL must be passed as |
| TRUE. */ |
| |
| attribute_hidden void |
| gomp_copy_host2dev (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, |
| void *d, const void *h, size_t sz, |
| bool ephemeral, struct gomp_coalesce_buf *cbuf) |
| { |
| if (__builtin_expect (aq != NULL, 0)) |
| { |
| /* See 'gomp_coalesce_buf_add'. */ |
| assert (!cbuf); |
| |
| void *h_buf = (void *) h; |
| if (ephemeral) |
| { |
| /* We're queueing up an asynchronous copy from data that may |
| disappear before the transfer takes place (i.e. because it is a |
| stack local in a function that is no longer executing). Make a |
| copy of the data into a temporary buffer in those cases. */ |
| h_buf = gomp_malloc (sz); |
| memcpy (h_buf, h, sz); |
| } |
| goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, |
| "dev", d, "host", h_buf, h, sz, aq); |
| if (ephemeral) |
| /* Free temporary buffer once the transfer has completed. */ |
| devicep->openacc.async.queue_callback_func (aq, free, h_buf); |
| |
| return; |
| } |
| |
| if (cbuf) |
| { |
| uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start; |
| if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end) |
| { |
| long first = 0; |
| long last = cbuf->chunk_cnt - 1; |
| while (first <= last) |
| { |
| long middle = (first + last) >> 1; |
| if (cbuf->chunks[middle].end <= doff) |
| first = middle + 1; |
| else if (cbuf->chunks[middle].start <= doff) |
| { |
| if (doff + sz > cbuf->chunks[middle].end) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("internal libgomp cbuf error"); |
| } |
| memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start), |
| h, sz); |
| return; |
| } |
| else |
| last = middle - 1; |
| } |
| } |
| } |
| |
| gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); |
| } |
| |
| attribute_hidden void |
| gomp_copy_dev2host (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, |
| void *h, const void *d, size_t sz) |
| { |
| if (__builtin_expect (aq != NULL, 0)) |
| goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, |
| "host", h, "dev", d, NULL, sz, aq); |
| else |
| gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); |
| } |
| |
| static void |
| gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) |
| { |
| if (!devicep->free_func (devicep->target_id, devptr)) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("error in freeing device memory block at %p", devptr); |
| } |
| } |
| |
| /* Increment reference count of a splay_tree_key region K by 1. |
| If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only |
| increment the value if refcount is not yet contained in the set (used for |
| OpenMP 5.0, which specifies that a region's refcount is adjusted at most |
| once for each construct). */ |
| |
| static inline void |
| gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set) |
| { |
| if (k == NULL || k->refcount == REFCOUNT_INFINITY) |
| return; |
| |
| uintptr_t *refcount_ptr = &k->refcount; |
| |
| if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) |
| refcount_ptr = &k->structelem_refcount; |
| else if (REFCOUNT_STRUCTELEM_P (k->refcount)) |
| refcount_ptr = k->structelem_refcount_ptr; |
| |
| if (refcount_set) |
| { |
| if (htab_find (*refcount_set, refcount_ptr)) |
| return; |
| uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); |
| *slot = refcount_ptr; |
| } |
| |
| *refcount_ptr += 1; |
| return; |
| } |
| |
| /* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P |
| is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to |
| track already seen refcounts, and only adjust the value if refcount is not |
| yet contained in the set (like gomp_increment_refcount). |
| |
| Return out-values: set *DO_COPY to true if we set the refcount to zero, or |
| it is already zero and we know we decremented it earlier. This signals that |
| associated maps should be copied back to host. |
| |
| *DO_REMOVE is set to true when we this is the first handling of this refcount |
| and we are setting it to zero. This signals a removal of this key from the |
| splay-tree map. |
| |
| Copy and removal are separated due to cases like handling of structure |
| elements, e.g. each map of a structure element representing a possible copy |
| out of a structure field has to be handled individually, but we only signal |
| removal for one (the first encountered) sibing map. */ |
| |
| static inline void |
| gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p, |
| bool *do_copy, bool *do_remove) |
| { |
| if (k == NULL || k->refcount == REFCOUNT_INFINITY) |
| { |
| *do_copy = *do_remove = false; |
| return; |
| } |
| |
| uintptr_t *refcount_ptr = &k->refcount; |
| |
| if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)) |
| refcount_ptr = &k->structelem_refcount; |
| else if (REFCOUNT_STRUCTELEM_P (k->refcount)) |
| refcount_ptr = k->structelem_refcount_ptr; |
| |
| bool new_encountered_refcount; |
| bool set_to_zero = false; |
| bool is_zero = false; |
| |
| uintptr_t orig_refcount = *refcount_ptr; |
| |
| if (refcount_set) |
| { |
| if (htab_find (*refcount_set, refcount_ptr)) |
| { |
| new_encountered_refcount = false; |
| goto end; |
| } |
| |
| uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT); |
| *slot = refcount_ptr; |
| new_encountered_refcount = true; |
| } |
| else |
| /* If no refcount_set being used, assume all keys are being decremented |
| for the first time. */ |
| new_encountered_refcount = true; |
| |
| if (delete_p) |
| *refcount_ptr = 0; |
| else if (*refcount_ptr > 0) |
| *refcount_ptr -= 1; |
| |
| end: |
| if (*refcount_ptr == 0) |
| { |
| if (orig_refcount > 0) |
| set_to_zero = true; |
| |
| is_zero = true; |
| } |
| |
| *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero)); |
| *do_remove = (new_encountered_refcount && set_to_zero); |
| } |
| |
| /* Handle the case where gomp_map_lookup, splay_tree_lookup or |
| gomp_map_0len_lookup found oldn for newn. |
| Helper function of gomp_map_vars. */ |
| |
| static inline void |
| gomp_map_vars_existing (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, splay_tree_key oldn, |
| splay_tree_key newn, struct target_var_desc *tgt_var, |
| unsigned char kind, bool always_to_flag, bool implicit, |
| struct gomp_coalesce_buf *cbuf, |
| htab_t *refcount_set) |
| { |
| assert (kind != GOMP_MAP_ATTACH |
| || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); |
| |
| tgt_var->key = oldn; |
| tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); |
| tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); |
| tgt_var->is_attach = false; |
| tgt_var->offset = newn->host_start - oldn->host_start; |
| |
| /* For implicit maps, old contained in new is valid. */ |
| bool implicit_subset = (implicit |
| && newn->host_start <= oldn->host_start |
| && oldn->host_end <= newn->host_end); |
| if (implicit_subset) |
| tgt_var->length = oldn->host_end - oldn->host_start; |
| else |
| tgt_var->length = newn->host_end - newn->host_start; |
| |
| if ((kind & GOMP_MAP_FLAG_FORCE) |
| /* For implicit maps, old contained in new is valid. */ |
| || !(implicit_subset |
| /* Otherwise, new contained inside old is considered valid. */ |
| || (oldn->host_start <= newn->host_start |
| && newn->host_end <= oldn->host_end))) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("Trying to map into device [%p..%p) object when " |
| "[%p..%p) is already mapped", |
| (void *) newn->host_start, (void *) newn->host_end, |
| (void *) oldn->host_start, (void *) oldn->host_end); |
| } |
| |
| if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag) |
| { |
| /* Implicit + always should not happen. If this does occur, below |
| address/length adjustment is a TODO. */ |
| assert (!implicit_subset); |
| |
| if (oldn->aux && oldn->aux->attach_count) |
| { |
| /* We have to be careful not to overwrite still attached pointers |
| during the copyback to host. */ |
| uintptr_t addr = newn->host_start; |
| while (addr < newn->host_end) |
| { |
| size_t i = (addr - oldn->host_start) / sizeof (void *); |
| if (oldn->aux->attach_count[i] == 0) |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (oldn->tgt->tgt_start |
| + oldn->tgt_offset |
| + addr - oldn->host_start), |
| (void *) addr, |
| sizeof (void *), false, cbuf); |
| addr += sizeof (void *); |
| } |
| } |
| else |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (oldn->tgt->tgt_start + oldn->tgt_offset |
| + newn->host_start - oldn->host_start), |
| (void *) newn->host_start, |
| newn->host_end - newn->host_start, false, cbuf); |
| } |
| |
| gomp_increment_refcount (oldn, refcount_set); |
| } |
| |
| static int |
| get_kind (bool short_mapkind, void *kinds, int idx) |
| { |
| if (!short_mapkind) |
| return ((unsigned char *) kinds)[idx]; |
| |
| int val = ((unsigned short *) kinds)[idx]; |
| if (GOMP_MAP_IMPLICIT_P (val)) |
| val &= ~GOMP_MAP_IMPLICIT; |
| return val; |
| } |
| |
| |
| static bool |
| get_implicit (bool short_mapkind, void *kinds, int idx) |
| { |
| if (!short_mapkind) |
| return false; |
| |
| int val = ((unsigned short *) kinds)[idx]; |
| return GOMP_MAP_IMPLICIT_P (val); |
| } |
| |
| static void |
| gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, |
| uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias, |
| struct gomp_coalesce_buf *cbuf, |
| bool allow_zero_length_array_sections) |
| { |
| struct gomp_device_descr *devicep = tgt->device_descr; |
| struct splay_tree_s *mem_map = &devicep->mem_map; |
| struct splay_tree_key_s cur_node; |
| |
| cur_node.host_start = host_ptr; |
| if (cur_node.host_start == (uintptr_t) NULL) |
| { |
| cur_node.tgt_offset = (uintptr_t) NULL; |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start + target_offset), |
| (void *) &cur_node.tgt_offset, sizeof (void *), |
| true, cbuf); |
| return; |
| } |
| /* Add bias to the pointer value. */ |
| cur_node.host_start += bias; |
| cur_node.host_end = cur_node.host_start; |
| splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); |
| if (n == NULL) |
| { |
| if (allow_zero_length_array_sections) |
| cur_node.tgt_offset = 0; |
| else |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("Pointer target of array section wasn't mapped"); |
| } |
| } |
| else |
| { |
| cur_node.host_start -= n->host_start; |
| cur_node.tgt_offset |
| = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; |
| /* At this point tgt_offset is target address of the |
| array section. Now subtract bias to get what we want |
| to initialize the pointer with. */ |
| cur_node.tgt_offset -= bias; |
| } |
| gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), |
| (void *) &cur_node.tgt_offset, sizeof (void *), |
| true, cbuf); |
| } |
| |
| static void |
| gomp_map_fields_existing (struct target_mem_desc *tgt, |
| struct goacc_asyncqueue *aq, splay_tree_key n, |
| size_t first, size_t i, void **hostaddrs, |
| size_t *sizes, void *kinds, |
| struct gomp_coalesce_buf *cbuf, htab_t *refcount_set) |
| { |
| struct gomp_device_descr *devicep = tgt->device_descr; |
| struct splay_tree_s *mem_map = &devicep->mem_map; |
| struct splay_tree_key_s cur_node; |
| int kind; |
| bool implicit; |
| const bool short_mapkind = true; |
| const int typemask = short_mapkind ? 0xff : 0x7; |
| |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = cur_node.host_start + sizes[i]; |
| splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); |
| kind = get_kind (short_mapkind, kinds, i); |
| implicit = get_implicit (short_mapkind, kinds, i); |
| if (n2 |
| && n2->tgt == n->tgt |
| && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) |
| { |
| gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], |
| kind & typemask, false, implicit, cbuf, |
| refcount_set); |
| return; |
| } |
| if (sizes[i] == 0) |
| { |
| if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) |
| { |
| cur_node.host_start--; |
| n2 = splay_tree_lookup (mem_map, &cur_node); |
| cur_node.host_start++; |
| if (n2 |
| && n2->tgt == n->tgt |
| && n2->host_start - n->host_start |
| == n2->tgt_offset - n->tgt_offset) |
| { |
| gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], |
| kind & typemask, false, implicit, cbuf, |
| refcount_set); |
| return; |
| } |
| } |
| cur_node.host_end++; |
| n2 = splay_tree_lookup (mem_map, &cur_node); |
| cur_node.host_end--; |
| if (n2 |
| && n2->tgt == n->tgt |
| && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) |
| { |
| gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], |
| kind & typemask, false, implicit, cbuf, |
| refcount_set); |
| return; |
| } |
| } |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("Trying to map into device [%p..%p) structure element when " |
| "other mapped elements from the same structure weren't mapped " |
| "together with it", (void *) cur_node.host_start, |
| (void *) cur_node.host_end); |
| } |
| |
| attribute_hidden void |
| gomp_attach_pointer (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, splay_tree mem_map, |
| splay_tree_key n, uintptr_t attach_to, size_t bias, |
| struct gomp_coalesce_buf *cbufp, |
| bool allow_zero_length_array_sections) |
| { |
| struct splay_tree_key_s s; |
| size_t size, idx; |
| |
| if (n == NULL) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("enclosing struct not mapped for attach"); |
| } |
| |
| size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *); |
| /* We might have a pointer in a packed struct: however we cannot have more |
| than one such pointer in each pointer-sized portion of the struct, so |
| this is safe. */ |
| idx = (attach_to - n->host_start) / sizeof (void *); |
| |
| if (!n->aux) |
| n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); |
| |
| if (!n->aux->attach_count) |
| n->aux->attach_count |
| = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size); |
| |
| if (n->aux->attach_count[idx] < UINTPTR_MAX) |
| n->aux->attach_count[idx]++; |
| else |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("attach count overflow"); |
| } |
| |
| if (n->aux->attach_count[idx] == 1) |
| { |
| uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to |
| - n->host_start; |
| uintptr_t target = (uintptr_t) *(void **) attach_to; |
| splay_tree_key tn; |
| uintptr_t data; |
| |
| if ((void *) target == NULL) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("attempt to attach null pointer"); |
| } |
| |
| s.host_start = target + bias; |
| s.host_end = s.host_start + 1; |
| tn = splay_tree_lookup (mem_map, &s); |
| |
| if (!tn) |
| { |
| if (allow_zero_length_array_sections) |
| /* When allowing attachment to zero-length array sections, we |
| allow attaching to NULL pointers when the target region is not |
| mapped. */ |
| data = 0; |
| else |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("pointer target not mapped for attach"); |
| } |
| } |
| else |
| data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; |
| |
| gomp_debug (1, |
| "%s: attaching host %p, target %p (struct base %p) to %p\n", |
| __FUNCTION__, (void *) attach_to, (void *) devptr, |
| (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); |
| |
| gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, |
| sizeof (void *), true, cbufp); |
| } |
| else |
| gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, |
| (void *) attach_to, (int) n->aux->attach_count[idx]); |
| } |
| |
| attribute_hidden void |
| gomp_detach_pointer (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, splay_tree_key n, |
| uintptr_t detach_from, bool finalize, |
| struct gomp_coalesce_buf *cbufp) |
| { |
| size_t idx; |
| |
| if (n == NULL) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("enclosing struct not mapped for detach"); |
| } |
| |
| idx = (detach_from - n->host_start) / sizeof (void *); |
| |
| if (!n->aux || !n->aux->attach_count) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("no attachment counters for struct"); |
| } |
| |
| if (finalize) |
| n->aux->attach_count[idx] = 1; |
| |
| if (n->aux->attach_count[idx] == 0) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("attach count underflow"); |
| } |
| else |
| n->aux->attach_count[idx]--; |
| |
| if (n->aux->attach_count[idx] == 0) |
| { |
| uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from |
| - n->host_start; |
| uintptr_t target = (uintptr_t) *(void **) detach_from; |
| |
| gomp_debug (1, |
| "%s: detaching host %p, target %p (struct base %p) to %p\n", |
| __FUNCTION__, (void *) detach_from, (void *) devptr, |
| (void *) (n->tgt->tgt_start + n->tgt_offset), |
| (void *) target); |
| |
| gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, |
| sizeof (void *), true, cbufp); |
| } |
| else |
| gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, |
| (void *) detach_from, (int) n->aux->attach_count[idx]); |
| } |
| |
| attribute_hidden uintptr_t |
| gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i) |
| { |
| if (tgt->list[i].key != NULL) |
| return tgt->list[i].key->tgt->tgt_start |
| + tgt->list[i].key->tgt_offset |
| + tgt->list[i].offset; |
| |
| switch (tgt->list[i].offset) |
| { |
| case OFFSET_INLINED: |
| return (uintptr_t) hostaddrs[i]; |
| |
| case OFFSET_POINTER: |
| return 0; |
| |
| case OFFSET_STRUCT: |
| return tgt->list[i + 1].key->tgt->tgt_start |
| + tgt->list[i + 1].key->tgt_offset |
| + tgt->list[i + 1].offset |
| + (uintptr_t) hostaddrs[i] |
| - (uintptr_t) hostaddrs[i + 1]; |
| |
| default: |
| return tgt->tgt_start + tgt->list[i].offset; |
| } |
| } |
| |
| static inline __attribute__((always_inline)) struct target_mem_desc * |
| gomp_map_vars_internal (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, size_t mapnum, |
| void **hostaddrs, void **devaddrs, size_t *sizes, |
| void *kinds, bool short_mapkind, |
| htab_t *refcount_set, |
| enum gomp_map_vars_kind pragma_kind) |
| { |
| size_t i, tgt_align, tgt_size, not_found_cnt = 0; |
| bool has_firstprivate = false; |
| bool has_always_ptrset = false; |
| bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0; |
| const int rshift = short_mapkind ? 8 : 3; |
| const int typemask = short_mapkind ? 0xff : 0x7; |
| struct splay_tree_s *mem_map = &devicep->mem_map; |
| struct splay_tree_key_s cur_node; |
| struct target_mem_desc *tgt |
| = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); |
| tgt->list_count = mapnum; |
| tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1; |
| tgt->device_descr = devicep; |
| tgt->prev = NULL; |
| struct gomp_coalesce_buf cbuf, *cbufp = NULL; |
| |
| if (mapnum == 0) |
| { |
| tgt->tgt_start = 0; |
| tgt->tgt_end = 0; |
| return tgt; |
| } |
| |
| tgt_align = sizeof (void *); |
| tgt_size = 0; |
| cbuf.chunks = NULL; |
| cbuf.chunk_cnt = -1; |
| cbuf.use_cnt = 0; |
| cbuf.buf = NULL; |
| if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) |
| { |
| size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); |
| cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); |
| cbuf.chunk_cnt = 0; |
| } |
| if (pragma_kind == GOMP_MAP_VARS_TARGET) |
| { |
| size_t align = 4 * sizeof (void *); |
| tgt_align = align; |
| tgt_size = mapnum * sizeof (void *); |
| cbuf.chunk_cnt = 1; |
| cbuf.use_cnt = 1 + (mapnum > 1); |
| cbuf.chunks[0].start = 0; |
| cbuf.chunks[0].end = tgt_size; |
| } |
| |
| gomp_mutex_lock (&devicep->lock); |
| if (devicep->state == GOMP_DEVICE_FINALIZED) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| free (tgt); |
| return NULL; |
| } |
| |
| for (i = 0; i < mapnum; i++) |
| { |
| int kind = get_kind (short_mapkind, kinds, i); |
| bool implicit = get_implicit (short_mapkind, kinds, i); |
| if (hostaddrs[i] == NULL |
| || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) |
| { |
| tgt->list[i].key = NULL; |
| tgt->list[i].offset = OFFSET_INLINED; |
| continue; |
| } |
| else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR |
| || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) |
| { |
| tgt->list[i].key = NULL; |
| if (!not_found_cnt) |
| { |
| /* In OpenMP < 5.0 and OpenACC the mapping has to be done |
| on a separate construct prior to using use_device_{addr,ptr}. |
| In OpenMP 5.0, map directives need to be ordered by the |
| middle-end before the use_device_* clauses. If |
| !not_found_cnt, all mappings requested (if any) are already |
| mapped, so use_device_{addr,ptr} can be resolved right away. |
| Otherwise, if not_found_cnt, gomp_map_lookup might fail |
| now but would succeed after performing the mappings in the |
| following loop. We can't defer this always to the second |
| loop, because it is not even invoked when !not_found_cnt |
| after the first loop. */ |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = cur_node.host_start; |
| splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); |
| if (n != NULL) |
| { |
| cur_node.host_start -= n->host_start; |
| hostaddrs[i] |
| = (void *) (n->tgt->tgt_start + n->tgt_offset |
| + cur_node.host_start); |
| } |
| else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("use_device_ptr pointer wasn't mapped"); |
| } |
| else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) |
| /* If not present, continue using the host address. */ |
| ; |
| else |
| __builtin_unreachable (); |
| tgt->list[i].offset = OFFSET_INLINED; |
| } |
| else |
| tgt->list[i].offset = 0; |
| continue; |
| } |
| else if ((kind & typemask) == GOMP_MAP_STRUCT) |
| { |
| size_t first = i + 1; |
| size_t last = i + sizes[i]; |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = (uintptr_t) hostaddrs[last] |
| + sizes[last]; |
| tgt->list[i].key = NULL; |
| tgt->list[i].offset = OFFSET_STRUCT; |
| splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); |
| if (n == NULL) |
| { |
| size_t align = (size_t) 1 << (kind >> rshift); |
| if (tgt_align < align) |
| tgt_align = align; |
| tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| tgt_size += cur_node.host_end - cur_node.host_start; |
| not_found_cnt += last - i; |
| for (i = first; i <= last; i++) |
| { |
| tgt->list[i].key = NULL; |
| if (!aq |
| && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) |
| & typemask)) |
| gomp_coalesce_buf_add (&cbuf, |
| tgt_size - cur_node.host_end |
| + (uintptr_t) hostaddrs[i], |
| sizes[i]); |
| } |
| i--; |
| continue; |
| } |
| for (i = first; i <= last; i++) |
| gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, |
| sizes, kinds, NULL, refcount_set); |
| i--; |
| continue; |
| } |
| else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) |
| { |
| tgt->list[i].key = NULL; |
| tgt->list[i].offset = OFFSET_POINTER; |
| has_firstprivate = true; |
| continue; |
| } |
| else if ((kind & typemask) == GOMP_MAP_ATTACH |
| || ((kind & typemask) |
| == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)) |
| { |
| tgt->list[i].key = NULL; |
| has_firstprivate = true; |
| continue; |
| } |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| if (!GOMP_MAP_POINTER_P (kind & typemask)) |
| cur_node.host_end = cur_node.host_start + sizes[i]; |
| else |
| cur_node.host_end = cur_node.host_start + sizeof (void *); |
| if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) |
| { |
| tgt->list[i].key = NULL; |
| |
| size_t align = (size_t) 1 << (kind >> rshift); |
| if (tgt_align < align) |
| tgt_align = align; |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| if (!aq) |
| gomp_coalesce_buf_add (&cbuf, tgt_size, |
| cur_node.host_end - cur_node.host_start); |
| tgt_size += cur_node.host_end - cur_node.host_start; |
| has_firstprivate = true; |
| continue; |
| } |
| splay_tree_key n; |
| if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) |
| { |
| n = gomp_map_0len_lookup (mem_map, &cur_node); |
| if (!n) |
| { |
| tgt->list[i].key = NULL; |
| tgt->list[i].offset = OFFSET_POINTER; |
| continue; |
| } |
| } |
| else |
| n = splay_tree_lookup (mem_map, &cur_node); |
| if (n && n->refcount != REFCOUNT_LINK) |
| { |
| int always_to_cnt = 0; |
| if ((kind & typemask) == GOMP_MAP_TO_PSET) |
| { |
| bool has_nullptr = false; |
| size_t j; |
| for (j = 0; j < n->tgt->list_count; j++) |
| if (n->tgt->list[j].key == n) |
| { |
| has_nullptr = n->tgt->list[j].has_null_ptr_assoc; |
| break; |
| } |
| if (n->tgt->list_count == 0) |
| { |
| /* 'declare target'; assume has_nullptr; it could also be |
| statically assigned pointer, but that it should be to |
| the equivalent variable on the host. */ |
| assert (n->refcount == REFCOUNT_INFINITY); |
| has_nullptr = true; |
| } |
| else |
| assert (j < n->tgt->list_count); |
| /* Re-map the data if there is an 'always' modifier or if it a |
| null pointer was there and non a nonnull has been found; that |
| permits transparent re-mapping for Fortran array descriptors |
| which were previously mapped unallocated. */ |
| for (j = i + 1; j < mapnum; j++) |
| { |
| int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; |
| if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) |
| && (!has_nullptr |
| || !GOMP_MAP_POINTER_P (ptr_kind) |
| || *(void **) hostaddrs[j] == NULL)) |
| break; |
| else if ((uintptr_t) hostaddrs[j] < cur_node.host_start |
| || ((uintptr_t) hostaddrs[j] + sizeof (void *) |
| > cur_node.host_end)) |
| break; |
| else |
| { |
| has_always_ptrset = true; |
| ++always_to_cnt; |
| } |
| } |
| } |
| gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], |
| kind & typemask, always_to_cnt > 0, implicit, |
| NULL, refcount_set); |
| i += always_to_cnt; |
| } |
| else |
| { |
| tgt->list[i].key = NULL; |
| |
| if ((kind & typemask) == GOMP_MAP_IF_PRESENT) |
| { |
| /* Not present, hence, skip entry - including its MAP_POINTER, |
| when existing. */ |
| tgt->list[i].offset = OFFSET_POINTER; |
| if (i + 1 < mapnum |
| && ((typemask & get_kind (short_mapkind, kinds, i + 1)) |
| == GOMP_MAP_POINTER)) |
| { |
| ++i; |
| tgt->list[i].key = NULL; |
| tgt->list[i].offset = 0; |
| } |
| continue; |
| } |
| size_t align = (size_t) 1 << (kind >> rshift); |
| not_found_cnt++; |
| if (tgt_align < align) |
| tgt_align = align; |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| if (!aq |
| && gomp_to_device_kind_p (kind & typemask)) |
| gomp_coalesce_buf_add (&cbuf, tgt_size, |
| cur_node.host_end - cur_node.host_start); |
| tgt_size += cur_node.host_end - cur_node.host_start; |
| if ((kind & typemask) == GOMP_MAP_TO_PSET) |
| { |
| size_t j; |
| int kind; |
| for (j = i + 1; j < mapnum; j++) |
| if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind, |
| kinds, j)) & typemask)) |
| && !GOMP_MAP_ALWAYS_POINTER_P (kind)) |
| break; |
| else if ((uintptr_t) hostaddrs[j] < cur_node.host_start |
| || ((uintptr_t) hostaddrs[j] + sizeof (void *) |
| > cur_node.host_end)) |
| break; |
| else |
| { |
| tgt->list[j].key = NULL; |
| i++; |
| } |
| } |
| } |
| } |
| |
| if (devaddrs) |
| { |
| if (mapnum != 1) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("unexpected aggregation"); |
| } |
| tgt->to_free = devaddrs[0]; |
| tgt->tgt_start = (uintptr_t) tgt->to_free; |
| tgt->tgt_end = tgt->tgt_start + sizes[0]; |
| } |
| else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) |
| { |
| /* Allocate tgt_align aligned tgt_size block of memory. */ |
| /* FIXME: Perhaps change interface to allocate properly aligned |
| memory. */ |
| tgt->to_free = devicep->alloc_func (devicep->target_id, |
| tgt_size + tgt_align - 1); |
| if (!tgt->to_free) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("device memory allocation fail"); |
| } |
| |
| tgt->tgt_start = (uintptr_t) tgt->to_free; |
| tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); |
| tgt->tgt_end = tgt->tgt_start + tgt_size; |
| |
| if (cbuf.use_cnt == 1) |
| cbuf.chunk_cnt--; |
| if (cbuf.chunk_cnt > 0) |
| { |
| cbuf.buf |
| = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start); |
| if (cbuf.buf) |
| { |
| cbuf.tgt = tgt; |
| cbufp = &cbuf; |
| } |
| } |
| } |
| else |
| { |
| tgt->to_free = NULL; |
| tgt->tgt_start = 0; |
| tgt->tgt_end = 0; |
| } |
| |
| tgt_size = 0; |
| if (pragma_kind == GOMP_MAP_VARS_TARGET) |
| tgt_size = mapnum * sizeof (void *); |
| |
| tgt->array = NULL; |
| if (not_found_cnt || has_firstprivate || has_always_ptrset) |
| { |
| if (not_found_cnt) |
| tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); |
| splay_tree_node array = tgt->array; |
| size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; |
| uintptr_t field_tgt_base = 0; |
| splay_tree_key field_tgt_structelem_first = NULL; |
| |
| for (i = 0; i < mapnum; i++) |
| if (has_always_ptrset |
| && tgt->list[i].key |
| && (get_kind (short_mapkind, kinds, i) & typemask) |
| == GOMP_MAP_TO_PSET) |
| { |
| splay_tree_key k = tgt->list[i].key; |
| bool has_nullptr = false; |
| size_t j; |
| for (j = 0; j < k->tgt->list_count; j++) |
| if (k->tgt->list[j].key == k) |
| { |
| has_nullptr = k->tgt->list[j].has_null_ptr_assoc; |
| break; |
| } |
| if (k->tgt->list_count == 0) |
| has_nullptr = true; |
| else |
| assert (j < k->tgt->list_count); |
| |
| tgt->list[i].has_null_ptr_assoc = false; |
| for (j = i + 1; j < mapnum; j++) |
| { |
| int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; |
| if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) |
| && (!has_nullptr |
| || !GOMP_MAP_POINTER_P (ptr_kind) |
| || *(void **) hostaddrs[j] == NULL)) |
| break; |
| else if ((uintptr_t) hostaddrs[j] < k->host_start |
| || ((uintptr_t) hostaddrs[j] + sizeof (void *) |
| > k->host_end)) |
| break; |
| else |
| { |
| if (*(void **) hostaddrs[j] == NULL) |
| tgt->list[i].has_null_ptr_assoc = true; |
| tgt->list[j].key = k; |
| tgt->list[j].copy_from = false; |
| tgt->list[j].always_copy_from = false; |
| tgt->list[j].is_attach = false; |
| gomp_increment_refcount (k, refcount_set); |
| gomp_map_pointer (k->tgt, aq, |
| (uintptr_t) *(void **) hostaddrs[j], |
| k->tgt_offset + ((uintptr_t) hostaddrs[j] |
| - k->host_start), |
| sizes[j], cbufp, false); |
| } |
| } |
| i = j - 1; |
| } |
| else if (tgt->list[i].key == NULL) |
| { |
| int kind = get_kind (short_mapkind, kinds, i); |
| bool implicit = get_implicit (short_mapkind, kinds, i); |
| if (hostaddrs[i] == NULL) |
| continue; |
| switch (kind & typemask) |
| { |
| size_t align, len, first, last; |
| splay_tree_key n; |
| case GOMP_MAP_FIRSTPRIVATE: |
| align = (size_t) 1 << (kind >> rshift); |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| tgt->list[i].offset = tgt_size; |
| len = sizes[i]; |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start + tgt_size), |
| (void *) hostaddrs[i], len, false, cbufp); |
| /* Save device address in hostaddr to permit latter availablity |
| when doing a deep-firstprivate with pointer attach. */ |
| hostaddrs[i] = (void *) (tgt->tgt_start + tgt_size); |
| tgt_size += len; |
| |
| /* If followed by GOMP_MAP_ATTACH, pointer assign this |
| firstprivate to hostaddrs[i+1], which is assumed to contain a |
| device address. */ |
| if (i + 1 < mapnum |
| && (GOMP_MAP_ATTACH |
| == (typemask & get_kind (short_mapkind, kinds, i+1)))) |
| { |
| uintptr_t target = (uintptr_t) hostaddrs[i]; |
| void *devptr = *(void**) hostaddrs[i+1] + sizes[i+1]; |
| gomp_copy_host2dev (devicep, aq, devptr, &target, |
| sizeof (void *), false, cbufp); |
| ++i; |
| } |
| continue; |
| case GOMP_MAP_FIRSTPRIVATE_INT: |
| case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: |
| continue; |
| case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: |
| /* The OpenACC 'host_data' construct only allows 'use_device' |
| "mapping" clauses, so in the first loop, 'not_found_cnt' |
| must always have been zero, so all OpenACC 'use_device' |
| clauses have already been handled. (We can only easily test |
| 'use_device' with 'if_present' clause here.) */ |
| assert (tgt->list[i].offset == OFFSET_INLINED); |
| /* Nevertheless, FALLTHRU to the normal handling, to keep the |
| code conceptually simple, similar to the first loop. */ |
| case GOMP_MAP_USE_DEVICE_PTR: |
| if (tgt->list[i].offset == 0) |
| { |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = cur_node.host_start; |
| n = gomp_map_lookup (mem_map, &cur_node); |
| if (n != NULL) |
| { |
| cur_node.host_start -= n->host_start; |
| hostaddrs[i] |
| = (void *) (n->tgt->tgt_start + n->tgt_offset |
| + cur_node.host_start); |
| } |
| else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("use_device_ptr pointer wasn't mapped"); |
| } |
| else if ((kind & typemask) |
| == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) |
| /* If not present, continue using the host address. */ |
| ; |
| else |
| __builtin_unreachable (); |
| tgt->list[i].offset = OFFSET_INLINED; |
| } |
| continue; |
| case GOMP_MAP_STRUCT: |
| first = i + 1; |
| last = i + sizes[i]; |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = (uintptr_t) hostaddrs[last] |
| + sizes[last]; |
| if (tgt->list[first].key != NULL) |
| continue; |
| n = splay_tree_lookup (mem_map, &cur_node); |
| if (n == NULL) |
| { |
| size_t align = (size_t) 1 << (kind >> rshift); |
| tgt_size -= (uintptr_t) hostaddrs[first] |
| - (uintptr_t) hostaddrs[i]; |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| tgt_size += (uintptr_t) hostaddrs[first] |
| - (uintptr_t) hostaddrs[i]; |
| field_tgt_base = (uintptr_t) hostaddrs[first]; |
| field_tgt_offset = tgt_size; |
| field_tgt_clear = last; |
| field_tgt_structelem_first = NULL; |
| tgt_size += cur_node.host_end |
| - (uintptr_t) hostaddrs[first]; |
| continue; |
| } |
| for (i = first; i <= last; i++) |
| gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, |
| sizes, kinds, cbufp, refcount_set); |
| i--; |
| continue; |
| case GOMP_MAP_ALWAYS_POINTER: |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = cur_node.host_start + sizeof (void *); |
| n = splay_tree_lookup (mem_map, &cur_node); |
| if (n == NULL |
| || n->host_start > cur_node.host_start |
| || n->host_end < cur_node.host_end) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("always pointer not mapped"); |
| } |
| if ((get_kind (short_mapkind, kinds, i - 1) & typemask) |
| != GOMP_MAP_ALWAYS_POINTER) |
| cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); |
| if (cur_node.tgt_offset) |
| cur_node.tgt_offset -= sizes[i]; |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (n->tgt->tgt_start |
| + n->tgt_offset |
| + cur_node.host_start |
| - n->host_start), |
| (void *) &cur_node.tgt_offset, |
| sizeof (void *), true, cbufp); |
| cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset |
| + cur_node.host_start - n->host_start; |
| continue; |
| case GOMP_MAP_IF_PRESENT: |
| /* Not present - otherwise handled above. Skip over its |
| MAP_POINTER as well. */ |
| if (i + 1 < mapnum |
| && ((typemask & get_kind (short_mapkind, kinds, i + 1)) |
| == GOMP_MAP_POINTER)) |
| ++i; |
| continue; |
| case GOMP_MAP_ATTACH: |
| case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: |
| { |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = cur_node.host_start + sizeof (void *); |
| splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); |
| if (n != NULL) |
| { |
| tgt->list[i].key = n; |
| tgt->list[i].offset = cur_node.host_start - n->host_start; |
| tgt->list[i].length = n->host_end - n->host_start; |
| tgt->list[i].copy_from = false; |
| tgt->list[i].always_copy_from = false; |
| tgt->list[i].is_attach = true; |
| /* OpenACC 'attach'/'detach' doesn't affect |
| structured/dynamic reference counts ('n->refcount', |
| 'n->dynamic_refcount'). */ |
| |
| bool zlas |
| = ((kind & typemask) |
| == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); |
| gomp_attach_pointer (devicep, aq, mem_map, n, |
| (uintptr_t) hostaddrs[i], sizes[i], |
| cbufp, zlas); |
| } |
| else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("outer struct not mapped for attach"); |
| } |
| continue; |
| } |
| default: |
| break; |
| } |
| splay_tree_key k = &array->key; |
| k->host_start = (uintptr_t) hostaddrs[i]; |
| if (!GOMP_MAP_POINTER_P (kind & typemask)) |
| k->host_end = k->host_start + sizes[i]; |
| else |
| k->host_end = k->host_start + sizeof (void *); |
| splay_tree_key n = splay_tree_lookup (mem_map, k); |
| if (n && n->refcount != REFCOUNT_LINK) |
| gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], |
| kind & typemask, false, implicit, cbufp, |
| refcount_set); |
| else |
| { |
| k->aux = NULL; |
| if (n && n->refcount == REFCOUNT_LINK) |
| { |
| /* Replace target address of the pointer with target address |
| of mapped object in the splay tree. */ |
| splay_tree_remove (mem_map, n); |
| k->aux |
| = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); |
| k->aux->link_key = n; |
| } |
| size_t align = (size_t) 1 << (kind >> rshift); |
| tgt->list[i].key = k; |
| k->tgt = tgt; |
| k->refcount = 0; |
| k->dynamic_refcount = 0; |
| if (field_tgt_clear != FIELD_TGT_EMPTY) |
| { |
| k->tgt_offset = k->host_start - field_tgt_base |
| + field_tgt_offset; |
| if (openmp_p) |
| { |
| k->refcount = REFCOUNT_STRUCTELEM; |
| if (field_tgt_structelem_first == NULL) |
| { |
| /* Set to first structure element of sequence. */ |
| k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST; |
| field_tgt_structelem_first = k; |
| } |
| else |
| /* Point to refcount of leading element, but do not |
| increment again. */ |
| k->structelem_refcount_ptr |
| = &field_tgt_structelem_first->structelem_refcount; |
| |
| if (i == field_tgt_clear) |
| { |
| k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; |
| field_tgt_structelem_first = NULL; |
| } |
| } |
| if (i == field_tgt_clear) |
| field_tgt_clear = FIELD_TGT_EMPTY; |
| } |
| else |
| { |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| k->tgt_offset = tgt_size; |
| tgt_size += k->host_end - k->host_start; |
| } |
| /* First increment, from 0 to 1. gomp_increment_refcount |
| encapsulates the different increment cases, so use this |
| instead of directly setting 1 during initialization. */ |
| gomp_increment_refcount (k, refcount_set); |
| |
| tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); |
| tgt->list[i].always_copy_from |
| = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); |
| tgt->list[i].is_attach = false; |
| tgt->list[i].offset = 0; |
| tgt->list[i].length = k->host_end - k->host_start; |
| tgt->refcount++; |
| array->left = NULL; |
| array->right = NULL; |
| splay_tree_insert (mem_map, array); |
| switch (kind & typemask) |
| { |
| case GOMP_MAP_ALLOC: |
| case GOMP_MAP_FROM: |
| case GOMP_MAP_FORCE_ALLOC: |
| case GOMP_MAP_FORCE_FROM: |
| case GOMP_MAP_ALWAYS_FROM: |
| break; |
| case GOMP_MAP_TO: |
| case GOMP_MAP_TOFROM: |
| case GOMP_MAP_FORCE_TO: |
| case GOMP_MAP_FORCE_TOFROM: |
| case GOMP_MAP_ALWAYS_TO: |
| case GOMP_MAP_ALWAYS_TOFROM: |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start |
| + k->tgt_offset), |
| (void *) k->host_start, |
| k->host_end - k->host_start, |
| false, cbufp); |
| break; |
| case GOMP_MAP_POINTER: |
| case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: |
| gomp_map_pointer |
| (tgt, aq, (uintptr_t) *(void **) k->host_start, |
| k->tgt_offset, sizes[i], cbufp, |
| ((kind & typemask) |
| == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION)); |
| break; |
| case GOMP_MAP_TO_PSET: |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start |
| + k->tgt_offset), |
| (void *) k->host_start, |
| k->host_end - k->host_start, |
| false, cbufp); |
| tgt->list[i].has_null_ptr_assoc = false; |
| |
| for (j = i + 1; j < mapnum; j++) |
| { |
| int ptr_kind = (get_kind (short_mapkind, kinds, j) |
| & typemask); |
| if (!GOMP_MAP_POINTER_P (ptr_kind) |
| && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)) |
| break; |
| else if ((uintptr_t) hostaddrs[j] < k->host_start |
| || ((uintptr_t) hostaddrs[j] + sizeof (void *) |
| > k->host_end)) |
| break; |
| else |
| { |
| tgt->list[j].key = k; |
| tgt->list[j].copy_from = false; |
| tgt->list[j].always_copy_from = false; |
| tgt->list[j].is_attach = false; |
| tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]); |
| /* For OpenMP, the use of refcount_sets causes |
| errors if we set k->refcount = 1 above but also |
| increment it again here, for decrementing will |
| not properly match, since we decrement only once |
| for each key's refcount. Therefore avoid this |
| increment for OpenMP constructs. */ |
| if (!openmp_p) |
| gomp_increment_refcount (k, refcount_set); |
| gomp_map_pointer (tgt, aq, |
| (uintptr_t) *(void **) hostaddrs[j], |
| k->tgt_offset |
| + ((uintptr_t) hostaddrs[j] |
| - k->host_start), |
| sizes[j], cbufp, false); |
| } |
| } |
| i = j - 1; |
| break; |
| case GOMP_MAP_FORCE_PRESENT: |
| { |
| /* We already looked up the memory region above and it |
| was missing. */ |
| size_t size = k->host_end - k->host_start; |
| gomp_mutex_unlock (&devicep->lock); |
| #ifdef HAVE_INTTYPES_H |
| gomp_fatal ("present clause: !acc_is_present (%p, " |
| "%"PRIu64" (0x%"PRIx64"))", |
| (void *) k->host_start, |
| (uint64_t) size, (uint64_t) size); |
| #else |
| gomp_fatal ("present clause: !acc_is_present (%p, " |
| "%lu (0x%lx))", (void *) k->host_start, |
| (unsigned long) size, (unsigned long) size); |
| #endif |
| } |
| break; |
| case GOMP_MAP_FORCE_DEVICEPTR: |
| assert (k->host_end - k->host_start == sizeof (void *)); |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start |
| + k->tgt_offset), |
| (void *) k->host_start, |
| sizeof (void *), false, cbufp); |
| break; |
| default: |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, |
| kind); |
| } |
| |
| if (k->aux && k->aux->link_key) |
| { |
| /* Set link pointer on target to the device address of the |
| mapped object. */ |
| void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); |
| /* We intentionally do not use coalescing here, as it's not |
| data allocated by the current call to this function. */ |
| gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, |
| &tgt_addr, sizeof (void *), true, NULL); |
| } |
| array++; |
| } |
| } |
| } |
| |
| if (pragma_kind == GOMP_MAP_VARS_TARGET) |
| { |
| for (i = 0; i < mapnum; i++) |
| { |
| cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start + i * sizeof (void *)), |
| (void *) &cur_node.tgt_offset, sizeof (void *), |
| true, cbufp); |
| } |
| } |
| |
| if (cbufp) |
| { |
| /* See 'gomp_coalesce_buf_add'. */ |
| assert (!aq); |
| |
| long c = 0; |
| for (c = 0; c < cbuf.chunk_cnt; ++c) |
| gomp_copy_host2dev (devicep, aq, |
| (void *) (tgt->tgt_start + cbuf.chunks[c].start), |
| (char *) cbuf.buf + (cbuf.chunks[c].start |
| - cbuf.chunks[0].start), |
| cbuf.chunks[c].end - cbuf.chunks[c].start, |
| true, NULL); |
| free (cbuf.buf); |
| cbuf.buf = NULL; |
| cbufp = NULL; |
| } |
| |
| /* If the variable from "omp target enter data" map-list was already mapped, |
| tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or |
| gomp_exit_data. */ |
| if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0) |
| { |
| free (tgt); |
| tgt = NULL; |
| } |
| |
| gomp_mutex_unlock (&devicep->lock); |
| return tgt; |
| } |
| |
| static struct target_mem_desc * |
| gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, |
| void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, |
| bool short_mapkind, htab_t *refcount_set, |
| enum gomp_map_vars_kind pragma_kind) |
| { |
| /* This management of a local refcount_set is for convenience of callers |
| who do not share a refcount_set over multiple map/unmap uses. */ |
| htab_t local_refcount_set = NULL; |
| if (refcount_set == NULL) |
| { |
| local_refcount_set = htab_create (mapnum); |
| refcount_set = &local_refcount_set; |
| } |
| |
| struct target_mem_desc *tgt; |
| tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs, |
| sizes, kinds, short_mapkind, refcount_set, |
| pragma_kind); |
| if (local_refcount_set) |
| htab_free (local_refcount_set); |
| |
| return tgt; |
| } |
| |
| attribute_hidden struct target_mem_desc * |
| goacc_map_vars (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, size_t mapnum, |
| void **hostaddrs, void **devaddrs, size_t *sizes, |
| void *kinds, bool short_mapkind, |
| enum gomp_map_vars_kind pragma_kind) |
| { |
| return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs, |
| sizes, kinds, short_mapkind, NULL, |
| GOMP_MAP_VARS_OPENACC | pragma_kind); |
| } |
| |
| static void |
| gomp_unmap_tgt (struct target_mem_desc *tgt) |
| { |
| /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */ |
| if (tgt->tgt_end) |
| gomp_free_device_memory (tgt->device_descr, tgt->to_free); |
| |
| free (tgt->array); |
| free (tgt); |
| } |
| |
| static bool |
| gomp_unref_tgt (void *ptr) |
| { |
| bool is_tgt_unmapped = false; |
| |
| struct target_mem_desc *tgt = (struct target_mem_desc *) ptr; |
| |
| if (tgt->refcount > 1) |
| tgt->refcount--; |
| else |
| { |
| gomp_unmap_tgt (tgt); |
| is_tgt_unmapped = true; |
| } |
| |
| return is_tgt_unmapped; |
| } |
| |
| static void |
| gomp_unref_tgt_void (void *ptr) |
| { |
| (void) gomp_unref_tgt (ptr); |
| } |
| |
| static void |
| gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k) |
| { |
| splay_tree_remove (sp, k); |
| if (k->aux) |
| { |
| if (k->aux->link_key) |
| splay_tree_insert (sp, (splay_tree_node) k->aux->link_key); |
| if (k->aux->attach_count) |
| free (k->aux->attach_count); |
| free (k->aux); |
| k->aux = NULL; |
| } |
| } |
| |
| static inline __attribute__((always_inline)) bool |
| gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k, |
| struct goacc_asyncqueue *aq) |
| { |
| bool is_tgt_unmapped = false; |
| |
| if (REFCOUNT_STRUCTELEM_P (k->refcount)) |
| { |
| if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false) |
| /* Infer the splay_tree_key of the first structelem key using the |
| pointer to the first structleme_refcount. */ |
| k = (splay_tree_key) ((char *) k->structelem_refcount_ptr |
| - offsetof (struct splay_tree_key_s, |
| structelem_refcount)); |
| assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount)); |
| |
| /* The array created by gomp_map_vars is an array of splay_tree_nodes, |
| with the splay_tree_keys embedded inside. */ |
| splay_tree_node node = |
| (splay_tree_node) ((char *) k |
| - offsetof (struct splay_tree_node_s, key)); |
| while (true) |
| { |
| /* Starting from the _FIRST key, and continue for all following |
| sibling keys. */ |
| gomp_remove_splay_tree_key (&devicep->mem_map, k); |
| if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount)) |
| break; |
| else |
| k = &(++node)->key; |
| } |
| } |
| else |
| gomp_remove_splay_tree_key (&devicep->mem_map, k); |
| |
| if (aq) |
| devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, |
| (void *) k->tgt); |
| else |
| is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt); |
| return is_tgt_unmapped; |
| } |
| |
| attribute_hidden bool |
| gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) |
| { |
| return gomp_remove_var_internal (devicep, k, NULL); |
| } |
| |
| /* Remove a variable asynchronously. This actually removes the variable |
| mapping immediately, but retains the linked target_mem_desc until the |
| asynchronous operation has completed (as it may still refer to target |
| memory). The device lock must be held before entry, and remains locked on |
| exit. */ |
| |
| attribute_hidden void |
| gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k, |
| struct goacc_asyncqueue *aq) |
| { |
| (void) gomp_remove_var_internal (devicep, k, aq); |
| } |
| |
| /* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant |
| variables back from device to host: if it is false, it is assumed that this |
| has been done already. */ |
| |
| static inline __attribute__((always_inline)) void |
| gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom, |
| htab_t *refcount_set, struct goacc_asyncqueue *aq) |
| { |
| struct gomp_device_descr *devicep = tgt->device_descr; |
| |
| if (tgt->list_count == 0) |
| { |
| free (tgt); |
| return; |
| } |
| |
| gomp_mutex_lock (&devicep->lock); |
| if (devicep->state == GOMP_DEVICE_FINALIZED) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| free (tgt->array); |
| free (tgt); |
| return; |
| } |
| |
| size_t i; |
| |
| /* We must perform detachments before any copies back to the host. */ |
| for (i = 0; i < tgt->list_count; i++) |
| { |
| splay_tree_key k = tgt->list[i].key; |
| |
| if (k != NULL && tgt->list[i].is_attach) |
| gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start |
| + tgt->list[i].offset, |
| false, NULL); |
| } |
| |
| for (i = 0; i < tgt->list_count; i++) |
| { |
| splay_tree_key k = tgt->list[i].key; |
| if (k == NULL) |
| continue; |
| |
| /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference |
| counts ('n->refcount', 'n->dynamic_refcount'). */ |
| if (tgt->list[i].is_attach) |
| continue; |
| |
| bool do_copy, do_remove; |
| gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove); |
| |
| if ((do_copy && do_copyfrom && tgt->list[i].copy_from) |
| || tgt->list[i].always_copy_from) |
| gomp_copy_dev2host (devicep, aq, |
| (void *) (k->host_start + tgt->list[i].offset), |
| (void *) (k->tgt->tgt_start + k->tgt_offset |
| + tgt->list[i].offset), |
| tgt->list[i].length); |
| if (do_remove) |
| { |
| struct target_mem_desc *k_tgt = k->tgt; |
| bool is_tgt_unmapped = gomp_remove_var (devicep, k); |
| /* It would be bad if TGT got unmapped while we're still iterating |
| over its LIST_COUNT, and also expect to use it in the following |
| code. */ |
| assert (!is_tgt_unmapped |
| || k_tgt != tgt); |
| } |
| } |
| |
| if (aq) |
| devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void, |
| (void *) tgt); |
| else |
| gomp_unref_tgt ((void *) tgt); |
| |
| gomp_mutex_unlock (&devicep->lock); |
| } |
| |
| static void |
| gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, |
| htab_t *refcount_set) |
| { |
| /* This management of a local refcount_set is for convenience of callers |
| who do not share a refcount_set over multiple map/unmap uses. */ |
| htab_t local_refcount_set = NULL; |
| if (refcount_set == NULL) |
| { |
| local_refcount_set = htab_create (tgt->list_count); |
| refcount_set = &local_refcount_set; |
| } |
| |
| gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL); |
| |
| if (local_refcount_set) |
| htab_free (local_refcount_set); |
| } |
| |
| attribute_hidden void |
| goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom, |
| struct goacc_asyncqueue *aq) |
| { |
| gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq); |
| } |
| |
| static void |
| gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, |
| size_t *sizes, void *kinds, bool short_mapkind) |
| { |
| size_t i; |
| struct splay_tree_key_s cur_node; |
| const int typemask = short_mapkind ? 0xff : 0x7; |
| |
| if (!devicep) |
| return; |
| |
| if (mapnum == 0) |
| return; |
| |
| gomp_mutex_lock (&devicep->lock); |
| if (devicep->state == GOMP_DEVICE_FINALIZED) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| return; |
| } |
| |
| for (i = 0; i < mapnum; i++) |
| if (sizes[i]) |
| { |
| cur_node.host_start = (uintptr_t) hostaddrs[i]; |
| cur_node.host_end = cur_node.host_start + sizes[i]; |
| splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node); |
| if (n) |
| { |
| int kind = get_kind (short_mapkind, kinds, i); |
| if (n->host_start > cur_node.host_start |
| || n->host_end < cur_node.host_end) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("Trying to update [%p..%p) object when " |
| "only [%p..%p) is mapped", |
| (void *) cur_node.host_start, |
| (void *) cur_node.host_end, |
| (void *) n->host_start, |
| (void *) n->host_end); |
| } |
| |
| if (n->aux && n->aux->attach_count) |
| { |
| uintptr_t addr = cur_node.host_start; |
| while (addr < cur_node.host_end) |
| { |
| /* We have to be careful not to overwrite still attached |
| pointers during host<->device updates. */ |
| size_t i = (addr - cur_node.host_start) / sizeof (void *); |
| if (n->aux->attach_count[i] == 0) |
| { |
| void *devaddr = (void *) (n->tgt->tgt_start |
| + n->tgt_offset |
| + addr - n->host_start); |
| if (GOMP_MAP_COPY_TO_P (kind & typemask)) |
| gomp_copy_host2dev (devicep, NULL, |
| devaddr, (void *) addr, |
| sizeof (void *), false, NULL); |
| if (GOMP_MAP_COPY_FROM_P (kind & typemask)) |
| gomp_copy_dev2host (devicep, NULL, |
| (void *) addr, devaddr, |
| sizeof (void *)); |
| } |
| addr += sizeof (void *); |
| } |
| } |
| else |
| { |
| void *hostaddr = (void *) cur_node.host_start; |
| void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset |
| + cur_node.host_start |
| - n->host_start); |
| size_t size = cur_node.host_end - cur_node.host_start; |
| |
| if (GOMP_MAP_COPY_TO_P (kind & typemask)) |
| gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, |
| false, NULL); |
| if (GOMP_MAP_COPY_FROM_P (kind & typemask)) |
| gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); |
| } |
| } |
| } |
| gomp_mutex_unlock (&devicep->lock); |
| } |
| |
| static struct gomp_offload_icv_list * |
| gomp_get_offload_icv_item (int dev_num) |
| { |
| struct gomp_offload_icv_list *l = gomp_offload_icv_list; |
| while (l != NULL && l->device_num != dev_num) |
| l = l->next; |
| |
| return l; |
| } |
| |
| /* Helper function for 'gomp_load_image_to_device'. Returns the ICV values |
| depending on the device num and the variable hierarchy |
| (_DEV_42, _DEV, _ALL). If no ICV was initially configured for the given |
| device and thus no item with that device number is contained in |
| gomp_offload_icv_list, then a new item is created and added to the list. */ |
| |
| static struct gomp_offload_icvs * |
| get_gomp_offload_icvs (int dev_num) |
| { |
| struct gomp_icv_list *dev |
| = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_DEV); |
| struct gomp_icv_list *all |
| = gomp_get_initial_icv_item (GOMP_DEVICE_NUM_FOR_ALL); |
| struct gomp_icv_list *dev_x = gomp_get_initial_icv_item (dev_num); |
| struct gomp_offload_icv_list *offload_icvs |
| = gomp_get_offload_icv_item (dev_num); |
| |
| if (offload_icvs != NULL) |
| return &offload_icvs->icvs; |
| |
| struct gomp_offload_icv_list *new |
| = (struct gomp_offload_icv_list *) gomp_malloc (sizeof (struct gomp_offload_icv_list)); |
| |
| new->device_num = dev_num; |
| new->icvs.device_num = dev_num; |
| new->next = gomp_offload_icv_list; |
| |
| if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_NTEAMS)) |
| new->icvs.nteams = dev_x->icvs.nteams_var; |
| else if (dev != NULL && gomp_get_icv_flag (dev->flags, GOMP_ICV_NTEAMS)) |
| new->icvs.nteams = dev->icvs.nteams_var; |
| else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_NTEAMS)) |
| new->icvs.nteams = all->icvs.nteams_var; |
| else |
| new->icvs.nteams = gomp_default_icv_values.nteams_var; |
| |
| if (dev_x != NULL |
| && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE)) |
| new->icvs.default_device = dev_x->icvs.default_device_var; |
| else if (dev != NULL |
| && gomp_get_icv_flag (dev->flags, GOMP_ICV_DEFAULT_DEVICE)) |
| new->icvs.default_device = dev->icvs.default_device_var; |
| else if (all != NULL |
| && gomp_get_icv_flag (all->flags, GOMP_ICV_DEFAULT_DEVICE)) |
| new->icvs.default_device = all->icvs.default_device_var; |
| else |
| new->icvs.default_device = gomp_default_icv_values.default_device_var; |
| |
| gomp_offload_icv_list = new; |
| return &new->icvs; |
| } |
| |
| /* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. |
| And insert to splay tree the mapping between addresses from HOST_TABLE and |
| from loaded target image. We rely in the host and device compiler |
| emitting variable and functions in the same order. */ |
| |
| static void |
| gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, |
| const void *host_table, const void *target_data, |
| bool is_register_lock) |
| { |
| void **host_func_table = ((void ***) host_table)[0]; |
| void **host_funcs_end = ((void ***) host_table)[1]; |
| void **host_var_table = ((void ***) host_table)[2]; |
| void **host_vars_end = ((void ***) host_table)[3]; |
| |
| /* The func table contains only addresses, the var table contains addresses |
| and corresponding sizes. */ |
| int num_funcs = host_funcs_end - host_func_table; |
| int num_vars = (host_vars_end - host_var_table) / 2; |
| |
| /* Load image to device and get target addresses for the image. */ |
| struct addr_pair *target_table = NULL; |
| int i, num_target_entries; |
| |
| num_target_entries |
| = devicep->load_image_func (devicep->target_id, version, |
| target_data, &target_table, NULL); |
| |
| if (num_target_entries != num_funcs + num_vars |
| /* "+1" due to the additional ICV struct. */ |
| && num_target_entries != num_funcs + num_vars + 1) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| if (is_register_lock) |
| gomp_mutex_unlock (®ister_lock); |
| gomp_fatal ("Cannot map target functions or variables" |
| " (expected %u, have %u)", num_funcs + num_vars, |
| num_target_entries); |
| } |
| |
| /* Insert host-target address mapping into splay tree. */ |
| struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); |
| /* "+1" due to the additional ICV struct. */ |
| tgt->array = gomp_malloc ((num_funcs + num_vars + 1) |
| * sizeof (*tgt->array)); |
| tgt->refcount = REFCOUNT_INFINITY; |
| tgt->tgt_start = 0; |
| tgt->tgt_end = 0; |
| tgt->to_free = NULL; |
| tgt->prev = NULL; |
| tgt->list_count = 0; |
| tgt->device_descr = devicep; |
| splay_tree_node array = tgt->array; |
| |
| for (i = 0; i < num_funcs; i++) |
| { |
| splay_tree_key k = &array->key; |
| k->host_start = (uintptr_t) host_func_table[i]; |
| k->host_end = k->host_start + 1; |
| k->tgt = tgt; |
| k->tgt_offset = target_table[i].start; |
| k->refcount = REFCOUNT_INFINITY; |
| k->dynamic_refcount = 0; |
| k->aux = NULL; |
| array->left = NULL; |
| array->right = NULL; |
| splay_tree_insert (&devicep->mem_map, array); |
| array++; |
| } |
| |
| /* Most significant bit of the size in host and target tables marks |
| "omp declare target link" variables. */ |
| const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); |
| const uintptr_t size_mask = ~link_bit; |
| |
| for (i = 0; i < num_vars; i++) |
| { |
| struct addr_pair *target_var = &target_table[num_funcs + i]; |
| uintptr_t target_size = target_var->end - target_var->start; |
| bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1]; |
| |
| if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| if (is_register_lock) |
| gomp_mutex_unlock (®ister_lock); |
| gomp_fatal ("Cannot map target variables (size mismatch)"); |
| } |
| |
| splay_tree_key k = &array->key; |
| k->host_start = (uintptr_t) host_var_table[i * 2]; |
| k->host_end |
| = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); |
| k->tgt = tgt; |
| k->tgt_offset = target_var->start; |
| k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY; |
| k->dynamic_refcount = 0; |
| k->aux = NULL; |
| array->left = NULL; |
| array->right = NULL; |
| splay_tree_insert (&devicep->mem_map, array); |
| array++; |
| } |
| |
| /* Last entry is for a ICVs variable. |
| Tolerate case where plugin does not return those entries. */ |
| if (num_funcs + num_vars < num_target_entries) |
| { |
| struct addr_pair *var = &target_table[num_funcs + num_vars]; |
| |
| /* Start address will be non-zero for the ICVs variable if |
| the variable was found in this image. */ |
| if (var->start != 0) |
| { |
| /* The index of the devicep within devices[] is regarded as its |
| 'device number', which is different from the per-device type |
| devicep->target_id. */ |
| int dev_num = (int) (devicep - &devices[0]); |
| struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num); |
| size_t var_size = var->end - var->start; |
| |
| /* Copy the ICVs variable to place on device memory, hereby |
| actually designating its device number into effect. */ |
| gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs, |
| var_size, false, NULL); |
| splay_tree_key k = &array->key; |
| k->host_start = (uintptr_t) icvs; |
| k->host_end = |
| k->host_start + (size_mask & sizeof (struct gomp_offload_icvs)); |
| k->tgt = tgt; |
| k->tgt_offset = var->start; |
| k->refcount = REFCOUNT_INFINITY; |
| k->dynamic_refcount = 0; |
| k->aux = NULL; |
| array->left = NULL; |
| array->right = NULL; |
| splay_tree_insert (&devicep->mem_map, array); |
| array++; |
| } |
| } |
| |
| free (target_table); |
| } |
| |
| /* Unload the mappings described by target_data from device DEVICE_P. |
| The device must be locked. */ |
| |
| static void |
| gomp_unload_image_from_device (struct gomp_device_descr *devicep, |
| unsigned version, |
| const void *host_table, const void *target_data) |
| { |
| void **host_func_table = ((void ***) host_table)[0]; |
| void **host_funcs_end = ((void ***) host_table)[1]; |
| void **host_var_table = ((void ***) host_table)[2]; |
| void **host_vars_end = ((void ***) host_table)[3]; |
| |
| /* The func table contains only addresses, the var table contains addresses |
| and corresponding sizes. */ |
| int num_funcs = host_funcs_end - host_func_table; |
| int num_vars = (host_vars_end - host_var_table) / 2; |
| |
| struct splay_tree_key_s k; |
| splay_tree_key node = NULL; |
| |
| /* Find mapping at start of node array */ |
| if (num_funcs || num_vars) |
| { |
| k.host_start = (num_funcs ? (uintptr_t) host_func_table[0] |
| : (uintptr_t) host_var_table[0]); |
| k.host_end = k.host_start + 1; |
| node = splay_tree_lookup (&devicep->mem_map, &k); |
| } |
| |
| if (!devicep->unload_image_func (devicep->target_id, version, target_data)) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("image unload fail"); |
| } |
| |
| /* Remove mappings from splay tree. */ |
| int i; |
| for (i = 0; i < num_funcs; i++) |
| { |
| k.host_start = (uintptr_t) host_func_table[i]; |
| k.host_end = k.host_start + 1; |
| splay_tree_remove (&devicep->mem_map, &k); |
| } |
| |
| /* Most significant bit of the size in host and target tables marks |
| "omp declare target link" variables. */ |
| const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1); |
| const uintptr_t size_mask = ~link_bit; |
| bool is_tgt_unmapped = false; |
| |
| for (i = 0; i < num_vars; i++) |
| { |
| k.host_start = (uintptr_t) host_var_table[i * 2]; |
| k.host_end |
| = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]); |
| |
| if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1])) |
| splay_tree_remove (&devicep->mem_map, &k); |
| else |
| { |
| splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k); |
| is_tgt_unmapped = gomp_remove_var (devicep, n); |
| } |
| } |
| |
| if (node && !is_tgt_unmapped) |
| { |
| free (node->tgt); |
| free (node); |
| } |
| } |
| |
| static void |
| gomp_requires_to_name (char *buf, size_t size, int requires_mask) |
| { |
| char *end = buf + size, *p = buf; |
| if (requires_mask & GOMP_REQUIRES_UNIFIED_ADDRESS) |
| p += snprintf (p, end - p, "unified_address"); |
| if (requires_mask & GOMP_REQUIRES_UNIFIED_SHARED_MEMORY) |
| p += snprintf (p, end - p, "%sunified_shared_memory", |
| (p == buf ? "" : ", ")); |
| if (requires_mask & GOMP_REQUIRES_REVERSE_OFFLOAD) |
| p += snprintf (p, end - p, "%sreverse_offload", |
| (p == buf ? "" : ", ")); |
| } |
| |
| /* This function should be called from every offload image while loading. |
| It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
| the target, and DATA. */ |
| |
| void |
| GOMP_offload_register_ver (unsigned version, const void *host_table, |
| int target_type, const void *data) |
| { |
| int i; |
| |
| if (GOMP_VERSION_LIB (version) > GOMP_VERSION) |
| gomp_fatal ("Library too old for offload (version %u < %u)", |
| GOMP_VERSION, GOMP_VERSION_LIB (version)); |
| |
| int omp_req; |
| const void *target_data; |
| if (GOMP_VERSION_LIB (version) > 1) |
| { |
| omp_req = (int) (size_t) ((void **) data)[0]; |
| target_data = &((void **) data)[1]; |
| } |
| else |
| { |
| omp_req = 0; |
| target_data = data; |
| } |
| |
| gomp_mutex_lock (®ister_lock); |
| |
| if (omp_req && omp_requires_mask && omp_requires_mask != omp_req) |
| { |
| char buf1[sizeof ("unified_address, unified_shared_memory, " |
| "reverse_offload")]; |
| char buf2[sizeof ("unified_address, unified_shared_memory, " |
| "reverse_offload")]; |
| gomp_requires_to_name (buf2, sizeof (buf2), |
| omp_req != GOMP_REQUIRES_TARGET_USED |
| ? omp_req : omp_requires_mask); |
| if (omp_req != GOMP_REQUIRES_TARGET_USED |
| && omp_requires_mask != GOMP_REQUIRES_TARGET_USED) |
| { |
| gomp_requires_to_name (buf1, sizeof (buf1), omp_requires_mask); |
| gomp_fatal ("OpenMP 'requires' directive with non-identical clauses " |
| "in multiple compilation units: '%s' vs. '%s'", |
| buf1, buf2); |
| } |
| else |
| gomp_fatal ("OpenMP 'requires' directive with '%s' specified only in " |
| "some compilation units", buf2); |
| } |
| omp_requires_mask = omp_req; |
| |
| /* Load image to all initialized devices. */ |
| for (i = 0; i < num_devices; i++) |
| { |
| struct gomp_device_descr *devicep = &devices[i]; |
| gomp_mutex_lock (&devicep->lock); |
| if (devicep->type == target_type |
| && devicep->state == GOMP_DEVICE_INITIALIZED) |
| gomp_load_image_to_device (devicep, version, |
| host_table, target_data, true); |
| gomp_mutex_unlock (&devicep->lock); |
| } |
| |
| /* Insert image to array of pending images. */ |
| offload_images |
| = gomp_realloc_unlock (offload_images, |
| (num_offload_images + 1) |
| * sizeof (struct offload_image_descr)); |
| offload_images[num_offload_images].version = version; |
| offload_images[num_offload_images].type = target_type; |
| offload_images[num_offload_images].host_table = host_table; |
| offload_images[num_offload_images].target_data = target_data; |
| |
| num_offload_images++; |
| gomp_mutex_unlock (®ister_lock); |
| } |
| |
| /* Legacy entry point. */ |
| |
| void |
| GOMP_offload_register (const void *host_table, int target_type, |
| const void *target_data) |
| { |
| GOMP_offload_register_ver (0, host_table, target_type, target_data); |
| } |
| |
| /* This function should be called from every offload image while unloading. |
| It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of |
| the target, and DATA. */ |
| |
| void |
| GOMP_offload_unregister_ver (unsigned version, const void *host_table, |
| int target_type, const void *data) |
| { |
| int i; |
| |
| if (GOMP_VERSION_LIB (version) > GOMP_VERSION) |
| gomp_fatal ("Library too old for offload (version %u < %u)", |
| GOMP_VERSION, GOMP_VERSION_LIB (version)); |
| |
| const void *target_data; |
| if (GOMP_VERSION_LIB (version) > 1) |
| target_data = &((void **) data)[1]; |
| else |
| target_data = data; |
| |
| gomp_mutex_lock (®ister_lock); |
| |
| /* Unload image from all initialized devices. */ |
| for (i = 0; i < num_devices; i++) |
| { |
| struct gomp_device_descr *devicep = &devices[i]; |
| gomp_mutex_lock (&devicep->lock); |
| if (devicep->type == target_type |
| && devicep->state == GOMP_DEVICE_INITIALIZED) |
| gomp_unload_image_from_device (devicep, version, |
| host_table, target_data); |
| gomp_mutex_unlock (&devicep->lock); |
| } |
| |
| /* Remove image from array of pending images. */ |
| for (i = 0; i < num_offload_images; i++) |
| if (offload_images[i].target_data == target_data) |
| { |
| offload_images[i] = offload_images[--num_offload_images]; |
| break; |
| } |
| |
| gomp_mutex_unlock (®ister_lock); |
| } |
| |
| /* Legacy entry point. */ |
| |
| void |
| GOMP_offload_unregister (const void *host_table, int target_type, |
| const void *target_data) |
| { |
| GOMP_offload_unregister_ver (0, host_table, target_type, target_data); |
| } |
| |
| /* This function initializes the target device, specified by DEVICEP. DEVICEP |
| must be locked on entry, and remains locked on return. */ |
| |
| attribute_hidden void |
| gomp_init_device (struct gomp_device_descr *devicep) |
| { |
| int i; |
| if (!devicep->init_device_func (devicep->target_id)) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| gomp_fatal ("device initialization failed"); |
| } |
| |
| /* Load to device all images registered by the moment. */ |
| for (i = 0; i < num_offload_images; i++) |
| { |
| struct offload_image_descr *image = &offload_images[i]; |
| if (image->type == devicep->type) |
| gomp_load_image_to_device (devicep, image->version, |
| image->host_table, image->target_data, |
| false); |
| } |
| |
| /* Initialize OpenACC asynchronous queues. */ |
| goacc_init_asyncqueues (devicep); |
| |
| devicep->state = GOMP_DEVICE_INITIALIZED; |
| } |
| |
| /* This function finalizes the target device, specified by DEVICEP. DEVICEP |
| must be locked on entry, and remains locked on return. */ |
| |
| attribute_hidden bool |
| gomp_fini_device (struct gomp_device_descr *devicep) |
| { |
| bool ret = goacc_fini_asyncqueues (devicep); |
| ret &= devicep->fini_device_func (devicep->target_id); |
| devicep->state = GOMP_DEVICE_FINALIZED; |
| return ret; |
| } |
| |
| attribute_hidden void |
| gomp_unload_device (struct gomp_device_descr *devicep) |
| { |
| if (devicep->state == GOMP_DEVICE_INITIALIZED) |
| { |
| unsigned i; |
| |
| /* Unload from device all images registered at the moment. */ |
| for (i = 0; i < num_offload_images; i++) |
| { |
| struct offload_image_descr *image = &offload_images[i]; |
| if (image->type == devicep->type) |
| gomp_unload_image_from_device (devicep, image->version, |
| image->host_table, |
| image->target_data); |
| } |
| } |
| } |
| |
| /* Host fallback for GOMP_target{,_ext} routines. */ |
| |
| static void |
| gomp_target_fallback (void (*fn) (void *), void **hostaddrs, |
| struct gomp_device_descr *devicep, void **args) |
| { |
| struct gomp_thread old_thr, *thr = gomp_thread (); |
| |
| if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY |
| && devicep != NULL) |
| gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot " |
| "be used for offloading"); |
| |
| old_thr = *thr; |
| memset (thr, '\0', sizeof (*thr)); |
| if (gomp_places_list) |
| { |
| thr->place = old_thr.place; |
| thr->ts.place_partition_len = gomp_places_list_len; |
| } |
| if (args) |
| while (*args) |
| { |
| intptr_t id = (intptr_t) *args++, val; |
| if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) |
| val = (intptr_t) *args++; |
| else |
| val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; |
| if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) |
| continue; |
| id &= GOMP_TARGET_ARG_ID_MASK; |
| if (id != GOMP_TARGET_ARG_THREAD_LIMIT) |
| continue; |
| val = val > INT_MAX ? INT_MAX : val; |
| if (val) |
| gomp_icv (true)->thread_limit_var = val; |
| break; |
| } |
| |
| fn (hostaddrs); |
| gomp_free_thread (thr); |
| *thr = old_thr; |
| } |
| |
| /* Calculate alignment and size requirements of a private copy of data shared |
| as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ |
| |
| static inline void |
| calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, |
| unsigned short *kinds, size_t *tgt_align, |
| size_t *tgt_size) |
| { |
| size_t i; |
| for (i = 0; i < mapnum; i++) |
| if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) |
| { |
| size_t align = (size_t) 1 << (kinds[i] >> 8); |
| if (*tgt_align < align) |
| *tgt_align = align; |
| *tgt_size = (*tgt_size + align - 1) & ~(align - 1); |
| *tgt_size += sizes[i]; |
| } |
| } |
| |
| /* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */ |
| |
| static inline void |
| copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, |
| size_t *sizes, unsigned short *kinds, size_t tgt_align, |
| size_t tgt_size) |
| { |
| uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); |
| if (al) |
| tgt += tgt_align - al; |
| tgt_size = 0; |
| size_t i; |
| for (i = 0; i < mapnum; i++) |
| if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL) |
| { |
| size_t align = (size_t) 1 << (kinds[i] >> 8); |
| tgt_size = (tgt_size + align - 1) & ~(align - 1); |
| memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); |
| hostaddrs[i] = tgt + tgt_size; |
| tgt_size = tgt_size + sizes[i]; |
| if (i + 1 < mapnum && (kinds[i+1] & 0xff) == GOMP_MAP_ATTACH) |
| { |
| *(*(uintptr_t**) hostaddrs[i+1] + sizes[i+1]) = (uintptr_t) hostaddrs[i]; |
| ++i; |
| } |
| } |
| } |
| |
| /* Helper function of GOMP_target{,_ext} routines. */ |
| |
| static void * |
| gomp_get_target_fn_addr (struct gomp_device_descr *devicep, |
| void (*host_fn) (void *)) |
| { |
| if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) |
| return (void *) host_fn; |
| else |
| { |
| gomp_mutex_lock (&devicep->lock); |
| if (devicep->state == GOMP_DEVICE_FINALIZED) |
| { |
| gomp_mutex_unlock (&devicep->lock); |
| return NULL; |
| } |
| |
| struct splay_tree_key_s k; |
| k.host_start = (uintptr_t) host_fn; |
| k.host_end = k.host_start + 1; |
| splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); |
| gomp_mutex_unlock (&devicep->lock); |
| if (tgt_fn == NULL) |
| return NULL; |
| |
| return (void *) tgt_fn->tgt_offset; |
| } |
| } |
| |
| /* Called when encountering a target directive. If DEVICE |
| is GOMP_DEVICE_ICV, it means use device-var ICV. If it is |
| GOMP_DEVICE_HOST_FALLBACK (or any value |
| larger than last available hw device), use host fallback. |
| FN is address of host code, UNUSED is part of the current ABI, but |
| we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays |
| with MAPNUM entries, with addresses of the host objects, |
| sizes of the host objects (resp. for pointer kind pointer bias |
| and assumed sizeof (void *) size) and kinds. */ |
| |
| void |
| GOMP_target (int device, void (*fn) (void *), const void *unused, |
| size_t mapnum, void **hostaddrs, size_t *sizes, |
| unsigned char *kinds) |
| { |
| struct gomp_device_descr *devicep = resolve_device (device, true); |
| |
| void *fn_addr; |
| if (devicep == NULL |
| || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) |
| /* All shared memory devices should use the GOMP_target_ext function. */ |
| || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM |
| || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) |
| return gomp_target_fallback (fn, hostaddrs, devicep, NULL); |
| |
| htab_t refcount_set = htab_create (mapnum); |
| struct target_mem_desc *tgt_vars |
| = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, |
| &refcount_set, GOMP_MAP_VARS_TARGET); |
| devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, |
| NULL); |
| htab_clear (refcount_set); |
| gomp_unmap_vars (tgt_vars, true, &refcount_set); |
| htab_free (refcount_set); |
| } |
| |
| static inline unsigned int |
| clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) |
| { |
| /* If we cannot run asynchronously, simply ignore nowait. */ |
| if (devicep != NULL && devicep->async_run_func == NULL) |
| flags &= ~GOMP_TARGET_FLAG_NOWAIT; |
| |
| return flags; |
| } |
| |
| /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, |
| and several arguments have been added: |
| FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. |
| DEPEND is array of dependencies, see GOMP_task for details. |
| |
| ARGS is a pointer to an array consisting of a variable number of both |
| device-independent and device-specific arguments, which can take one two |
| elements where the first specifies for which device it is intended, the type |
| and optionally also the value. If the value is not present in the first |
| one, the whole second element the actual value. The last element of the |
| array is a single NULL. Among the device independent can be for example |
| NUM_TEAMS and THREAD_LIMIT. |
| |
| NUM_TEAMS is positive if GOMP_teams will be called in the body with |
| that value, or 1 if teams construct is not present, or 0, if |
| teams construct does not have num_teams clause and so the choice is |
| implementation defined, and -1 if it can't be determined on the host |
| what value will GOMP_teams have on the device. |
| THREAD_LIMIT similarly is positive if GOMP_teams will be called in the |
| body with that value, or 0, if teams construct does not have thread_limit |
| clause or the teams construct is not present, or -1 if it can't be |
| determined on the host what value will GOMP_teams have on the device. */ |
| |
| void |
| GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, |
| void **hostaddrs, size_t *sizes, unsigned short *kinds, |
| unsigned int flags, void **depend, void **args) |
| { |
| struct gomp_device_descr *devicep = resolve_device (device, true); |
| size_t tgt_align = 0, tgt_size = 0; |
| bool fpc_done = false; |
| |
| flags = clear_unsupported_flags (devicep, flags); |
| |
| if (flags & GOMP_TARGET_FLAG_NOWAIT) |
| { |
| struct gomp_thread *thr = gomp_thread (); |
| /* Create a team if we don't have any around, as nowait |
| target tasks make sense to run asynchronously even when |
| outside of any parallel. */ |
| if (__builtin_expect (thr->ts.team == NULL, 0)) |
| { |
| struct gomp_team *team = gomp_new_team (1); |
| struct gomp_task *task = thr->task; |
| struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv; |
| team->prev_ts = thr->ts; |
| thr->ts.team = team; |
| thr->ts.team_id = 0; |
| thr->ts.work_share = &team->work_shares[0]; |
| thr->ts.last_work_share = NULL; |
| #ifdef HAVE_SYNC_BUILTINS |
| thr->ts.single_count = 0; |
| #endif |
| thr->ts.static_trip = 0; |
| thr->task = &team->implicit_task[0]; |
| gomp_init_task (thr->task, NULL, icv); |
| if (task) |
| { |
| thr->task = task; |
| gomp_end_task (); |
| free (task); |
| thr->task = &team->implicit_task[0]; |
| } |
| else |
| pthread_setspecific (gomp_thread_destructor, thr); |
| } |
| if (thr->ts.team |
| && !thr->task->final_task) |
| { |
| gomp_create_target_task (devicep, fn, mapnum, hostaddrs, |
| sizes, kinds, flags, depend, args, |
| GOMP_TARGET_TASK_BEFORE_MAP); |
| return; |
| } |
| } |
| |
| /* If there are depend clauses, but nowait is not present |
| (or we are in a final task), block the parent task until the |
| dependencies are resolved and then just continue with the rest |
| of the function as if it is a merged task. */ |
| if (depend != NULL) |
| { |
| struct gomp_thread *thr = gomp_thread (); |
| if (thr->task && thr->task->depend_hash) |
| { |
| /* If we might need to wait, copy firstprivate now. */ |
| calculate_firstprivate_requirements (mapnum, sizes, kinds, |
| &tgt_align, &tgt_size); |
| if (tgt_align) |
| { |
| char *tgt = gomp_alloca (tgt_size + tgt_align - 1); |
| copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, |
| tgt_align, tgt_size); |
| } |
| fpc_done = true; |
| gomp_task_maybe_wait_for_dependencies (depend); |
| } |
| } |
| |