blob: 9a5b259223ea517c143eb0d825d537ff8bb07b0d [file] [log] [blame]
/* Copyright (C) 2013-2019 Free Software Foundation, Inc.
Contributed by Jakub Jelinek <jakub@redhat.com>.
This file is part of the GNU Offloading and Multi Processing Library
(libgomp).
Libgomp is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file contains the support of offloading. */
#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h> /* For PRIu64. */
#endif
#include <string.h>
#include <assert.h>
#include <errno.h>
#if defined(RC_CHECKING)
#include <stdio.h>
#endif
#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif
#define FIELD_TGT_EMPTY (~(size_t) 0)
static void gomp_target_init (void);
/* The whole initialization code for offloading plugins is only run one. */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
/* Mutex for offload image registration. */
static gomp_mutex_t register_lock;
/* This structure describes an offload image.
It contains type of the target device, pointer to host table descriptor, and
pointer to target data. */
struct offload_image_descr {
unsigned version;
enum offload_target_type type;
const void *host_table;
const void *target_data;
};
/* Array of descriptors of offload images. */
static struct offload_image_descr *offload_images;
/* Total number of offload images. */
static int num_offload_images;
/* Array of descriptors for all available devices. */
static struct gomp_device_descr *devices;
/* Total number of available devices. */
static int num_devices;
/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */
static int num_devices_openmp;
/* Similar to gomp_realloc, but release register_lock before gomp_fatal. */
static void *
gomp_realloc_unlock (void *old, size_t size)
{
void *ret = realloc (old, size);
if (ret == NULL)
{
gomp_mutex_unlock (&register_lock);
gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
}
return ret;
}
attribute_hidden void
gomp_init_targets_once (void)
{
(void) pthread_once (&gomp_is_initialized, gomp_target_init);
}
attribute_hidden int
gomp_get_num_devices (void)
{
gomp_init_targets_once ();
return num_devices_openmp;
}
static struct gomp_device_descr *
resolve_device (int device)
{
gomp_debug (0, "%s (%d)\n", __FUNCTION__, device);
int device_id;
if (device == GOMP_DEVICE_ICV)
{
struct gomp_task_icv *icv = gomp_icv (false);
device_id = icv->default_device_var;
}
else
device_id = device;
if (device_id < 0 || device_id >= gomp_get_num_devices ())
return NULL;
gomp_mutex_lock (&devices[device_id].lock);
if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
gomp_init_device (&devices[device_id]);
else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
{
gomp_mutex_unlock (&devices[device_id].lock);
return NULL;
}
gomp_mutex_unlock (&devices[device_id].lock);
/* If the device-var ICV does not actually have offload data available, don't
try use it (which will fail), and use host fallback instead. */
if (device == GOMP_DEVICE_ICV
&& !gomp_offload_target_available_p (devices[device_id].type))
return NULL;
gomp_debug (0, " %s (%d): %d\n", __FUNCTION__, device, device_id);
return &devices[device_id];
}
static inline splay_tree_key
gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
{
if (key->host_start != key->host_end)
return splay_tree_lookup (mem_map, key);
key->host_end++;
splay_tree_key n = splay_tree_lookup (mem_map, key);
key->host_end--;
if (n)
return n;
key->host_start--;
n = splay_tree_lookup (mem_map, key);
key->host_start++;
if (n)
return n;
return splay_tree_lookup (mem_map, key);
}
static inline splay_tree_key
gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
{
if (key->host_start != key->host_end)
return splay_tree_lookup (mem_map, key);
key->host_end++;
splay_tree_key n = splay_tree_lookup (mem_map, key);
key->host_end--;
return n;
}
static inline void
gomp_device_copy (struct gomp_device_descr *devicep,
bool (*copy_func) (int, void *, const void *, size_t),
const char *dst, void *dstaddr,
const char *src, const void *srcaddr,
size_t size)
{
if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
}
}
/* Infrastructure for coalescing adjacent or nearly adjacent (in device
addresses) host to device memory transfers. */
struct gomp_coalesce_chunk
{
/* The starting and ending point of a coalesced chunk of memory. */
size_t start, end;
};
struct gomp_coalesce_buf
{
/* Buffer into which gomp_copy_host2dev will memcpy data and from which
it will be copied to the device. */
void *buf;
struct target_mem_desc *tgt;
/* Array with offsets, chunks[i].start is the starting offset and
chunks[i].end ending offset relative to tgt->tgt_start device address
of chunks which are to be copied to buf and later copied to device. */
struct gomp_coalesce_chunk *chunks;
/* Number of chunks in chunks array, or -1 if coalesce buffering should not
be performed. */
long chunk_cnt;
/* During construction of chunks array, how many memory regions are within
the last chunk. If there is just one memory region for a chunk, we copy
it directly to device rather than going through buf. */
long use_cnt;
};
/* Maximum size of memory region considered for coalescing. Larger copies
are performed directly. */
#define MAX_COALESCE_BUF_SIZE (32 * 1024)
/* Maximum size of a gap in between regions to consider them being copied
within the same chunk. All the device offsets considered are within
newly allocated device memory, so it isn't fatal if we copy some padding
in between from host to device. The gaps come either from alignment
padding or from memory regions which are not supposed to be copied from
host to device (e.g. map(alloc:), map(from:) etc.). */
#define MAX_COALESCE_BUF_GAP (4 * 1024)
/* Add region with device tgt_start relative offset and length to CBUF. */
static inline void
gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
if (len > MAX_COALESCE_BUF_SIZE || len == 0)
return;
if (cbuf->chunk_cnt)
{
if (cbuf->chunk_cnt < 0)
return;
if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
{
cbuf->chunk_cnt = -1;
return;
}
if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
{
cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
cbuf->use_cnt++;
return;
}
/* If the last chunk is only used by one mapping, discard it,
as it will be one host to device copy anyway and
memcpying it around will only waste cycles. */
if (cbuf->use_cnt == 1)
cbuf->chunk_cnt--;
}
cbuf->chunks[cbuf->chunk_cnt].start = start;
cbuf->chunks[cbuf->chunk_cnt].end = start + len;
cbuf->chunk_cnt++;
cbuf->use_cnt = 1;
}
/* Return true for mapping kinds which need to copy data from the
host to device for regions that weren't previously mapped. */
static inline bool
gomp_to_device_kind_p (int kind)
{
switch (kind)
{
case GOMP_MAP_ALLOC:
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_ALLOC:
case GOMP_MAP_ALWAYS_FROM:
return false;
default:
return true;
}
}
/* Copy host memory to an offload device. In asynchronous mode (if AQ is
non-NULL), when the source data is stack or may otherwise be deallocated
before the asynchronous copy takes place, EPHEMERAL must be passed as
TRUE. The CBUF isn't used for non-ephemeral asynchronous copies, because
the host data might not be computed yet (by an earlier asynchronous compute
region). */
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
void *d, const void *h, size_t sz,
bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
if (cbuf)
{
uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
{
long first = 0;
long last = cbuf->chunk_cnt - 1;
while (first <= last)
{
long middle = (first + last) >> 1;
if (cbuf->chunks[middle].end <= doff)
first = middle + 1;
else if (cbuf->chunks[middle].start <= doff)
{
if (doff + sz > cbuf->chunks[middle].end)
gomp_fatal ("internal libgomp cbuf error");
memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
h, sz);
return;
}
else
last = middle - 1;
}
}
}
if (__builtin_expect (aq != NULL, 0))
{
if (!devicep->openacc.async.host2dev_func (devicep->target_id, d, h, sz,
ephemeral, aq))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Copying of host object [%p..%p) to dev object [%p..%p) "
"failed", h, h + sz, d, d + sz);
}
}
else
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
void *h, const void *d, size_t sz)
{
if (__builtin_expect (aq != NULL, 0))
{
if (!devicep->openacc.async.dev2host_func (devicep->target_id, h, d, sz,
aq))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Copying of dev object [%p..%p) to host object [%p..%p) "
"failed", d, d + sz, h, h + sz);
}
}
else
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
if (!devicep->free_func (devicep->target_id, devptr))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("error in freeing device memory block at %p", devptr);
}
}
#ifdef RC_CHECKING
void
dump_tgt (const char *where, struct target_mem_desc *tgt)
{
if (!getenv ("GOMP_DEBUG_TGT"))
return;
fprintf (stderr, "%s: %s: tgt=%p\n", __FUNCTION__, where, (void*) tgt);
fprintf (stderr, "refcount=%d\n", (int) tgt->refcount);
fprintf (stderr, "tgt_start=%p\n", (void*) tgt->tgt_start);
fprintf (stderr, "tgt_end=%p\n", (void*) tgt->tgt_end);
fprintf (stderr, "to_free=%p\n", tgt->to_free);
fprintf (stderr, "list_count=%d\n", (int) tgt->list_count);
for (int i = 0; i < tgt->list_count; i++)
{
fprintf (stderr, "list item %d:\n", i);
fprintf (stderr, " key: %p\n", (void*) tgt->list[i].key);
if (tgt->list[i].key)
{
fprintf (stderr, " key.host_start=%p\n",
(void*) tgt->list[i].key->host_start);
fprintf (stderr, " key.host_end=%p\n",
(void*) tgt->list[i].key->host_end);
fprintf (stderr, " key.tgt=%p\n", (void*) tgt->list[i].key->tgt);
fprintf (stderr, " key.offset=%d\n",
(int) tgt->list[i].key->tgt_offset);
fprintf (stderr, " key.refcount=%d\n",
(int) tgt->list[i].key->refcount);
if (tgt->list[i].key->virtual_refcount == VREFCOUNT_LINK_KEY)
fprintf (stderr, " key.u.link_key=%p\n",
(void*) tgt->list[i].key->u.link_key);
else
{
fprintf (stderr, " key.virtual_refcount=%d\n",
(int) tgt->list[i].key->virtual_refcount);
fprintf (stderr, " key.u.attach_count=%p\n",
(void*) tgt->list[i].key->u.attach_count);
}
}
}
fprintf (stderr, "\n");
}
static void
rc_check_clear (splay_tree_node node)
{
splay_tree_key k = &node->key;
k->refcount_chk = 0;
k->tgt->refcount_chk = 0;
k->tgt->mark = false;
if (node->left)
rc_check_clear (node->left);
if (node->right)
rc_check_clear (node->right);
}
static void
rc_check_count (splay_tree_node node)
{
splay_tree_key k = &node->key;
struct target_mem_desc *t;
/* Add virtual reference counts ("acc enter data", etc.) for this key. */
k->refcount_chk += k->virtual_refcount;
t = k->tgt;
t->refcount_chk++;
if (!t->mark)
{
for (int i = 0; i < t->list_count; i++)
if (t->list[i].key)
t->list[i].key->refcount_chk++;
t->mark = true;
}
if (node->left)
rc_check_count (node->left);
if (node->right)
rc_check_count (node->right);
}
static bool
rc_check_verify (splay_tree_node node, bool noisy, bool errors)
{
splay_tree_key k = &node->key;
struct target_mem_desc *t;
if (k->refcount != REFCOUNT_INFINITY)
{
if (noisy)
fprintf (stderr, "key %p (%p..+%d): rc=%d/%d, virt_rc=%d\n", k,
(void *) k->host_start, (int) (k->host_end - k->host_start),
(int) k->refcount, (int) k->refcount_chk,
(int) k->virtual_refcount);
if (k->refcount != k->refcount_chk)
{
if (noisy)
fprintf (stderr, " -- key refcount mismatch!\n");
errors = true;
}
t = k->tgt;
if (noisy)
fprintf (stderr, "tgt %p: rc=%d/%d\n", t, (int) t->refcount,
(int) t->refcount_chk);
if (t->refcount != t->refcount_chk)
{
if (noisy)
fprintf (stderr,
" -- target memory descriptor refcount mismatch!\n");
errors = true;
}
}
if (node->left)
errors |= rc_check_verify (node->left, noisy, errors);
if (node->right)
errors |= rc_check_verify (node->right, noisy, errors);
return errors;
}
/* Call with device locked. */
attribute_hidden void
gomp_rc_check (struct gomp_device_descr *devicep, struct target_mem_desc *tgt)
{
splay_tree sp = &devicep->mem_map;
bool noisy = getenv ("GOMP_DEBUG_TGT") != 0;
if (noisy)
fprintf (stderr, "\n*** GOMP_RC_CHECK ***\n\n");
if (sp->root)
{
rc_check_clear (sp->root);
for (struct target_mem_desc *t = tgt; t; t = t->prev)
{
t->refcount_chk = 0;
t->mark = false;
}
/* Add references for interconnected splay-tree keys. */
rc_check_count (sp->root);
/* Add references for the tgt for a currently-executing kernel and/or
any enclosing data directives. */
for (struct target_mem_desc *t = tgt; t; t = t->prev)
{
t->refcount_chk++;
if (!t->mark)
{
for (int i = 0; i < t->list_count; i++)
if (t->list[i].key)
t->list[i].key->refcount_chk++;
t->mark = true;
}
}
if (rc_check_verify (sp->root, noisy, false))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("refcount checking failure");
}
}
}
#endif
/* Handle the case where gomp_map_lookup, splay_tree_lookup or
gomp_map_0len_lookup found oldn for newn.
Helper function of gomp_map_vars. */
static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree_key oldn,
splay_tree_key newn, struct target_var_desc *tgt_var,
unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
tgt_var->key = oldn;
tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
tgt_var->do_detach = kind == GOMP_MAP_ATTACH;
tgt_var->offset = newn->host_start - oldn->host_start;
tgt_var->length = newn->host_end - newn->host_start;
if ((kind & GOMP_MAP_FLAG_FORCE)
|| oldn->host_start > newn->host_start
|| oldn->host_end < newn->host_end)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Trying to map into device [%p..%p) object when "
"[%p..%p) is already mapped",
(void *) newn->host_start, (void *) newn->host_end,
(void *) oldn->host_start, (void *) oldn->host_end);
}
if (GOMP_MAP_ALWAYS_TO_P (kind))
gomp_copy_host2dev (devicep, aq,
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ newn->host_start - oldn->host_start),
(void *) newn->host_start,
newn->host_end - newn->host_start, false, cbuf);
if (oldn->refcount != REFCOUNT_INFINITY)
oldn->refcount++;
}
static int
get_kind (bool short_mapkind, void *kinds, int idx)
{
return short_mapkind ? ((unsigned short *) kinds)[idx]
: ((unsigned char *) kinds)[idx];
}
static void
gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
struct gomp_coalesce_buf *cbuf)
{
struct gomp_device_descr *devicep = tgt->device_descr;
struct splay_tree_s *mem_map = &devicep->mem_map;
struct splay_tree_key_s cur_node;
cur_node.host_start = host_ptr;
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + target_offset),
(void *) &cur_node.tgt_offset,
sizeof (void *), true, cbuf);
return;
}
/* Add bias to the pointer value. */
cur_node.host_start += bias;
cur_node.host_end = cur_node.host_start;
splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
if (n == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Pointer target of array section wasn't mapped");
}
cur_node.host_start -= n->host_start;
cur_node.tgt_offset
= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
/* At this point tgt_offset is target address of the
array section. Now subtract bias to get what we want
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
(void *) &cur_node.tgt_offset, sizeof (void *), true,
cbuf);
}
static void
gomp_map_fields_existing (struct target_mem_desc *tgt,
struct goacc_asyncqueue *aq, splay_tree_key n,
size_t first, size_t i, void **hostaddrs,
size_t *sizes, void *kinds,
struct gomp_coalesce_buf *cbuf)
{
struct gomp_device_descr *devicep = tgt->device_descr;
struct splay_tree_s *mem_map = &devicep->mem_map;
struct splay_tree_key_s cur_node;
int kind;
const bool short_mapkind = true;
const int typemask = short_mapkind ? 0xff : 0x7;
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
kind = get_kind (short_mapkind, kinds, i);
if (n2
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, aq, n2, &cur_node,
&tgt->list[i], kind & typemask, cbuf);
return;
}
if (sizes[i] == 0)
{
if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
{
cur_node.host_start--;
n2 = splay_tree_lookup (mem_map, &cur_node);
cur_node.host_start++;
if (n2
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start
== n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, aq, n2, &cur_node,
&tgt->list[i], kind & typemask, cbuf);
return;
}
}
cur_node.host_end++;
n2 = splay_tree_lookup (mem_map, &cur_node);
cur_node.host_end--;
if (n2
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
kind & typemask, cbuf);
return;
}
}
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Trying to map into device [%p..%p) structure element when "
"other mapped elements from the same structure weren't mapped "
"together with it", (void *) cur_node.host_start,
(void *) cur_node.host_end);
}
void
gomp_attach_pointer (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree mem_map,
splay_tree_key n, uintptr_t attach_to, size_t bias,
struct gomp_coalesce_buf *cbufp)
{
struct splay_tree_key_s s;
size_t size, idx;
if (n == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("enclosing struct not mapped for attach");
}
size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
/* We might have a pointer in a packed struct: however we cannot have more
than one such pointer in each pointer-sized portion of the struct, so
this is safe. */
idx = (attach_to - n->host_start) / sizeof (void *);
assert (n->virtual_refcount != VREFCOUNT_LINK_KEY);
if (!n->u.attach_count)
n->u.attach_count
= gomp_malloc_cleared (sizeof (*n->u.attach_count) * size);
if (n->u.attach_count[idx] < UINTPTR_MAX)
n->u.attach_count[idx]++;
else
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("attach count overflow");
}
if (n->u.attach_count[idx] == 1)
{
uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
- n->host_start;
uintptr_t target = (uintptr_t) *(void **) attach_to;
splay_tree_key tn;
uintptr_t data;
if ((void *) target == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("attempt to attach null pointer");
}
s.host_start = target + bias;
s.host_end = s.host_start + 1;
tn = splay_tree_lookup (mem_map, &s);
if (!tn)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("pointer target not mapped for attach");
}
data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
gomp_debug (1,
"%s: attaching host %p, target %p (struct base %p) to %p\n",
__FUNCTION__, (void *) attach_to, (void *) devptr,
(void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
sizeof (void *), true, cbufp);
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
(void *) attach_to, (int) n->u.attach_count[idx]);
}
void
gomp_detach_pointer (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree_key n,
uintptr_t detach_from, bool finalize,
struct gomp_coalesce_buf *cbufp)
{
size_t idx;
if (n == NULL)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("enclosing struct not mapped for detach");
}
idx = (detach_from - n->host_start) / sizeof (void *);
assert (n->virtual_refcount != VREFCOUNT_LINK_KEY);
if (!n->u.attach_count)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("no attachment counters for struct");
}
if (finalize)
n->u.attach_count[idx] = 1;
if (n->u.attach_count[idx] == 0)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("attach count underflow");
}
else
n->u.attach_count[idx]--;
if (n->u.attach_count[idx] == 0)
{
uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
- n->host_start;
uintptr_t target = (uintptr_t) *(void **) detach_from;
gomp_debug (1,
"%s: detaching host %p, target %p (struct base %p) to %p\n",
__FUNCTION__, (void *) detach_from, (void *) devptr,
(void *) (n->tgt->tgt_start + n->tgt_offset),
(void *) target);
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
sizeof (void *), true, cbufp);
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
(void *) detach_from, (int) n->u.attach_count[idx]);
}
uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
if (tgt->list[i].key != NULL)
return tgt->list[i].key->tgt->tgt_start
+ tgt->list[i].key->tgt_offset
+ tgt->list[i].offset;
switch (tgt->list[i].offset)
{
case OFFSET_INLINED:
return (uintptr_t) hostaddrs[i];
case OFFSET_POINTER:
return 0;
case OFFSET_STRUCT:
return tgt->list[i + 1].key->tgt->tgt_start
+ tgt->list[i + 1].key->tgt_offset
+ tgt->list[i + 1].offset
+ (uintptr_t) hostaddrs[i]
- (uintptr_t) hostaddrs[i + 1];
default:
return tgt->tgt_start + tgt->list[i].offset;
}
}
/* Definitions for data structures describing dynamic, non-contiguous arrays
(Note: interfaces with compiler)
The compiler generates a descriptor for each such array, places the
descriptor on stack, and passes the address of the descriptor to the libgomp
runtime as a normal map argument. The runtime then processes the array
data structure setup, and replaces the argument with the new actual
array address for the child function.
Care must be taken such that the struct field and layout assumptions
of struct gomp_array_dim, gomp_array_descr_type inside the compiler
be consistant with the below declarations. */
struct gomp_array_dim {
size_t base;
size_t length;
size_t elem_size;
size_t is_array;
};
struct gomp_array_descr_type {
void *ptr;
size_t ndims;
struct gomp_array_dim dims[];
};
/* Internal dynamic array info struct, used only here inside the runtime. */
struct da_info
{
struct gomp_array_descr_type *descr;
size_t map_index;
size_t ptrblock_size;
size_t data_row_num;
size_t data_row_size;
};
static size_t
gomp_dynamic_array_count_rows (struct gomp_array_descr_type *descr)
{
size_t nrows = 1;
for (size_t d = 0; d < descr->ndims - 1; d++)
nrows *= descr->dims[d].length / sizeof (void *);
return nrows;
}
static void
gomp_dynamic_array_compute_info (struct da_info *da)
{
size_t d, n = 1;
struct gomp_array_descr_type *descr = da->descr;
da->ptrblock_size = 0;
for (d = 0; d < descr->ndims - 1; d++)
{
size_t dim_count = descr->dims[d].length / descr->dims[d].elem_size;
size_t dim_ptrblock_size = (descr->dims[d + 1].is_array
? 0 : descr->dims[d].length * n);
da->ptrblock_size += dim_ptrblock_size;
n *= dim_count;
}
da->data_row_num = n;
da->data_row_size = descr->dims[d].length;
}
static void
gomp_dynamic_array_fill_rows_1 (struct gomp_array_descr_type *descr, void *da,
size_t d, void ***row_ptr, size_t *count)
{
if (d < descr->ndims - 1)
{
size_t elsize = descr->dims[d].elem_size;
size_t n = descr->dims[d].length / elsize;
void *p = da + descr->dims[d].base;
for (size_t i = 0; i < n; i++)
{
void *ptr = p + i * elsize;
/* Deref if next dimension is not array. */
if (!descr->dims[d + 1].is_array)
ptr = *((void **) ptr);
gomp_dynamic_array_fill_rows_1 (descr, ptr, d + 1, row_ptr, count);
}
}
else
{
**row_ptr = da + descr->dims[d].base;
*row_ptr += 1;
*count += 1;
}
}
static size_t
gomp_dynamic_array_fill_rows (struct gomp_array_descr_type *descr, void *rows[])
{
size_t count = 0;
void **p = rows;
gomp_dynamic_array_fill_rows_1 (descr, descr->ptr, 0, &p, &count);
return count;
}
static void *
gomp_dynamic_array_create_ptrblock (struct da_info *da,
void *tgt_addr, void *tgt_data_rows[])
{
struct gomp_array_descr_type *descr = da->descr;
void *ptrblock = gomp_malloc (da->ptrblock_size);
void **curr_dim_ptrblock = (void **) ptrblock;
size_t n = 1;
for (size_t d = 0; d < descr->ndims - 1; d++)
{
int curr_dim_len = descr->dims[d].length;
int next_dim_len = descr->dims[d + 1].length;
int curr_dim_num = curr_dim_len / sizeof (void *);
void *next_dim_ptrblock
= (void *)(curr_dim_ptrblock + n * curr_dim_num);
for (int b = 0; b < n; b++)
for (int i = 0; i < curr_dim_num; i++)
{
if (d < descr->ndims - 2)
{
void *ptr = (next_dim_ptrblock
+ b * curr_dim_num * next_dim_len
+ i * next_dim_len);
void *tgt_ptr = tgt_addr + (ptr - ptrblock);
curr_dim_ptrblock[b * curr_dim_num + i] = tgt_ptr;
}
else
{
curr_dim_ptrblock[b * curr_dim_num + i]
= tgt_data_rows[b * curr_dim_num + i];
}
void *addr = &curr_dim_ptrblock[b * curr_dim_num + i];
assert (ptrblock <= addr && addr < ptrblock + da->ptrblock_size);
}
n *= curr_dim_num;
curr_dim_ptrblock = next_dim_ptrblock;
}
assert (n == da->data_row_num);
return ptrblock;
}
static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes,
void *kinds, bool short_mapkind,
enum gomp_map_vars_kind pragma_kind)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
bool has_firstprivate = false;
const int rshift = short_mapkind ? 8 : 3;
const int typemask = short_mapkind ? 0xff : 0x7;
struct splay_tree_s *mem_map = &devicep->mem_map;
struct splay_tree_key_s cur_node;
struct target_mem_desc *tgt;
bool process_dynarrays = false;
size_t da_data_row_num = 0, row_start = 0;
size_t da_info_num = 0, da_index;
struct da_info *da_info = NULL;
struct target_var_desc *row_desc;
uintptr_t target_row_addr;
void **host_data_rows = NULL, **target_data_rows = NULL;
void *row;
if (mapnum > 0)
{
int kind = get_kind (short_mapkind, kinds, 0);
process_dynarrays = GOMP_MAP_DYNAMIC_ARRAY_P (kind & typemask);
}
if (process_dynarrays)
for (i = 0; i < mapnum; i++)
{
int kind = get_kind (short_mapkind, kinds, i);
if (GOMP_MAP_DYNAMIC_ARRAY_P (kind & typemask))
{
da_data_row_num += gomp_dynamic_array_count_rows (hostaddrs[i]);
da_info_num += 1;
}
}
tgt = gomp_malloc (sizeof (*tgt)
+ sizeof (tgt->list[0]) * (mapnum + da_data_row_num));
tgt->list_count = mapnum + da_data_row_num;
tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA
|| pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1;
tgt->device_descr = devicep;
struct gomp_coalesce_buf cbuf, *cbufp = NULL;
if (mapnum == 0)
{
tgt->tgt_start = 0;
tgt->tgt_end = 0;
return tgt;
}
if (da_info_num)
da_info = gomp_alloca (sizeof (struct da_info) * da_info_num);
if (da_data_row_num)
{
host_data_rows = gomp_malloc (sizeof (void *) * da_data_row_num);
target_data_rows = gomp_malloc (sizeof (void *) * da_data_row_num);
}
tgt_align = sizeof (void *);
tgt_size = 0;
cbuf.chunks = NULL;
cbuf.chunk_cnt = -1;
cbuf.use_cnt = 0;
cbuf.buf = NULL;
if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
{
size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
cbuf.chunk_cnt = 0;
}
if (pragma_kind == GOMP_MAP_VARS_TARGET)
{
size_t align = 4 * sizeof (void *);
tgt_align = align;
tgt_size = mapnum * sizeof (void *);
cbuf.chunk_cnt = 1;
cbuf.use_cnt = 1 + (mapnum > 1);
cbuf.chunks[0].start = 0;
cbuf.chunks[0].end = tgt_size;
}
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
{
gomp_mutex_unlock (&devicep->lock);
free (tgt);
return NULL;
}
for (i = 0, da_index = 0; i < mapnum; i++)
{
int kind = get_kind (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL
|| (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
{
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_INLINED;
continue;
}
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start;
splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
if (n == NULL)
{
if (pragma_kind == GOMP_MAP_VARS_OPENACC_IF_PRESENT)
{
/* No error, continue using the host address. */
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_INLINED;
continue;
}
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("use_device_ptr pointer wasn't mapped");
}
cur_node.host_start -= n->host_start;
hostaddrs[i]
= (void *) (n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start);
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_INLINED;
continue;
}
else if ((kind & typemask) == GOMP_MAP_STRUCT)
{
size_t first = i + 1;
size_t last = i + sizes[i];
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = (uintptr_t) hostaddrs[last]
+ sizes[last];
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_STRUCT;
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n == NULL)
{
size_t align = (size_t) 1 << (kind >> rshift);
if (tgt_align < align)
tgt_align = align;
tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt_size += cur_node.host_end - cur_node.host_start;
not_found_cnt += last - i;
for (i = first; i <= last; i++)
{
tgt->list[i].key = NULL;
if (!aq
&& gomp_to_device_kind_p (get_kind (short_mapkind, kinds,
i) & typemask))
gomp_coalesce_buf_add (&cbuf,
tgt_size - cur_node.host_end
+ (uintptr_t) hostaddrs[i],
sizes[i]);
}
i--;
continue;
}
for (i = first; i <= last; i++)
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
sizes, kinds, NULL);
i--;
continue;
}
else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
{
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_POINTER;
has_firstprivate = true;
continue;
}
else if (GOMP_MAP_DYNAMIC_ARRAY_P (kind & typemask))
{
/* Ignore dynamic arrays for now, we process them together
later. */
tgt->list[i].key = NULL;
tgt->list[i].offset = 0;
not_found_cnt++;
struct da_info *da = &da_info[da_index++];
da->descr = (struct gomp_array_descr_type *) hostaddrs[i];
da->map_index = i;
continue;
}
else if ((kind & typemask) == GOMP_MAP_ATTACH)
{
tgt->list[i].key = NULL;
has_firstprivate = true;
continue;
}
else if ((kind & typemask) == GOMP_MAP_NO_ALLOC)
{
tgt->list[i].key = NULL;
tgt->list[i].offset = 0;
continue;
}
cur_node.host_start = (uintptr_t) hostaddrs[i];
if (!GOMP_MAP_POINTER_P (kind & typemask)
&& (kind & typemask) != GOMP_MAP_ATTACH)
cur_node.host_end = cur_node.host_start + sizes[i];
else
cur_node.host_end = cur_node.host_start + sizeof (void *);
if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
{
tgt->list[i].key = NULL;
size_t align = (size_t) 1 << (kind >> rshift);
if (tgt_align < align)
tgt_align = align;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
if (!aq)
gomp_coalesce_buf_add (&cbuf, tgt_size,
cur_node.host_end - cur_node.host_start);
tgt_size += cur_node.host_end - cur_node.host_start;
has_firstprivate = true;
continue;
}
splay_tree_key n;
if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
{
n = gomp_map_0len_lookup (mem_map, &cur_node);
if (!n)
{
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_POINTER;
continue;
}
}
else
n = splay_tree_lookup (mem_map, &cur_node);
if (n && n->refcount != REFCOUNT_LINK)
gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
kind & typemask, NULL);
else
{
tgt->list[i].key = NULL;
size_t align = (size_t) 1 << (kind >> rshift);
not_found_cnt++;
if (tgt_align < align)
tgt_align = align;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
if (!aq && gomp_to_device_kind_p (kind & typemask))
gomp_coalesce_buf_add (&cbuf, tgt_size,
cur_node.host_end - cur_node.host_start);
tgt_size += cur_node.host_end - cur_node.host_start;
if ((kind & typemask) == GOMP_MAP_TO_PSET)
{
size_t j;
for (j = i + 1; j < mapnum; j++)
if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, j)
& typemask))
break;
else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
> cur_node.host_end))
break;
else
{
tgt->list[j].key = NULL;
i++;
}
}
}
}
/* For dynamic arrays. Each data row is one target item, separated from
the normal map clause items, hence we order them after mapnum. */
if (process_dynarrays)
{
for (i = 0, da_index = 0, row_start = 0; i < mapnum; i++)
{
int kind = get_kind (short_mapkind, kinds, i);
if (!GOMP_MAP_DYNAMIC_ARRAY_P (kind & typemask))
continue;
struct da_info *da = &da_info[da_index++];
struct gomp_array_descr_type *descr = da->descr;
size_t nr;
gomp_dynamic_array_compute_info (da);
/* We have allocated space in host/target_data_rows to place all the
row data block pointers, now we can start filling them in. */
nr = gomp_dynamic_array_fill_rows (descr, &host_data_rows[row_start]);
assert (nr == da->data_row_num);
size_t align = (size_t) 1 << (kind >> rshift);
if (tgt_align < align)
tgt_align = align;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt_size += da->ptrblock_size;
for (size_t j = 0; j < da->data_row_num; j++)
{
row = host_data_rows[row_start + j];
row_desc = &tgt->list[mapnum + row_start + j];
cur_node.host_start = (uintptr_t) row;
cur_node.host_end = cur_node.host_start + da->data_row_size;
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n)
{
assert (n->refcount != REFCOUNT_LINK);
gomp_map_vars_existing (devicep, aq, n, &cur_node, row_desc,
kind & typemask,
/* TODO: cbuf? */ NULL);
}
else
{
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt_size += da->data_row_size;
not_found_cnt++;
}
}
row_start += da->data_row_num;
}
}
if (devaddrs)
{
if (mapnum != 1)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("unexpected aggregation");
}
tgt->to_free = devaddrs[0];
tgt->tgt_start = (uintptr_t) tgt->to_free;
tgt->tgt_end = tgt->tgt_start + sizes[0];
}
else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
{
/* Allocate tgt_align aligned tgt_size block of memory. */
/* FIXME: Perhaps change interface to allocate properly aligned
memory. */
tgt->to_free = devicep->alloc_func (devicep->target_id,
tgt_size + tgt_align - 1);
if (!tgt->to_free)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("device memory allocation fail");
}
tgt->tgt_start = (uintptr_t) tgt->to_free;
tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
tgt->tgt_end = tgt->tgt_start + tgt_size;
if (cbuf.use_cnt == 1)
cbuf.chunk_cnt--;
if (cbuf.chunk_cnt > 0)
{
cbuf.buf
= malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
if (cbuf.buf)
{
cbuf.tgt = tgt;
cbufp = &cbuf;
}
}
}
else
{
tgt->to_free = NULL;
tgt->tgt_start = 0;
tgt->tgt_end = 0;
}
tgt_size = 0;
if (pragma_kind == GOMP_MAP_VARS_TARGET)
tgt_size = mapnum * sizeof (void *);
tgt->array = NULL;
if (not_found_cnt || has_firstprivate)
{
if (not_found_cnt)
tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
splay_tree_node array = tgt->array;
size_t j, field_tgt_offset = 0, field_tgt_clear = ~(size_t) 0;
uintptr_t field_tgt_base = 0;
for (i = 0; i < mapnum; i++)
if (tgt->list[i].key == NULL)
{
int kind = get_kind (short_mapkind, kinds, i);
if (hostaddrs[i] == NULL)
continue;
switch (kind & typemask)
{
size_t align, len, first, last;
splay_tree_key n;
case GOMP_MAP_FIRSTPRIVATE:
align = (size_t) 1 << (kind >> rshift);
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt->list[i].offset = tgt_size;
len = sizes[i];
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + tgt_size),
(void *) hostaddrs[i], len, false, cbufp);
tgt_size += len;
continue;
case GOMP_MAP_FIRSTPRIVATE_INT:
case GOMP_MAP_USE_DEVICE_PTR:
case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
continue;
case GOMP_MAP_STRUCT:
first = i + 1;
last = i + sizes[i];
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = (uintptr_t) hostaddrs[last]
+ sizes[last];
if (tgt->list[first].key != NULL)
continue;
n = splay_tree_lookup (mem_map, &cur_node);
if (n == NULL)
{
size_t align = (size_t) 1 << (kind >> rshift);
tgt_size -= (uintptr_t) hostaddrs[first]
- (uintptr_t) hostaddrs[i];
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt_size += (uintptr_t) hostaddrs[first]
- (uintptr_t) hostaddrs[i];
field_tgt_base = (uintptr_t) hostaddrs[first];
field_tgt_offset = tgt_size;
field_tgt_clear = last;
tgt_size += cur_node.host_end
- (uintptr_t) hostaddrs[first];
continue;
}
for (i = first; i <= last; i++)
gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
sizes, kinds, cbufp);
i--;
continue;
case GOMP_MAP_ALWAYS_POINTER:
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizeof (void *);
n = splay_tree_lookup (mem_map, &cur_node);
if (n == NULL
|| n->host_start > cur_node.host_start
|| n->host_end < cur_node.host_end)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("always pointer not mapped");
}
if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
!= GOMP_MAP_ALWAYS_POINTER)
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
if (cur_node.tgt_offset)
cur_node.tgt_offset -= sizes[i];
gomp_copy_host2dev (devicep, aq,
(void *) (n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start
- n->host_start),
(void *) &cur_node.tgt_offset,
sizeof (void *), true, cbufp);
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start - n->host_start;
continue;
case GOMP_MAP_ATTACH:
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n != NULL)
{
tgt->list[i].key = n;
tgt->list[i].offset = cur_node.host_start - n->host_start;
tgt->list[i].length = n->host_end - n->host_start;
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
tgt->list[i].do_detach
= (pragma_kind != GOMP_MAP_VARS_OPENACC_ENTER_DATA);
n->refcount++;
}
else
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("outer struct not mapped for attach");
}
gomp_attach_pointer (devicep, aq, mem_map, n,
(uintptr_t) hostaddrs[i], sizes[i],
cbufp);
continue;
}
case GOMP_MAP_NO_ALLOC:
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n != NULL)
{
tgt->list[i].key = n;
tgt->list[i].offset = cur_node.host_start - n->host_start;
tgt->list[i].length = n->host_end - n->host_start;
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
tgt->list[i].do_detach = false;
n->refcount++;
}
else
{
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_INLINED;
tgt->list[i].length = sizes[i];
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
tgt->list[i].do_detach = false;
if (i + 1 < mapnum)
{
int kind2 = get_kind (short_mapkind, kinds, i + 1);
switch (kind2 & typemask)
{
case GOMP_MAP_ATTACH:
case GOMP_MAP_POINTER:
/* The data is not present but we have an attach
or pointer clause next. Skip over it. */
i++;
tgt->list[i].key = NULL;
tgt->list[i].offset = OFFSET_INLINED;
tgt->list[i].length = sizes[i];
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
tgt->list[i].do_detach = false;
break;
default:
break;
}
}
}
continue;
}
default:
break;
}
if (GOMP_MAP_DYNAMIC_ARRAY_P (kind & typemask))
{
tgt->list[i].key = &array->key;
tgt->list[i].key->tgt = tgt;
array++;
continue;
}
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) hostaddrs[i];
if (!GOMP_MAP_POINTER_P (kind & typemask))
k->host_end = k->host_start + sizes[i];
else
k->host_end = k->host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, k);
/* Need to account for the case where a struct field hasn't been
mapped onto the accelerator yet. */
if (n && n->refcount != REFCOUNT_LINK)
gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
kind & typemask, cbufp);
else
{
k->u.link_key = NULL;
if (n && n->refcount == REFCOUNT_LINK)
{
/* Replace target address of the pointer with target address
of mapped object in the splay tree. */
splay_tree_remove (mem_map, n);
k->u.link_key = n;
k->virtual_refcount = VREFCOUNT_LINK_KEY;
}
size_t align = (size_t) 1 << (kind >> rshift);
tgt->list[i].key = k;
k->tgt = tgt;
if (field_tgt_clear != FIELD_TGT_EMPTY)
{
k->tgt_offset = k->host_start - field_tgt_base
+ field_tgt_offset;
if (i == field_tgt_clear)
field_tgt_clear = FIELD_TGT_EMPTY;
}
else
{
tgt_size = (tgt_size + align - 1) & ~(align - 1);
k->tgt_offset = tgt_size;
tgt_size += k->host_end - k->host_start;
}
tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
tgt->list[i].always_copy_from
= GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
tgt->list[i].do_detach = false;
tgt->list[i].offset = 0;
tgt->list[i].length = k->host_end - k->host_start;
k->refcount = 1;
k->virtual_refcount = 0;
k->u.attach_count = NULL;
tgt->refcount++;
array->left = NULL;
array->right = NULL;
splay_tree_insert (mem_map, array);
switch (kind & typemask)
{
case GOMP_MAP_ALLOC:
case GOMP_MAP_FROM:
case GOMP_MAP_FORCE_ALLOC:
case GOMP_MAP_FORCE_FROM:
case GOMP_MAP_ALWAYS_FROM:
break;
case GOMP_MAP_TO:
case GOMP_MAP_TOFROM:
case GOMP_MAP_FORCE_TO:
case GOMP_MAP_FORCE_TOFROM:
case GOMP_MAP_ALWAYS_TO:
case GOMP_MAP_ALWAYS_TOFROM:
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
k->host_end - k->host_start, false,
cbufp);
break;
case GOMP_MAP_POINTER:
gomp_map_pointer (tgt, aq,
(uintptr_t) *(void **) k->host_start,
k->tgt_offset, sizes[i], cbufp);
break;
case GOMP_MAP_TO_PSET:
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
k->host_end - k->host_start, false,
cbufp);
for (j = i + 1; j < mapnum; j++)
if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds,
j)
& typemask))
break;
else if ((uintptr_t) hostaddrs[j] < k->host_start
|| ((uintptr_t) hostaddrs[j] + sizeof (void *)
> k->host_end))
break;
else
{
tgt->list[j].key = k;
tgt->list[j].copy_from = false;
tgt->list[j].always_copy_from = false;
tgt->list[j].do_detach = false;
if (k->refcount != REFCOUNT_INFINITY)
k->refcount++;
gomp_map_pointer (tgt, aq,
(uintptr_t) *(void **) hostaddrs[j],
k->tgt_offset
+ ((uintptr_t) hostaddrs[j]
- k->host_start),
sizes[j], cbufp);
i++;
}
break;
case GOMP_MAP_FORCE_PRESENT:
{
/* We already looked up the memory region above and it
was missing. */
size_t size = k->host_end - k->host_start;
gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
gomp_fatal ("present clause: !acc_is_present (%p, "
"%"PRIu64" (0x%"PRIx64"))",
(void *) k->host_start,
(uint64_t) size, (uint64_t) size);
#else
gomp_fatal ("present clause: !acc_is_present (%p, "
"%lu (0x%lx))", (void *) k->host_start,
(unsigned long) size, (unsigned long) size);
#endif
}
break;
case GOMP_MAP_FORCE_DEVICEPTR:
assert (k->host_end - k->host_start == sizeof (void *));
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
sizeof (void *), false, cbufp);
break;
default:
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
kind);
}
if (k->virtual_refcount == VREFCOUNT_LINK_KEY && k->u.link_key)
{
/* Set link pointer on target to the device address of the
mapped object. */
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
&tgt_addr, sizeof (void *), true, NULL);
}
array++;
}
}
/* Processing of dynamic array rows. */
if (process_dynarrays)
{
for (i = 0, da_index = 0, row_start = 0; i < mapnum; i++)
{
int kind = get_kind (short_mapkind, kinds, i);
if (!GOMP_MAP_DYNAMIC_ARRAY_P (kind & typemask))
continue;
struct da_info *da = &da_info[da_index++];
assert (da->descr == hostaddrs[i]);
/* The map for the dynamic array itself is never copied from
during unmapping, its the data rows that count. Set copy from
flags are set to false here. */
tgt->list[i].copy_from = false;
tgt->list[i].always_copy_from = false;
tgt->list[i].do_detach = false;
size_t align = (size_t) 1 << (kind >> rshift);
tgt_size = (tgt_size + align - 1) & ~(align - 1);
/* For the map of the dynamic array itself, adjust so that the
passed device address points to the beginning of the
ptrblock. */
tgt->list[i].key->tgt_offset = tgt_size;
void *target_ptrblock = (void*) tgt->tgt_start + tgt_size;
tgt_size += da->ptrblock_size;
/* Add splay key for each data row in current DA. */
for (size_t j = 0; j < da->data_row_num; j++)
{
row = host_data_rows[row_start + j];
row_desc = &tgt->list[mapnum + row_start + j];
cur_node.host_start = (uintptr_t) row;
cur_node.host_end = cur_node.host_start + da->data_row_size;
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n)
{
assert (n->refcount != REFCOUNT_LINK);
gomp_map_vars_existing (devicep, aq, n, &cur_node,
row_desc, kind & typemask, cbufp);
target_row_addr = n->tgt->tgt_start + n->tgt_offset;
}
else
{
tgt->refcount++;
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) row;
k->host_end = k->host_start + da->data_row_size;
k->tgt = tgt;
k->refcount = 1;
k->virtual_refcount = 0;
k->u.attach_count = NULL;
tgt_size = (tgt_size + align - 1) & ~(align - 1);
target_row_addr = tgt->tgt_start + tgt_size;
k->tgt_offset = tgt_size;
tgt_size += da->data_row_size;
row_desc->key = k;
row_desc->copy_from
= GOMP_MAP_COPY_FROM_P (kind & typemask);
row_desc->always_copy_from
= GOMP_MAP_COPY_FROM_P (kind & typemask);
row_desc->do_detach = false;
row_desc->offset = 0;
row_desc->length = da->data_row_size;
array->left = NULL;
array->right = NULL;
splay_tree_insert (mem_map, array);
if (GOMP_MAP_COPY_TO_P (kind & typemask))
gomp_copy_host2dev (devicep, aq,
(void *) tgt->tgt_start
+ k->tgt_offset,
(void *) k->host_start,
da->data_row_size, false, cbufp);
array++;
}
target_data_rows[row_start + j] = (void *) target_row_addr;
}
/* Now we have the target memory allocated, and target offsets of
all row blocks assigned and calculated, we can construct the
accelerator side ptrblock and copy it in. */
if (da->ptrblock_size)
{
void *ptrblock = gomp_dynamic_array_create_ptrblock
(da, target_ptrblock, target_data_rows + row_start);
gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock,
da->ptrblock_size, true, cbufp);
free (ptrblock);
}
row_start += da->data_row_num;
}
assert (row_start == da_data_row_num && da_index == da_info_num);
}
}
if (da_data_row_num)
{
free (host_data_rows);
free (target_data_rows);
}
if (pragma_kind == GOMP_MAP_VARS_TARGET)
{
for (i = 0; i < mapnum; i++)
{
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + i * sizeof (void *)),
(void *) &cur_node.tgt_offset, sizeof (void *),
true, cbufp);
}
}
if (cbufp)
{
long c = 0;
for (c = 0; c < cbuf.chunk_cnt; ++c)
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + cbuf.chunks[c].start),
(char *) cbuf.buf + (cbuf.chunks[c].start
- cbuf.chunks[0].start),
cbuf.chunks[c].end - cbuf.chunks[c].start, true,
NULL);
free (cbuf.buf);
cbuf.buf = NULL;
cbufp = NULL;
}
/* If the variable from "omp target enter data" map-list was already mapped,
tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or
gomp_exit_data. */
if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA
|| pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
&& tgt->refcount == 0)
{
/* If we're about to discard a target_mem_desc with no "structural"
references (tgt->refcount == 0), any splay keys linked in the tgt's
list must have their virtual refcount incremented to represent that
"lost" reference in order to implement the semantics of the OpenACC
"present increment" operation properly. */
if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA)
for (i = 0; i < tgt->list_count; i++)
if (tgt->list[i].key)
tgt->list[i].key->virtual_refcount++;
free (tgt);
tgt = NULL;
}
gomp_mutex_unlock (&devicep->lock);
return tgt;
}
attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
{
return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
sizes, kinds, short_mapkind, pragma_kind);
}
attribute_hidden struct target_mem_desc *
gomp_map_vars_async (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, size_t mapnum,
void **hostaddrs, void **devaddrs, size_t *sizes,
void *kinds, bool short_mapkind,
enum gomp_map_vars_kind pragma_kind)
{
return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
sizes, kinds, short_mapkind, pragma_kind);
}
attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
if (tgt->tgt_end)
gomp_free_device_memory (tgt->device_descr, tgt->to_free);
free (tgt->array);
free (tgt);
}
static bool
gomp_unref_tgt (void *ptr)
{
bool is_tgt_unmapped = false;
struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
if (tgt->refcount > 1)
tgt->refcount--;
else
{
gomp_unmap_tgt (tgt);
is_tgt_unmapped = true;
}
return is_tgt_unmapped;
}
static void
gomp_unref_tgt_void (void *ptr)
{
(void) gomp_unref_tgt (ptr);
}
static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
struct goacc_asyncqueue *aq)
{
bool is_tgt_unmapped = false;
splay_tree_remove (&devicep->mem_map, k);
if (k->virtual_refcount == VREFCOUNT_LINK_KEY)
{
if (k->u.link_key)
splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->u.link_key);
}
else if (k->u.attach_count)
free (k->u.attach_count);
if (aq)
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
(void *) k->tgt);
else
is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
return is_tgt_unmapped;
}
attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
return gomp_remove_var_internal (devicep, k, NULL);
}
/* Remove a variable asynchronously. This actually removes the variable
mapping immediately, but retains the linked target_mem_desc until the
asynchronous operation has completed (as it may still refer to target
memory). The device lock must be held before entry, and remains locked on
exit. */
attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
struct goacc_asyncqueue *aq)
{
(void) gomp_remove_var_internal (devicep, k, aq);
}
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
variables back from device to host: if it is false, it is assumed that this
has been done already. */
static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
struct goacc_asyncqueue *aq)
{
struct gomp_device_descr *devicep = tgt->device_descr;
if (tgt->list_count == 0)
{
free (tgt);
return;
}
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
{
gomp_mutex_unlock (&devicep->lock);
free (tgt->array);
free (tgt);
return;
}
size_t i;
/* We must perform detachments before any copies back to the host. */
for (i = 0; i < tgt->list_count; i++)
{
splay_tree_key k = tgt->list[i].key;
if (k != NULL && tgt->list[i].do_detach)
gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
+ tgt->list[i].offset,
k->refcount == 1, NULL);
}
for (i = 0; i < tgt->list_count; i++)
{
splay_tree_key k = tgt->list[i].key;
if (k == NULL)
continue;
bool do_unmap = false;
if (k->tgt == tgt
&& k->virtual_refcount > 0
&& k->virtual_refcount != VREFCOUNT_LINK_KEY
&& k->refcount != REFCOUNT_INFINITY)
{
k->virtual_refcount--;
k->refcount--;
}
else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY)
k->refcount--;
else if (k->refcount == 1)
{
k->refcount--;
do_unmap = true;
}
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|| tgt->list[i].always_copy_from)
gomp_copy_dev2host (devicep, aq,
(void *) (k->host_start + tgt->list[i].offset),
(void *) (k->tgt->tgt_start + k->tgt_offset
+ tgt->list[i].offset),
tgt->list[i].length);
if (do_unmap)
gomp_remove_var (devicep, k);
}
if (aq)
devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
(void *) tgt);
else
gomp_unref_tgt ((void *) tgt);
gomp_mutex_unlock (&devicep->lock);
}
attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
}
attribute_hidden void
gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
struct goacc_asyncqueue *aq)
{
gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
}
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
size_t *sizes, void *kinds, bool short_mapkind)
{
size_t i;
struct splay_tree_key_s cur_node;
const int typemask = short_mapkind ? 0xff : 0x7;
if (!devicep)
return;
if (mapnum == 0)
return;
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
{
gomp_mutex_unlock (&devicep->lock);
return;
}
for (i = 0; i < mapnum; i++)
if (sizes[i])
{
cur_node.host_start = (uintptr_t) hostaddrs[i];
cur_node.host_end = cur_node.host_start + sizes[i];
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
if (n)
{
int kind = get_kind (short_mapkind, kinds, i);
if (n->host_start > cur_node.host_start
|| n->host_end < cur_node.host_end)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("Trying to update [%p..%p) object when "
"only [%p..%p) is mapped",
(void *) cur_node.host_start,
(void *) cur_node.host_end,
(void *) n->host_start,
(void *) n->host_end);
}
void *hostaddr = (void *) cur_node.host_start;
void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start - n->host_start);
size_t size = cur_node.host_end - cur_node.host_start;
if (GOMP_MAP_COPY_TO_P (kind & typemask))
gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
false, NULL);
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
}
}
gomp_mutex_unlock (&devicep->lock);
}
/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
And insert to splay tree the mapping between addresses from HOST_TABLE and
from loaded target image. We rely in the host and device compiler
emitting variable and functions in the same order. */
static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
const void *host_table, const void *target_data,
bool is_register_lock)
{
void **host_func_table = ((void ***) host_table)[0];
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
/* The func table contains only addresses, the var table contains addresses
and corresponding sizes. */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
/* Load image to device and get target addresses for the image. */
struct addr_pair *target_table = NULL;
int i, num_target_entries;
num_target_entries
= devicep->load_image_func (devicep->target_id, version,
target_data, &target_table);
if (num_target_entries < num_funcs + num_vars)
{
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
gomp_mutex_unlock (&register_lock);
gomp_fatal ("Cannot map target functions or variables"
" (expected %u, have %u)", num_funcs + num_vars,
num_target_entries);
}
/* Insert host-target address mapping into splay tree. */
struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
tgt->refcount = REFCOUNT_INFINITY;
tgt->tgt_start = 0;
tgt->tgt_end = 0;
tgt->to_free = NULL;
tgt->prev = NULL;
tgt->list_count = 0;
tgt->device_descr = devicep;
splay_tree_node array = tgt->array;
for (i = 0; i < num_funcs; i++)
{
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) host_func_table[i];
k->host_end = k->host_start + 1;
k->tgt = tgt;
k->tgt_offset = target_table[i].start;
k->refcount = REFCOUNT_INFINITY;
k->virtual_refcount = 0;
k->u.attach_count = NULL;
k->u.link_key = NULL;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
array++;
}
/* Most significant bit of the size in host and target tables marks
"omp declare target link" variables. */
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
const uintptr_t size_mask = ~link_bit;
for (i = 0; i < num_vars; i++)
{
struct addr_pair *target_var = &target_table[num_funcs + i];
uintptr_t target_size = target_var->end - target_var->start;
if ((uintptr_t) host_var_table[i * 2 + 1] != target_size)
{
gomp_mutex_unlock (&devicep->lock);
if (is_register_lock)
gomp_mutex_unlock (&register_lock);
gomp_fatal ("Cannot map target variables (size mismatch)");
}
splay_tree_key k = &array->key;
k->host_start = (uintptr_t) host_var_table[i * 2];
k->host_end
= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
k->tgt = tgt;
k->tgt_offset = target_var->start;
k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY;
k->virtual_refcount = 0;
k->u.attach_count = NULL;
k->u.link_key = NULL;
array->left = NULL;
array->right = NULL;
splay_tree_insert (&devicep->mem_map, array);
array++;
}
free (target_table);
}
/* Unload the mappings described by target_data from device DEVICE_P.
The device must be locked. */
static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
unsigned version,
const void *host_table, const void *target_data)
{
void **host_func_table = ((void ***) host_table)[0];
void **host_funcs_end = ((void ***) host_table)[1];
void **host_var_table = ((void ***) host_table)[2];
void **host_vars_end = ((void ***) host_table)[3];
/* The func table contains only addresses, the var table contains addresses
and corresponding sizes. */
int num_funcs = host_funcs_end - host_func_table;
int num_vars = (host_vars_end - host_var_table) / 2;
struct splay_tree_key_s k;
splay_tree_key node = NULL;
/* Find mapping at start of node array */
if (num_funcs || num_vars)
{
k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
: (uintptr_t) host_var_table[0]);
k.host_end = k.host_start + 1;
node = splay_tree_lookup (&devicep->mem_map, &k);
}
if (!devicep->unload_image_func (devicep->target_id, version, target_data))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("image unload fail");
}
/* Remove mappings from splay tree. */
int i;
for (i = 0; i < num_funcs; i++)
{
k.host_start = (uintptr_t) host_func_table[i];
k.host_end = k.host_start + 1;
splay_tree_remove (&devicep->mem_map, &k);
}
/* Most significant bit of the size in host and target tables marks
"omp declare target link" variables. */
const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
const uintptr_t size_mask = ~link_bit;
bool is_tgt_unmapped = false;
for (i = 0; i < num_vars; i++)
{
k.host_start = (uintptr_t) host_var_table[i * 2];
k.host_end
= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
splay_tree_remove (&devicep->mem_map, &k);
else
{
splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
is_tgt_unmapped = gomp_remove_var (devicep, n);
}
}
if (node && !is_tgt_unmapped)
{
free (node->tgt);
free (node);
}
}
/* This function should be called from every offload image while loading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin. */
void
GOMP_offload_register_ver (unsigned version, const void *host_table,
int target_type, const void *target_data)
{
int i;
if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
gomp_fatal ("Library too old for offload (version %u < %u)",
GOMP_VERSION, GOMP_VERSION_LIB (version));
gomp_mutex_lock (&register_lock);
/* Load image to all initialized devices. */
for (i = 0; i < num_devices; i++)
{
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type
&& devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_load_image_to_device (devicep, version,
host_table, target_data, true);
gomp_mutex_unlock (&devicep->lock);
}
/* Insert image to array of pending images. */
offload_images
= gomp_realloc_unlock (offload_images,
(num_offload_images + 1)
* sizeof (struct offload_image_descr));
offload_images[num_offload_images].version = version;
offload_images[num_offload_images].type = target_type;
offload_images[num_offload_images].host_table = host_table;
offload_images[num_offload_images].target_data = target_data;
num_offload_images++;
gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_register (const void *host_table, int target_type,
const void *target_data)
{
GOMP_offload_register_ver (0, host_table, target_type, target_data);
}
/* This function should be called from every offload image while unloading.
It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
the target, and TARGET_DATA needed by target plugin. */
void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
int target_type, const void *target_data)
{
int i;
gomp_mutex_lock (&register_lock);
/* Unload image from all initialized devices. */
for (i = 0; i < num_devices; i++)
{
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == target_type
&& devicep->state == GOMP_DEVICE_INITIALIZED)
gomp_unload_image_from_device (devicep, version,
host_table, target_data);
gomp_mutex_unlock (&devicep->lock);
}
/* Remove image from array of pending images. */
for (i = 0; i < num_offload_images; i++)
if (offload_images[i].target_data == target_data)
{
offload_images[i] = offload_images[--num_offload_images];
break;
}
gomp_mutex_unlock (&register_lock);
}
void
GOMP_offload_unregister (const void *host_table, int target_type,
const void *target_data)
{
GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}
/* This function initializes the target device, specified by DEVICEP. DEVICEP
must be locked on entry, and remains locked on return. */
attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
gomp_debug (0, "%s (%s; %d; %d)\n", __FUNCTION__,
devicep->name, (int) devicep->type, devicep->target_id);
int i;
if (!devicep->init_device_func (devicep->target_id))
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("device initialization failed");
}
/* Load to device all images registered by the moment. */
for (i = 0; i < num_offload_images; i++)
{
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
gomp_load_image_to_device (devicep, image->version,
image->host_table, image->target_data,
false);
}
/* Initialize OpenACC asynchronous queues. */
goacc_init_asyncqueues (devicep);
devicep->state = GOMP_DEVICE_INITIALIZED;
}
/* This function finalizes the target device, specified by DEVICEP. DEVICEP
must be locked on entry, and remains locked on return. */
attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
bool ret = goacc_fini_asyncqueues (devicep);
ret &= devicep->fini_device_func (devicep->target_id);
devicep->state = GOMP_DEVICE_FINALIZED;
return ret;
}
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
if (devicep->state == GOMP_DEVICE_INITIALIZED)
{
unsigned i;
/* Unload from device all images registered at the moment. */
for (i = 0; i < num_offload_images; i++)
{
struct offload_image_descr *image = &offload_images[i];
if (image->type == devicep->type)
gomp_unload_image_from_device (devicep, image->version,
image->host_table,
image->target_data);
}
}
}
/* Do we have offload data available for the given offload target type?
Instead of verifying that *all* offload data is available that could
possibly be required, we instead just look for *any*. If we later find any
offload data missing, that's user error. */
attribute_hidden bool
gomp_offload_target_available_p (int type)
{
gomp_debug (0, "%s (%d)\n", __FUNCTION__, type);
bool available = false;
/* Has the offload target already been initialized? */
for (int i = 0; !available && i < num_devices; i++)
{
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->type == type
&& devicep->state == GOMP_DEVICE_INITIALIZED)
available = true;
gomp_mutex_unlock (&devicep->lock);
}
if (!available)
{
gomp_mutex_lock (&register_lock);
/* If there is no offload data available at all, we cannot later fail to
find any of it for a specific offload target. This is the case where
there are no offloaded code regions in user code, but there can still
be executable directives used, or runtime library calls made. */
if (num_offload_images == 0)
available = true;
/* Can the offload target be initialized? */
for (int i = 0; !available && i < num_offload_images; i++)
if (offload_images[i].type == type)
available = true;
gomp_mutex_unlock (&register_lock);
}
gomp_debug (0, " %s (%d): %d\n", __FUNCTION__, type, (int) available);
return available;
}
/* Host fallback for GOMP_target{,_ext} routines. */
static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
{
struct gomp_thread old_thr, *thr = gomp_thread ();
old_thr = *thr;
memset (thr, '\0', sizeof (*thr));
if (gomp_places_list)
{
thr->place = old_thr.place;
thr->ts.place_partition_len = gomp_places_list_len;
}
fn (hostaddrs);
gomp_free_thread (thr);
*thr = old_thr;
}
/* Calculate alignment and size requirements of a private copy of data shared
as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */
static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
unsigned short *kinds, size_t *tgt_align,
size_t *tgt_size)
{
size_t i;
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
{
size_t align = (size_t) 1 << (kinds[i] >> 8);
if (*tgt_align < align)
*tgt_align = align;
*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
*tgt_size += sizes[i];
}
}
/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST. */
static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
size_t *sizes, unsigned short *kinds, size_t tgt_align,
size_t tgt_size)
{
uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
if (al)
tgt += tgt_align - al;
tgt_size = 0;
size_t i;
for (i = 0; i < mapnum; i++)
if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
{
size_t align = (size_t) 1 << (kinds[i] >> 8);
tgt_size = (tgt_size + align - 1) & ~(align - 1);
memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
hostaddrs[i] = tgt + tgt_size;
tgt_size = tgt_size + sizes[i];
}
}
/* Helper function of GOMP_target{,_ext} routines. */
static void *
gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
void (*host_fn) (void *))
{
if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
return (void *) host_fn;
else
{
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_FINALIZED)
{
gomp_mutex_unlock (&devicep->lock);
return NULL;
}
struct splay_tree_key_s k;
k.host_start = (uintptr_t) host_fn;
k.host_end = k.host_start + 1;
splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
gomp_mutex_unlock (&devicep->lock);
if (tgt_fn == NULL)
return NULL;
return (void *) tgt_fn->tgt_offset;
}
}
/* Called when encountering a target directive. If DEVICE
is GOMP_DEVICE_ICV, it means use device-var ICV. If it is
GOMP_DEVICE_HOST_FALLBACK (or any value
larger than last available hw device), use host fallback.
FN is address of host code, UNUSED is part of the current ABI, but
we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays
with MAPNUM entries, with addresses of the host objects,
sizes of the host objects (resp. for pointer kind pointer bias
and assumed sizeof (void *) size) and kinds. */
void
GOMP_target (int device, void (*fn) (void *), const void *unused,
size_t mapnum, void **hostaddrs, size_t *sizes,
unsigned char *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
void *fn_addr;
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
/* All shared memory devices should use the GOMP_target_ext function. */
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
return gomp_target_fallback (fn, hostaddrs);
struct target_mem_desc *tgt_vars
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
NULL);
gomp_unmap_vars (tgt_vars, true);
}
/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
and several arguments have been added:
FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
DEPEND is array of dependencies, see GOMP_task for details.
ARGS is a pointer to an array consisting of a variable number of both
device-independent and device-specific arguments, which can take one two
elements where the first specifies for which device it is intended, the type
and optionally also the value. If the value is not present in the first
one, the whole second element the actual value. The last element of the
array is a single NULL. Among the device independent can be for example
NUM_TEAMS and THREAD_LIMIT.
NUM_TEAMS is positive if GOMP_teams will be called in the body with
that value, or 1 if teams construct is not present, or 0, if
teams construct does not have num_teams clause and so the choice is
implementation defined, and -1 if it can't be determined on the host
what value will GOMP_teams have on the device.
THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
body with that value, or 0, if teams construct does not have thread_limit
clause or the teams construct is not present, or -1 if it can't be
determined on the host what value will GOMP_teams have on the device. */
void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned short *kinds,
unsigned int flags, void **depend, void **args)
{
struct gomp_device_descr *devicep = resolve_device (device);
size_t tgt_align = 0, tgt_size = 0;
bool fpc_done = false;
if (flags & GOMP_TARGET_FLAG_NOWAIT)
{
struct gomp_thread *thr = gomp_thread ();
/* Create a team if we don't have any around, as nowait
target tasks make sense to run asynchronously even when
outside of any parallel. */
if (__builtin_expect (thr->ts.team == NULL, 0))
{
struct gomp_team *team = gomp_new_team (1);
struct gomp_task *task = thr->task;
struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
team->prev_ts = thr->ts;
thr->ts.team = team;
thr->ts.team_id = 0;
thr->ts.work_share = &team->work_shares[0];
thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
thr->ts.single_count = 0;
#endif
thr->ts.static_trip = 0;
thr->task = &team->implicit_task[0];
gomp_init_task (thr->task, NULL, icv);
if (task)
{
thr->task = task;
gomp_end_task ();
free (task);
thr->task = &team->implicit_task[0];
}
else
pthread_setspecific (gomp_thread_destructor, thr);
}
if (thr->ts.team
&& !thr->task->final_task)
{
gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
sizes, kinds, flags, depend, args,
GOMP_TARGET_TASK_BEFORE_MAP);
return;
}
}
/* If there are depend clauses, but nowait is not present
(or we are in a final task), block the parent task until the
dependencies are resolved and then just continue with the rest
of the function as if it is a merged task. */
if (depend != NULL)
{
struct gomp_thread *thr = gomp_thread ();
if (thr->task && thr->task->depend_hash)
{
/* If we might need to wait, copy firstprivate now. */
calculate_firstprivate_requirements (mapnum, sizes, kinds,
&tgt_align, &tgt_size);
if (tgt_align)
{
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
tgt_align, tgt_size);
}
fpc_done = true;
gomp_task_maybe_wait_for_dependencies (depend);
}
}
void *fn_addr;
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
|| (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
{
if (!fpc_done)
{
calculate_firstprivate_requirements (mapnum, sizes, kinds,
&tgt_align, &tgt_size);
if (tgt_align)
{
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
tgt_align, tgt_size);
}
}
gomp_target_fallback (fn, hostaddrs);
return;
}
struct target_mem_desc *tgt_vars;
if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
{
if (!fpc_done)
{
calculate_firstprivate_requirements (mapnum, sizes, kinds,
&tgt_align, &tgt_size);
if (tgt_align)
{
char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
tgt_align, tgt_size);
}
}
tgt_vars = NULL;
}
else
tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
true, GOMP_MAP_VARS_TARGET);
devicep->run_func (devicep->target_id, fn_addr,
tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
args);
if (tgt_vars)
gomp_unmap_vars (tgt_vars, true);
}
/* Host fallback for GOMP_target_data{,_ext} routines. */
static void
gomp_target_data_fallback (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
if (icv->target_data)
{
/* Even when doing a host fallback, if there are any active
#pragma omp target data constructs, need to remember the
new #pragma omp target data, otherwise GOMP_target_end_data
would get out of sync. */
struct target_mem_desc *tgt
= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
GOMP_MAP_VARS_DATA);
tgt->prev = icv->target_data;
icv->target_data = tgt;
}
}
void
GOMP_target_data (int device, const void *unused, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
GOMP_MAP_VARS_DATA);
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
}
void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
size_t *sizes, unsigned short *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return gomp_target_data_fallback ();
struct target_mem_desc *tgt
= gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
GOMP_MAP_VARS_DATA);
struct gomp_task_icv *icv = gomp_icv (true);
tgt->prev = icv->target_data;
icv->target_data = tgt;
}
void
GOMP_target_end_data (void)
{
struct gomp_task_icv *icv = gomp_icv (false);
if (icv->target_data)
{
struct target_mem_desc *tgt = icv->target_data;
icv->target_data = tgt->prev;
gomp_unmap_vars (tgt, true);
}
}
void
GOMP_target_update (int device, const void *unused, size_t mapnum,
void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
struct gomp_device_descr *devicep = resolve_device (device);
if (devicep == NULL
|| !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
|| devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
return;
gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}
void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
size_t *sizes, unsigned short *kinds,
unsigned int flags, void **depend)
{
struct gomp_device_descr *devicep = resolve_device (device);
/* If there are depend clauses, but nowait is not present,