| /* OpenACC Runtime initialization routines |
| |
| Copyright (C) 2013-2022 Free Software Foundation, Inc. |
| |
| Contributed by Mentor Embedded. |
| |
| This file is part of the GNU Offloading and Multi Processing Library |
| (libgomp). |
| |
| Libgomp is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "libgomp.h" |
| #include "oacc-int.h" |
| #include "openacc.h" |
| #include <assert.h> |
| #include <stdlib.h> |
| #include <strings.h> |
| #include <stdbool.h> |
| #include <string.h> |
| |
| /* This lock is used to protect access to cached_base_dev, dispatchers and |
| the (abstract) initialisation state of attached offloading devices. */ |
| |
| static gomp_mutex_t acc_device_lock; |
| |
| static gomp_mutex_t acc_init_state_lock; |
| static enum { uninitialized, initializing, initialized } acc_init_state |
| = uninitialized; |
| static pthread_t acc_init_thread; |
| |
| /* A cached version of the dispatcher for the global "current" accelerator type, |
| e.g. used as the default when creating new host threads. This is the |
| device-type equivalent of goacc_device_num (which specifies which device to |
| use out of potentially several of the same type). If there are several |
| devices of a given type, this points at the first one. */ |
| |
| static struct gomp_device_descr *cached_base_dev = NULL; |
| |
| #if defined HAVE_TLS || defined USE_EMUTLS |
| __thread struct goacc_thread *goacc_tls_data; |
| #else |
| pthread_key_t goacc_tls_key; |
| #endif |
| static pthread_key_t goacc_cleanup_key; |
| |
| static struct goacc_thread *goacc_threads; |
| static gomp_mutex_t goacc_thread_lock; |
| |
| /* An array of dispatchers for device types, indexed by the type. This array |
| only references "base" devices, and other instances of the same type are |
| found by simply indexing from each such device (which are stored linearly, |
| grouped by device in target.c:devices). */ |
| static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 }; |
| |
| attribute_hidden void |
| goacc_register (struct gomp_device_descr *disp) |
| { |
| /* Only register the 0th device here. */ |
| if (disp->target_id != 0) |
| return; |
| |
| gomp_mutex_lock (&acc_device_lock); |
| |
| assert (acc_device_type (disp->type) != acc_device_none |
| && acc_device_type (disp->type) != acc_device_default |
| && acc_device_type (disp->type) != acc_device_not_host); |
| assert (!dispatchers[disp->type]); |
| dispatchers[disp->type] = disp; |
| |
| gomp_mutex_unlock (&acc_device_lock); |
| } |
| |
| static bool |
| known_device_type_p (acc_device_t d) |
| { |
| return d >= 0 && d < _ACC_device_hwm; |
| } |
| |
| static void |
| unknown_device_type_error (acc_device_t invalid_type) |
| { |
| gomp_fatal ("unknown device type %u", invalid_type); |
| } |
| |
| /* OpenACC names some things a little differently. */ |
| |
| static const char * |
| get_openacc_name (const char *name) |
| { |
| if (strcmp (name, "gcn") == 0) |
| return "radeon"; |
| else if (strcmp (name, "nvptx") == 0) |
| return "nvidia"; |
| else |
| return name; |
| } |
| |
| static const char * |
| name_of_acc_device_t (enum acc_device_t type) |
| { |
| switch (type) |
| { |
| case acc_device_none: return "none"; |
| case acc_device_default: return "default"; |
| case acc_device_host: return "host"; |
| case acc_device_not_host: return "not_host"; |
| case acc_device_nvidia: return "nvidia"; |
| case acc_device_radeon: return "radeon"; |
| default: unknown_device_type_error (type); |
| } |
| __builtin_unreachable (); |
| } |
| |
| /* ACC_DEVICE_LOCK must be held before calling this function. If FAIL_IS_ERROR |
| is true, this function raises an error if there are no devices of type D, |
| otherwise it returns NULL in that case. */ |
| |
| static struct gomp_device_descr * |
| resolve_device (acc_device_t d, bool fail_is_error) |
| { |
| acc_device_t d_arg = d; |
| |
| switch (d) |
| { |
| case acc_device_default: |
| { |
| if (goacc_device_type) |
| { |
| /* Lookup the named device. */ |
| while (known_device_type_p (++d)) |
| if (dispatchers[d] |
| && !strcasecmp (goacc_device_type, |
| get_openacc_name (dispatchers[d]->name)) |
| && dispatchers[d]->get_num_devices_func (0) > 0) |
| goto found; |
| |
| if (fail_is_error) |
| { |
| gomp_mutex_unlock (&acc_device_lock); |
| gomp_fatal ("device type %s not supported", goacc_device_type); |
| } |
| else |
| return NULL; |
| } |
| |
| /* No default device specified, so start scanning for any non-host |
| device that is available. */ |
| d = acc_device_not_host; |
| } |
| /* FALLTHROUGH */ |
| |
| case acc_device_not_host: |
| /* Find the first available device after acc_device_not_host. */ |
| while (known_device_type_p (++d)) |
| if (dispatchers[d] && dispatchers[d]->get_num_devices_func (0) > 0) |
| goto found; |
| if (d_arg == acc_device_default) |
| { |
| d = acc_device_host; |
| goto found; |
| } |
| if (fail_is_error) |
| { |
| gomp_mutex_unlock (&acc_device_lock); |
| gomp_fatal ("no device found"); |
| } |
| else |
| return NULL; |
| break; |
| |
| case acc_device_host: |
| break; |
| |
| default: |
| if (!known_device_type_p (d)) |
| { |
| if (fail_is_error) |
| goto unsupported_device; |
| else |
| return NULL; |
| } |
| break; |
| } |
| found: |
| |
| assert (d != acc_device_none |
| && d != acc_device_default |
| && d != acc_device_not_host); |
| |
| if (dispatchers[d] == NULL && fail_is_error) |
| { |
| unsupported_device: |
| gomp_mutex_unlock (&acc_device_lock); |
| gomp_fatal ("device type %s not supported", name_of_acc_device_t (d)); |
| } |
| |
| return dispatchers[d]; |
| } |
| |
| /* Emit a suitable error if no device of a particular type is available, or |
| the given device number is out-of-range. */ |
| static void |
| acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs) |
| { |
| if (ndevs == 0) |
| gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d)); |
| else |
| gomp_fatal ("device %u out of range", ord); |
| } |
| |
| /* This is called when plugins have been initialized, and serves to call |
| (indirectly) the target's device_init hook. Calling multiple times without |
| an intervening acc_shutdown_1 call is an error. ACC_DEVICE_LOCK must be |
| held before calling this function. */ |
| |
| static struct gomp_device_descr * |
| acc_init_1 (acc_device_t d, acc_construct_t parent_construct, int implicit) |
| { |
| gomp_mutex_lock (&acc_init_state_lock); |
| acc_init_state = initializing; |
| acc_init_thread = pthread_self (); |
| gomp_mutex_unlock (&acc_init_state_lock); |
| |
| bool check_not_nested_p; |
| if (implicit) |
| { |
| /* In the implicit case, there should (TODO: must?) already be something |
| have been set up for an outer construct. */ |
| check_not_nested_p = false; |
| } |
| else |
| { |
| check_not_nested_p = true; |
| /* TODO: should we set 'thr->prof_info' etc. in this case ('acc_init')? |
| The problem is, that we don't have 'thr' yet? (So, |
| 'check_not_nested_p = true' also is pointless actually.) */ |
| } |
| bool profiling_p = GOACC_PROFILING_DISPATCH_P (check_not_nested_p); |
| |
| acc_prof_info prof_info; |
| if (profiling_p) |
| { |
| prof_info.event_type = acc_ev_device_init_start; |
| prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; |
| prof_info.version = _ACC_PROF_INFO_VERSION; |
| prof_info.device_type = d; |
| prof_info.device_number = goacc_device_num; |
| prof_info.thread_id = -1; |
| prof_info.async = acc_async_sync; |
| prof_info.async_queue = prof_info.async; |
| prof_info.src_file = NULL; |
| prof_info.func_name = NULL; |
| prof_info.line_no = -1; |
| prof_info.end_line_no = -1; |
| prof_info.func_line_no = -1; |
| prof_info.func_end_line_no = -1; |
| } |
| acc_event_info device_init_event_info; |
| if (profiling_p) |
| { |
| device_init_event_info.other_event.event_type = prof_info.event_type; |
| device_init_event_info.other_event.valid_bytes |
| = _ACC_OTHER_EVENT_INFO_VALID_BYTES; |
| device_init_event_info.other_event.parent_construct = parent_construct; |
| device_init_event_info.other_event.implicit = implicit; |
| device_init_event_info.other_event.tool_info = NULL; |
| } |
| acc_api_info api_info; |
| if (profiling_p) |
| { |
| api_info.device_api = acc_device_api_none; |
| api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; |
| api_info.device_type = prof_info.device_type; |
| api_info.vendor = -1; |
| api_info.device_handle = NULL; |
| api_info.context_handle = NULL; |
| api_info.async_handle = NULL; |
| } |
| |
| if (profiling_p) |
| goacc_profiling_dispatch (&prof_info, &device_init_event_info, &api_info); |
| |
| struct gomp_device_descr *base_dev, *acc_dev; |
| int ndevs; |
| |
| base_dev = resolve_device (d, true); |
| |
| ndevs = base_dev->get_num_devices_func (0); |
| |
| if (ndevs <= 0 || goacc_device_num >= ndevs) |
| acc_dev_num_out_of_range (d, goacc_device_num, ndevs); |
| |
| acc_dev = &base_dev[goacc_device_num]; |
| |
| gomp_mutex_lock (&acc_dev->lock); |
| if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
| { |
| gomp_mutex_unlock (&acc_dev->lock); |
| gomp_fatal ("device already active"); |
| } |
| |
| gomp_init_device (acc_dev); |
| gomp_mutex_unlock (&acc_dev->lock); |
| |
| if (profiling_p) |
| { |
| prof_info.event_type = acc_ev_device_init_end; |
| device_init_event_info.other_event.event_type = prof_info.event_type; |
| goacc_profiling_dispatch (&prof_info, &device_init_event_info, |
| &api_info); |
| } |
| |
| /* We're setting 'initialized' *after* 'goacc_profiling_dispatch', so that a |
| nested 'acc_get_device_type' called from a profiling callback still sees |
| 'initializing', so that we don't deadlock when it then again tries to lock |
| 'goacc_prof_lock'. See also the discussion in 'acc_get_device_type'. */ |
| gomp_mutex_lock (&acc_init_state_lock); |
| acc_init_state = initialized; |
| gomp_mutex_unlock (&acc_init_state_lock); |
| |
| return base_dev; |
| } |
| |
| /* ACC_DEVICE_LOCK must be held before calling this function. */ |
| |
| static void |
| acc_shutdown_1 (acc_device_t d) |
| { |
| struct gomp_device_descr *base_dev; |
| struct goacc_thread *walk; |
| int ndevs, i; |
| bool devices_active = false; |
| |
| /* Get the base device for this device type. */ |
| base_dev = resolve_device (d, true); |
| |
| ndevs = base_dev->get_num_devices_func (0); |
| |
| /* Unload all the devices of this type that have been opened. */ |
| for (i = 0; i < ndevs; i++) |
| { |
| struct gomp_device_descr *acc_dev = &base_dev[i]; |
| |
| gomp_mutex_lock (&acc_dev->lock); |
| gomp_unload_device (acc_dev); |
| gomp_mutex_unlock (&acc_dev->lock); |
| } |
| |
| gomp_mutex_lock (&goacc_thread_lock); |
| |
| /* Free target-specific TLS data and close all devices. */ |
| for (walk = goacc_threads; walk != NULL; walk = walk->next) |
| { |
| if (walk->target_tls) |
| base_dev->openacc.destroy_thread_data_func (walk->target_tls); |
| |
| walk->target_tls = NULL; |
| |
| /* This would mean the user is shutting down OpenACC in the middle of an |
| "acc data" pragma. Likely not intentional. */ |
| if (walk->mapped_data) |
| { |
| gomp_mutex_unlock (&goacc_thread_lock); |
| gomp_fatal ("shutdown in 'acc data' region"); |
| } |
| |
| /* Similarly, if this happens then user code has done something weird. */ |
| if (walk->saved_bound_dev) |
| { |
| gomp_mutex_unlock (&goacc_thread_lock); |
| gomp_fatal ("shutdown during host fallback"); |
| } |
| |
| if (walk->dev) |
| { |
| gomp_mutex_lock (&walk->dev->lock); |
| |
| while (walk->dev->mem_map.root) |
| { |
| splay_tree_key k = &walk->dev->mem_map.root->key; |
| if (k->aux) |
| k->aux->link_key = NULL; |
| gomp_remove_var (walk->dev, k); |
| } |
| |
| gomp_mutex_unlock (&walk->dev->lock); |
| |
| walk->dev = NULL; |
| walk->base_dev = NULL; |
| } |
| } |
| |
| gomp_mutex_unlock (&goacc_thread_lock); |
| |
| /* Close all the devices of this type that have been opened. */ |
| bool ret = true; |
| for (i = 0; i < ndevs; i++) |
| { |
| struct gomp_device_descr *acc_dev = &base_dev[i]; |
| gomp_mutex_lock (&acc_dev->lock); |
| if (acc_dev->state == GOMP_DEVICE_INITIALIZED) |
| { |
| devices_active = true; |
| ret &= gomp_fini_device (acc_dev); |
| acc_dev->state = GOMP_DEVICE_UNINITIALIZED; |
| } |
| gomp_mutex_unlock (&acc_dev->lock); |
| } |
| |
| if (!ret) |
| gomp_fatal ("device finalization failed"); |
| |
| if (!devices_active) |
| gomp_fatal ("no device initialized"); |
| } |
| |
| static struct goacc_thread * |
| goacc_new_thread (void) |
| { |
| struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread)); |
| |
| #if defined HAVE_TLS || defined USE_EMUTLS |
| goacc_tls_data = thr; |
| #else |
| pthread_setspecific (goacc_tls_key, thr); |
| #endif |
| |
| pthread_setspecific (goacc_cleanup_key, thr); |
| |
| gomp_mutex_lock (&goacc_thread_lock); |
| thr->next = goacc_threads; |
| goacc_threads = thr; |
| gomp_mutex_unlock (&goacc_thread_lock); |
| |
| return thr; |
| } |
| |
| static void |
| goacc_destroy_thread (void *data) |
| { |
| struct goacc_thread *thr = data, *walk, *prev; |
| |
| gomp_mutex_lock (&goacc_thread_lock); |
| |
| if (thr) |
| { |
| struct gomp_device_descr *acc_dev = thr->dev; |
| |
| if (acc_dev && thr->target_tls) |
| { |
| acc_dev->openacc.destroy_thread_data_func (thr->target_tls); |
| thr->target_tls = NULL; |
| } |
| |
| assert (!thr->mapped_data); |
| |
| /* Remove from thread list. */ |
| for (prev = NULL, walk = goacc_threads; walk; |
| prev = walk, walk = walk->next) |
| if (walk == thr) |
| { |
| if (prev == NULL) |
| goacc_threads = walk->next; |
| else |
| prev->next = walk->next; |
| |
| free (thr); |
| |
| break; |
| } |
| |
| assert (walk); |
| } |
| |
| gomp_mutex_unlock (&goacc_thread_lock); |
| } |
| |
| /* Use the ORD'th device instance for the current host thread (or -1 for the |
| current global default). The device (and the runtime) must be initialised |
| before calling this function. */ |
| |
| void |
| goacc_attach_host_thread_to_device (int ord) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL; |
| int num_devices; |
| |
| if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0)) |
| return; |
| |
| if (ord < 0) |
| ord = goacc_device_num; |
| |
| /* Decide which type of device to use. If the current thread has a device |
| type already (e.g. set by acc_set_device_type), use that, else use the |
| global default. */ |
| if (thr && thr->base_dev) |
| base_dev = thr->base_dev; |
| else |
| { |
| assert (cached_base_dev); |
| base_dev = cached_base_dev; |
| } |
| |
| num_devices = base_dev->get_num_devices_func (0); |
| if (num_devices <= 0 || ord >= num_devices) |
| acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord, |
| num_devices); |
| |
| if (!thr) |
| thr = goacc_new_thread (); |
| |
| thr->base_dev = base_dev; |
| thr->dev = acc_dev = &base_dev[ord]; |
| thr->saved_bound_dev = NULL; |
| thr->mapped_data = NULL; |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| /* Initially, all callbacks for all events are enabled. */ |
| thr->prof_callbacks_enabled = true; |
| |
| thr->target_tls |
| = acc_dev->openacc.create_thread_data_func (ord); |
| } |
| |
| /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of |
| init/shutdown is per-process or per-thread. We choose per-process. */ |
| |
| void |
| acc_init (acc_device_t d) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error (d); |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| cached_base_dev = acc_init_1 (d, acc_construct_runtime_api, 0); |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| goacc_attach_host_thread_to_device (-1); |
| } |
| |
| ialias (acc_init) |
| |
| void |
| acc_shutdown (acc_device_t d) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error (d); |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| |
| acc_shutdown_1 (d); |
| |
| gomp_mutex_unlock (&acc_device_lock); |
| } |
| |
| ialias (acc_shutdown) |
| |
| int |
| acc_get_num_devices (acc_device_t d) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error (d); |
| |
| int n = 0; |
| struct gomp_device_descr *acc_dev; |
| |
| if (d == acc_device_none) |
| return 0; |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| acc_dev = resolve_device (d, false); |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| if (!acc_dev) |
| return 0; |
| |
| n = acc_dev->get_num_devices_func (0); |
| if (n < 0) |
| n = 0; |
| |
| return n; |
| } |
| |
| ialias (acc_get_num_devices) |
| |
| /* Set the device type for the current thread only (using the current global |
| default device number), initialising that device if necessary. Also set the |
| default device type for new threads to D. */ |
| |
| void |
| acc_set_device_type (acc_device_t d) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error (d); |
| |
| struct gomp_device_descr *base_dev, *acc_dev; |
| struct goacc_thread *thr = goacc_thread (); |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| prof_info.device_type = d; |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| |
| cached_base_dev = base_dev = resolve_device (d, true); |
| acc_dev = &base_dev[goacc_device_num]; |
| |
| gomp_mutex_lock (&acc_dev->lock); |
| if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
| gomp_init_device (acc_dev); |
| gomp_mutex_unlock (&acc_dev->lock); |
| |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| /* We're changing device type: invalidate the current thread's dev and |
| base_dev pointers. */ |
| if (thr && thr->base_dev != base_dev) |
| { |
| thr->base_dev = thr->dev = NULL; |
| if (thr->mapped_data) |
| gomp_fatal ("acc_set_device_type in 'acc data' region"); |
| } |
| |
| goacc_attach_host_thread_to_device (-1); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| } |
| |
| ialias (acc_set_device_type) |
| |
| static bool |
| self_initializing_p (void) |
| { |
| bool res; |
| gomp_mutex_lock (&acc_init_state_lock); |
| res = (acc_init_state == initializing |
| && pthread_equal (acc_init_thread, pthread_self ())); |
| gomp_mutex_unlock (&acc_init_state_lock); |
| return res; |
| } |
| |
| acc_device_t |
| acc_get_device_type (void) |
| { |
| acc_device_t res = acc_device_none; |
| struct gomp_device_descr *dev; |
| struct goacc_thread *thr = goacc_thread (); |
| |
| if (thr && thr->base_dev) |
| res = acc_device_type (thr->base_dev->type); |
| else if (self_initializing_p ()) |
| /* The Cuda libaccinj64.so version 9.0+ calls acc_get_device_type during the |
| acc_ev_device_init_start event callback, which is dispatched during |
| acc_init_1. Trying to lock acc_device_lock during such a call (as we do |
| in the else clause below), will result in deadlock, since the lock has |
| already been taken by the acc_init_1 caller. We work around this problem |
| by using the acc_get_device_type property "If the device type has not yet |
| been selected, the value acc_device_none may be returned". */ |
| ; |
| else |
| { |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| dev = resolve_device (acc_device_default, true); |
| gomp_mutex_unlock (&acc_device_lock); |
| res = acc_device_type (dev->type); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| } |
| |
| assert (res != acc_device_default |
| && res != acc_device_not_host |
| && res != acc_device_current); |
| |
| return res; |
| } |
| |
| ialias (acc_get_device_type) |
| |
| int |
| acc_get_device_num (acc_device_t d) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error (d); |
| |
| const struct gomp_device_descr *dev; |
| struct goacc_thread *thr = goacc_thread (); |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| prof_info.device_type = d; |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| dev = resolve_device (d, true); |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| |
| if (thr && thr->base_dev == dev && thr->dev) |
| return thr->dev->target_id; |
| |
| return goacc_device_num; |
| } |
| |
| ialias (acc_get_device_num) |
| |
| void |
| acc_set_device_num (int ord, acc_device_t d) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error (d); |
| |
| struct gomp_device_descr *base_dev, *acc_dev; |
| int num_devices; |
| |
| gomp_init_targets_once (); |
| |
| if (ord < 0) |
| ord = goacc_device_num; |
| |
| if ((int) d == 0) |
| /* Set whatever device is being used by the current host thread to use |
| device instance ORD. It's unclear if this is supposed to affect other |
| host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num). */ |
| goacc_attach_host_thread_to_device (ord); |
| else |
| { |
| gomp_mutex_lock (&acc_device_lock); |
| |
| cached_base_dev = base_dev = resolve_device (d, true); |
| |
| num_devices = base_dev->get_num_devices_func (0); |
| |
| if (num_devices <= 0 || ord >= num_devices) |
| acc_dev_num_out_of_range (d, ord, num_devices); |
| |
| acc_dev = &base_dev[ord]; |
| |
| gomp_mutex_lock (&acc_dev->lock); |
| if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED) |
| gomp_init_device (acc_dev); |
| gomp_mutex_unlock (&acc_dev->lock); |
| |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| goacc_attach_host_thread_to_device (ord); |
| } |
| |
| goacc_device_num = ord; |
| } |
| |
| ialias (acc_set_device_num) |
| |
| static union goacc_property_value |
| get_property_any (int ord, acc_device_t d, acc_device_property_t prop) |
| { |
| goacc_lazy_initialize (); |
| struct goacc_thread *thr = goacc_thread (); |
| |
| if (d == acc_device_current && thr && thr->dev) |
| return thr->dev->openacc.get_property_func (thr->dev->target_id, prop); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| |
| struct gomp_device_descr *dev = resolve_device (d, true); |
| |
| int num_devices = dev->get_num_devices_func (0); |
| |
| if (num_devices <= 0 || ord >= num_devices) |
| acc_dev_num_out_of_range (d, ord, num_devices); |
| |
| dev += ord; |
| |
| gomp_mutex_lock (&dev->lock); |
| if (dev->state == GOMP_DEVICE_UNINITIALIZED) |
| gomp_init_device (dev); |
| gomp_mutex_unlock (&dev->lock); |
| |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| assert (dev); |
| |
| return dev->openacc.get_property_func (dev->target_id, prop); |
| } |
| |
| size_t |
| acc_get_property (int ord, acc_device_t d, acc_device_property_t prop) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error(d); |
| |
| if (prop & GOACC_PROPERTY_STRING_MASK) |
| return 0; |
| else |
| return get_property_any (ord, d, prop).val; |
| } |
| |
| ialias (acc_get_property) |
| |
| const char * |
| acc_get_property_string (int ord, acc_device_t d, acc_device_property_t prop) |
| { |
| if (!known_device_type_p (d)) |
| unknown_device_type_error(d); |
| |
| if (prop & GOACC_PROPERTY_STRING_MASK) |
| return get_property_any (ord, d, prop).ptr; |
| else |
| return NULL; |
| } |
| |
| ialias (acc_get_property_string) |
| |
| /* For -O and higher, the compiler always attempts to expand acc_on_device, but |
| if the user disables the builtin, or calls it via a pointer, we'll need this |
| version. |
| |
| Compile this with optimization, so that the compiler expands |
| this, rather than generating infinitely recursive code. |
| |
| The function just forwards its argument to __builtin_acc_on_device. It does |
| not verify that the argument is a valid acc_device_t enumeration value. */ |
| |
| int __attribute__ ((__optimize__ ("O2"))) |
| acc_on_device (acc_device_t dev) |
| { |
| return __builtin_acc_on_device (dev); |
| } |
| |
| ialias (acc_on_device) |
| |
| attribute_hidden void |
| goacc_runtime_initialize (void) |
| { |
| gomp_mutex_init (&acc_device_lock); |
| |
| #if !(defined HAVE_TLS || defined USE_EMUTLS) |
| pthread_key_create (&goacc_tls_key, NULL); |
| #endif |
| |
| pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread); |
| |
| cached_base_dev = NULL; |
| |
| goacc_threads = NULL; |
| gomp_mutex_init (&goacc_thread_lock); |
| |
| /* Initialize and register the 'host' device type. */ |
| goacc_host_init (); |
| } |
| |
| static void __attribute__((destructor)) |
| goacc_runtime_deinitialize (void) |
| { |
| #if !(defined HAVE_TLS || defined USE_EMUTLS) |
| pthread_key_delete (goacc_tls_key); |
| #endif |
| pthread_key_delete (goacc_cleanup_key); |
| } |
| |
| /* Compiler helper functions */ |
| |
| attribute_hidden void |
| goacc_save_and_set_bind (acc_device_t d) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| |
| assert (!thr->saved_bound_dev); |
| |
| thr->saved_bound_dev = thr->dev; |
| thr->dev = dispatchers[d]; |
| } |
| |
| attribute_hidden void |
| goacc_restore_bind (void) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| |
| thr->dev = thr->saved_bound_dev; |
| thr->saved_bound_dev = NULL; |
| } |
| |
| /* This is called from any OpenACC support function that may need to implicitly |
| initialize the libgomp runtime, either globally or from a new host thread. |
| On exit "goacc_thread" will return a valid & populated thread block. */ |
| |
| attribute_hidden void |
| goacc_lazy_initialize (void) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| |
| if (thr && thr->dev) |
| return; |
| |
| gomp_init_targets_once (); |
| |
| gomp_mutex_lock (&acc_device_lock); |
| if (!cached_base_dev) |
| cached_base_dev = acc_init_1 (acc_device_default, |
| acc_construct_parallel, 1); |
| gomp_mutex_unlock (&acc_device_lock); |
| |
| goacc_attach_host_thread_to_device (-1); |
| } |