| /* OpenACC Runtime Library Definitions. |
| |
| Copyright (C) 2013-2022 Free Software Foundation, Inc. |
| |
| Contributed by Mentor Embedded. |
| |
| This file is part of the GNU Offloading and Multi Processing Library |
| (libgomp). |
| |
| Libgomp is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include <assert.h> |
| #include <string.h> |
| #include "openacc.h" |
| #include "libgomp.h" |
| #include "oacc-int.h" |
| |
| static struct goacc_thread * |
| get_goacc_thread (void) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| |
| if (!thr || !thr->dev) |
| gomp_fatal ("no device active"); |
| |
| return thr; |
| } |
| |
| static int |
| validate_async_val (int async) |
| { |
| if (!async_valid_p (async)) |
| gomp_fatal ("invalid async-argument: %d", async); |
| |
| if (async == acc_async_sync) |
| return -1; |
| |
| if (async == acc_async_noval) |
| return 0; |
| |
| if (async >= 0) |
| /* TODO: we reserve 0 for acc_async_noval before we can clarify the |
| semantics of "default_async". */ |
| return 1 + async; |
| else |
| __builtin_unreachable (); |
| } |
| |
| /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This |
| might return NULL if no asyncqueue is to be used. Otherwise, if CREATE, |
| create the asyncqueue if it doesn't exist yet. |
| |
| Unless CREATE, this will not generate any OpenACC Profiling Interface |
| events. */ |
| |
| attribute_hidden struct goacc_asyncqueue * |
| lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async) |
| { |
| async = validate_async_val (async); |
| if (async < 0) |
| return NULL; |
| |
| struct goacc_asyncqueue *ret_aq = NULL; |
| struct gomp_device_descr *dev = thr->dev; |
| |
| gomp_mutex_lock (&dev->openacc.async.lock); |
| |
| if (!create |
| && (async >= dev->openacc.async.nasyncqueue |
| || !dev->openacc.async.asyncqueue[async])) |
| goto end; |
| |
| if (async >= dev->openacc.async.nasyncqueue) |
| { |
| int diff = async + 1 - dev->openacc.async.nasyncqueue; |
| dev->openacc.async.asyncqueue |
| = gomp_realloc (dev->openacc.async.asyncqueue, |
| sizeof (goacc_aq) * (async + 1)); |
| memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue, |
| 0, sizeof (goacc_aq) * diff); |
| dev->openacc.async.nasyncqueue = async + 1; |
| } |
| |
| if (!dev->openacc.async.asyncqueue[async]) |
| { |
| dev->openacc.async.asyncqueue[async] |
| = dev->openacc.async.construct_func (dev->target_id); |
| |
| if (!dev->openacc.async.asyncqueue[async]) |
| { |
| gomp_mutex_unlock (&dev->openacc.async.lock); |
| gomp_fatal ("async %d creation failed", async); |
| } |
| |
| /* Link new async queue into active list. */ |
| goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list)); |
| n->aq = dev->openacc.async.asyncqueue[async]; |
| n->next = dev->openacc.async.active; |
| dev->openacc.async.active = n; |
| } |
| |
| ret_aq = dev->openacc.async.asyncqueue[async]; |
| |
| end: |
| gomp_mutex_unlock (&dev->openacc.async.lock); |
| return ret_aq; |
| } |
| |
| /* Return the asyncqueue to be used for OpenACC async-argument ASYNC. This |
| might return NULL if no asyncqueue is to be used. Otherwise, create the |
| asyncqueue if it doesn't exist yet. */ |
| |
| attribute_hidden struct goacc_asyncqueue * |
| get_goacc_asyncqueue (int async) |
| { |
| struct goacc_thread *thr = get_goacc_thread (); |
| return lookup_goacc_asyncqueue (thr, true, async); |
| } |
| |
| int |
| acc_async_test (int async) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| |
| if (!thr || !thr->dev) |
| gomp_fatal ("no device active"); |
| |
| goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); |
| if (!aq) |
| return 1; |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| { |
| prof_info.async = async; |
| prof_info.async_queue = prof_info.async; |
| } |
| |
| int res = thr->dev->openacc.async.test_func (aq); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| |
| return res; |
| } |
| |
| int |
| acc_async_test_all (void) |
| { |
| struct goacc_thread *thr = get_goacc_thread (); |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| |
| int ret = 1; |
| gomp_mutex_lock (&thr->dev->openacc.async.lock); |
| for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) |
| if (!thr->dev->openacc.async.test_func (l->aq)) |
| { |
| ret = 0; |
| break; |
| } |
| gomp_mutex_unlock (&thr->dev->openacc.async.lock); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| |
| return ret; |
| } |
| |
| void |
| acc_wait (int async) |
| { |
| struct goacc_thread *thr = get_goacc_thread (); |
| |
| goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); |
| if (!aq) |
| return; |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| { |
| prof_info.async = async; |
| prof_info.async_queue = prof_info.async; |
| } |
| |
| if (!thr->dev->openacc.async.synchronize_func (aq)) |
| gomp_fatal ("wait on %d failed", async); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| } |
| |
/* OpenACC 1.0 compatibility: acc_async_wait is the older name for acc_wait.
   Without alias support it is a thin forwarding function; otherwise it is
   emitted as a strong alias of acc_wait.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
| |
| void |
| acc_wait_async (int async1, int async2) |
| { |
| struct goacc_thread *thr = get_goacc_thread (); |
| |
| goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); |
| /* TODO: Is this also correct for acc_async_sync, assuming that in this case, |
| we'll always be synchronous anyways? */ |
| if (!aq1) |
| return; |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| { |
| prof_info.async = async2; |
| prof_info.async_queue = prof_info.async; |
| } |
| |
| goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); |
| /* An async queue is always synchronized with itself. */ |
| if (aq1 == aq2) |
| goto out_prof; |
| |
| if (aq2) |
| { |
| if (!thr->dev->openacc.async.serialize_func (aq1, aq2)) |
| gomp_fatal ("ordering of async ids %d and %d failed", async1, async2); |
| } |
| else |
| { |
| /* TODO: Local thread synchronization. |
| Necessary for the "async2 == acc_async_sync" case, or can just skip? */ |
| if (!thr->dev->openacc.async.synchronize_func (aq1)) |
| gomp_fatal ("wait on %d failed", async1); |
| } |
| |
| out_prof: |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| } |
| |
| void |
| acc_wait_all (void) |
| { |
| struct goacc_thread *thr = goacc_thread (); |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| |
| bool ret = true; |
| gomp_mutex_lock (&thr->dev->openacc.async.lock); |
| for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) |
| ret &= thr->dev->openacc.async.synchronize_func (l->aq); |
| gomp_mutex_unlock (&thr->dev->openacc.async.lock); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| |
| if (!ret) |
| gomp_fatal ("wait all failed"); |
| } |
| |
/* OpenACC 1.0 compatibility: acc_async_wait_all is the older name for
   acc_wait_all.  Without alias support it is a thin forwarding function;
   otherwise it is emitted as a strong alias of acc_wait_all.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
| |
| void |
| acc_wait_all_async (int async) |
| { |
| struct goacc_thread *thr = get_goacc_thread (); |
| |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| { |
| prof_info.async = async; |
| prof_info.async_queue = prof_info.async; |
| } |
| |
| goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); |
| |
| bool ret = true; |
| gomp_mutex_lock (&thr->dev->openacc.async.lock); |
| for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next) |
| { |
| if (waiting_queue) |
| ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); |
| else |
| /* TODO: Local thread synchronization. |
| Necessary for the "async2 == acc_async_sync" case, or can just skip? */ |
| ret &= thr->dev->openacc.async.synchronize_func (l->aq); |
| } |
| gomp_mutex_unlock (&thr->dev->openacc.async.lock); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| |
| if (!ret) |
| gomp_fatal ("wait all async(%d) failed", async); |
| } |
| |
| void |
| GOACC_wait (int async, int num_waits, ...) |
| { |
| goacc_lazy_initialize (); |
| |
| struct goacc_thread *thr = goacc_thread (); |
| |
| /* No nesting. */ |
| assert (thr->prof_info == NULL); |
| assert (thr->api_info == NULL); |
| acc_prof_info prof_info; |
| acc_api_info api_info; |
| bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); |
| if (profiling_p) |
| { |
| prof_info.async = async; |
| prof_info.async_queue = prof_info.async; |
| } |
| |
| if (num_waits) |
| { |
| va_list ap; |
| |
| va_start (ap, num_waits); |
| goacc_wait (async, num_waits, &ap); |
| va_end (ap); |
| } |
| else if (async == acc_async_sync) |
| acc_wait_all (); |
| else |
| acc_wait_all_async (async); |
| |
| if (profiling_p) |
| { |
| thr->prof_info = NULL; |
| thr->api_info = NULL; |
| } |
| } |
| |
| attribute_hidden void |
| goacc_wait (int async, int num_waits, va_list *ap) |
| { |
| while (num_waits--) |
| { |
| int qid = va_arg (*ap, int); |
| |
| /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'. */ |
| if (qid == acc_async_noval) |
| { |
| if (async == acc_async_sync) |
| acc_wait_all (); |
| else |
| acc_wait_all_async (async); |
| break; |
| } |
| |
| if (acc_async_test (qid)) |
| continue; |
| |
| if (async == acc_async_sync) |
| acc_wait (qid); |
| else if (qid == async) |
| /* If we're waiting on the same asynchronous queue as we're |
| launching on, the queue itself will order work as |
| required, so there's no need to wait explicitly. */ |
| ; |
| else |
| acc_wait_async (qid, async); |
| } |
| } |
| |
| attribute_hidden void |
| goacc_async_free (struct gomp_device_descr *devicep, |
| struct goacc_asyncqueue *aq, void *ptr) |
| { |
| if (!aq) |
| free (ptr); |
| else |
| devicep->openacc.async.queue_callback_func (aq, free, ptr); |
| } |
| |
| /* This function initializes the asyncqueues for the device specified by |
| DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on |
| return. */ |
| |
| attribute_hidden void |
| goacc_init_asyncqueues (struct gomp_device_descr *devicep) |
| { |
| devicep->openacc.async.nasyncqueue = 0; |
| devicep->openacc.async.asyncqueue = NULL; |
| devicep->openacc.async.active = NULL; |
| gomp_mutex_init (&devicep->openacc.async.lock); |
| } |
| |
| /* This function finalizes the asyncqueues for the device specified by DEVICEP. |
| TODO DEVICEP must be locked on entry, and remains locked on return. */ |
| |
| attribute_hidden bool |
| goacc_fini_asyncqueues (struct gomp_device_descr *devicep) |
| { |
| bool ret = true; |
| gomp_mutex_lock (&devicep->openacc.async.lock); |
| if (devicep->openacc.async.nasyncqueue > 0) |
| { |
| goacc_aq_list next; |
| for (goacc_aq_list l = devicep->openacc.async.active; l; l = next) |
| { |
| ret &= devicep->openacc.async.destruct_func (l->aq); |
| next = l->next; |
| free (l); |
| } |
| free (devicep->openacc.async.asyncqueue); |
| devicep->openacc.async.nasyncqueue = 0; |
| devicep->openacc.async.asyncqueue = NULL; |
| devicep->openacc.async.active = NULL; |
| } |
| gomp_mutex_unlock (&devicep->openacc.async.lock); |
| gomp_mutex_destroy (&devicep->openacc.async.lock); |
| return ret; |
| } |