| /* Copyright (C) 2005-2024 Free Software Foundation, Inc. |
| Contributed by Richard Henderson <rth@redhat.com>. |
| |
| This file is part of the GNU Offloading and Multi Processing Library |
| (libgomp). |
| |
| Libgomp is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This file handles the LOOP (FOR/DO) construct. */ |
| |
| #include <limits.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include "libgomp.h" |
| |
| ialias (GOMP_loop_ull_runtime_next) /* Reached below through ialias_call; NOTE(review): presumably creates a library-internal alias -- confirm in libgomp.h. */ |
| ialias_redirect (GOMP_taskgroup_reduction_register) /* NOTE(review): presumably redirects the calls made in this file to an internal alias -- confirm in libgomp.h. */ |
| |
| typedef unsigned long long gomp_ull; /* Type of the loop bounds, increments and chunk sizes handled in this file. */ |
| |
| /* Initialize the work share construct WS from the given loop description. |
| |
| UP is true for a loop that counts upward and false for one that counts |
| downward. START becomes the next iteration to hand out, INCR the stored |
| increment, and SCHED and CHUNK_SIZE the stored schedule kind and chunk |
| size. END is stored as the end bound unless the loop has no iterations, |
| in which case START is stored instead. |
| |
| For GFS_DYNAMIC the stored chunk size is additionally multiplied by INCR. |
| |
| WS->mode is assembled as a bit mask: value 2 is or-ed in for downward |
| loops; bit 0 can only become set for GFS_DYNAMIC when HAVE_SYNC_BUILTINS |
| and __LP64__ are both defined and the range tests below succeed. |
| NOTE(review): bit 0 presumably lets the dynamic iterator advance without a |
| per-step overflow check -- confirm against the iterator implementation. */ |
| |
| static inline void |
| gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start, |
| gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched, |
| gomp_ull chunk_size) |
| { |
| ws->sched = sched; |
| ws->chunk_size_ull = chunk_size; |
| /* Canonicalize loops that have zero iterations to ->next == ->end: an |
| upward loop with START > END, or a downward loop with START < END, gets |
| START as its end bound. */ |
| ws->end_ull = ((up && start > end) || (!up && start < end)) |
| ? start : end; |
| ws->incr_ull = incr; |
| ws->next_ull = start; |
| ws->mode = 0; |
| if (sched == GFS_DYNAMIC) |
| { |
| ws->chunk_size_ull *= incr; /* From here on the chunk size is expressed in units of the iteration variable. */ |
| |
| #if defined HAVE_SYNC_BUILTINS && defined __LP64__ |
| { |
| /* For dynamic scheduling prepare things to make each iteration |
| faster. The team size is taken from the current thread's team, or 1 |
| when there is no team. */ |
| struct gomp_thread *thr = gomp_thread (); |
| struct gomp_team *team = thr->ts.team; |
| long nthreads = team ? team->nthreads : 1; |
| |
| if (__builtin_expect (up, 1)) |
| { |
| /* Cheap overflow protection. The mode test is only attempted when both |
| NTHREADS and the chunk size are below 2 to the power of (half the bit |
| width of gomp_ull, minus one), so that (nthreads + 1) * chunk cannot |
| wrap. Mode bit 0 is then set when the end bound lies below the |
| maximum gomp_ull value minus that product. */ |
| if (__builtin_expect ((nthreads | ws->chunk_size_ull) |
| < 1ULL << (sizeof (gomp_ull) |
| * __CHAR_BIT__ / 2 - 1), 1)) |
| ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1 |
| - (nthreads + 1) * ws->chunk_size_ull); |
| } |
| /* Cheap overflow protection. Mirror of the upward case: the chunk size |
| is negated before the range test and before the product is formed, |
| and the end bound must lie above the resulting limit. */ |
| else if (__builtin_expect ((nthreads | -ws->chunk_size_ull) |
| < 1ULL << (sizeof (gomp_ull) |
| * __CHAR_BIT__ / 2 - 1), 1)) |
| ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull |
| - (__LONG_LONG_MAX__ * 2ULL + 1)); |
| } |
| #endif |
| } |
| if (!up) |
| ws->mode |= 2; /* Record in the mode mask that the loop counts downward. */ |
| } |
| |
| /* The *_start routines are called when first encountering a loop construct |
| that is not bound directly to a parallel construct. The first thread |
| that arrives will create the work-share construct; subsequent threads |
| will see the construct exists and allocate work from it. |
| |
| START, END, INCR are the bounds of the loop; due to the restrictions of |
| OpenMP, these values must be the same in every thread. This is not |
| verified (nor is it entirely verifiable, since START is not necessarily |
| retained intact in the work-share data structure). CHUNK_SIZE is the |
| scheduling parameter; again this must be identical in all threads. |
| |
| Returns true if there's any work for this thread to perform. If so, |
| *ISTART and *IEND are filled with the bounds of the iteration block |
| allocated to this thread. Returns false if all work was assigned to |
| other threads prior to this thread's arrival. */ |
| |
| /* Begin a statically scheduled loop.  The thread for which |
|    gomp_work_share_start returns true initializes the work share; every |
|    thread then asks the static iterator for its block.  Returns true when |
|    *ISTART and *IEND were filled with work for the caller.  */ |
| static bool |
| gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, |
|                             gomp_ull incr, gomp_ull chunk_size, |
|                             gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   int status; |
| |
|   self->ts.static_trip = 0; |
| |
|   if (gomp_work_share_start (0)) |
|     { |
|       struct gomp_work_share *share = self->ts.work_share; |
| |
|       gomp_loop_ull_init (share, up, start, end, incr, GFS_STATIC, |
|                           chunk_size); |
|       gomp_work_share_init_done (); |
|     } |
| |
|   /* A zero result from the static iterator means a block was assigned.  */ |
|   status = gomp_iter_ull_static_next (istart, iend); |
|   return status == 0; |
| } |
| |
| /* Begin a dynamically scheduled loop.  The thread for which |
|    gomp_work_share_start returns true sets up the work share; every |
|    thread then requests its first chunk, either through the plain |
|    iterator (HAVE_SYNC_BUILTINS and __LP64__ both defined) or through the |
|    locked iterator while holding the work share lock.  Returns the |
|    iterator's result.  */ |
| static bool |
| gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, |
|                              gomp_ull incr, gomp_ull chunk_size, |
|                              gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool got_block; |
| |
|   if (gomp_work_share_start (0)) |
|     { |
|       gomp_loop_ull_init (self->ts.work_share, up, start, end, incr, |
|                           GFS_DYNAMIC, chunk_size); |
|       gomp_work_share_init_done (); |
|     } |
| |
| #if !(defined HAVE_SYNC_BUILTINS && defined __LP64__) |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| #else |
|   got_block = gomp_iter_ull_dynamic_next (istart, iend); |
| #endif |
| |
|   return got_block; |
| } |
| |
| /* Begin a loop with guided scheduling.  The thread for which |
|    gomp_work_share_start returns true sets up the work share; every |
|    thread then requests its first chunk, either through the plain |
|    iterator (HAVE_SYNC_BUILTINS and __LP64__ both defined) or through the |
|    locked iterator while holding the work share lock.  Returns the |
|    iterator's result.  */ |
| static bool |
| gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, |
|                             gomp_ull incr, gomp_ull chunk_size, |
|                             gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool got_block; |
| |
|   if (gomp_work_share_start (0)) |
|     { |
|       gomp_loop_ull_init (self->ts.work_share, up, start, end, incr, |
|                           GFS_GUIDED, chunk_size); |
|       gomp_work_share_init_done (); |
|     } |
| |
| #if !(defined HAVE_SYNC_BUILTINS && defined __LP64__) |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_guided_next_locked (istart, iend); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| #else |
|   got_block = gomp_iter_ull_guided_next (istart, iend); |
| #endif |
| |
|   return got_block; |
| } |
| |
| /* Begin a loop whose schedule is taken from the run-sched ICV.  The |
|    GFS_MONOTONIC bit is masked off before the schedule kind is examined. |
|    Static, dynamic and guided kinds use the ICV chunk size; GFS_AUTO is |
|    mapped to a static schedule with chunk size 0.  Any other kind |
|    aborts.  */ |
| bool |
| GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, |
|                              gomp_ull incr, gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_task_icv *icv = gomp_icv (false); |
|   long kind = icv->run_sched_var & ~GFS_MONOTONIC; |
|   gomp_ull chunk = icv->run_sched_chunk_size; |
| |
|   switch (kind) |
|     { |
|     case GFS_AUTO: |
|       /* For now map to schedule(static), later on we could play with |
|          feedback driven choice.  */ |
|       chunk = 0; |
|       /* FALLTHRU */ |
|     case GFS_STATIC: |
|       return gomp_loop_ull_static_start (up, start, end, incr, chunk, |
|                                          istart, iend); |
|     case GFS_DYNAMIC: |
|       return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk, |
|                                           istart, iend); |
|     case GFS_GUIDED: |
|       return gomp_loop_ull_guided_start (up, start, end, incr, chunk, |
|                                          istart, iend); |
|     default: |
|       abort (); |
|     } |
| } |
| |
| /* Resolve the schedule kind SCHED passed by the compiler into the kind |
|    that is actually used, after masking off the GFS_MONOTONIC bit. |
| |
|    GFS_STATIC, GFS_DYNAMIC and GFS_GUIDED are returned unchanged and |
|    *CHUNK_SIZE is left alone.  GFS_RUNTIME (runtime schedule without a |
|    monotonic or nonmonotonic modifier, or with the monotonic modifier |
|    when GFS_MONOTONIC is or-ed in) and GFS_AUTO (runtime schedule with the |
|    nonmonotonic modifier) are resolved through the run-sched ICV: its |
|    kind is returned and *CHUNK_SIZE is set from the ICV, except that an |
|    ICV kind of GFS_AUTO yields GFS_STATIC with *CHUNK_SIZE set to 0. |
|    Every other value aborts.  */ |
| static long |
| gomp_adjust_sched (long sched, gomp_ull *chunk_size) |
| { |
|   struct gomp_task_icv *icv; |
|   long kind = sched & ~GFS_MONOTONIC; |
| |
|   if (kind == GFS_STATIC || kind == GFS_DYNAMIC || kind == GFS_GUIDED) |
|     return kind; |
| |
|   if (kind != GFS_RUNTIME && kind != GFS_AUTO) |
|     abort (); |
| |
|   /* The schedule has to be looked up in the ICV.  */ |
|   icv = gomp_icv (false); |
|   kind = icv->run_sched_var & ~GFS_MONOTONIC; |
| |
|   if (kind == GFS_AUTO) |
|     { |
|       *chunk_size = 0; |
|       return GFS_STATIC; |
|     } |
| |
|   if (kind != GFS_STATIC && kind != GFS_DYNAMIC && kind != GFS_GUIDED) |
|     abort (); |
| |
|   *chunk_size = icv->run_sched_chunk_size; |
|   return kind; |
| } |
| |
| /* Combined entry point for starting a loop with an arbitrary schedule, |
|    optional task reductions and optional extra zeroed memory. |
| |
|    REDUCTIONS, when non-null, is registered with the work share: the |
|    thread that creates the work share registers it directly and stores it |
|    in the work share, later threads register against the stored array. |
| |
|    MEM, when non-null, holds the requested size on entry (cast to a |
|    pointer) and receives the address of the memory on return.  The |
|    creating thread either clears the tail of the work share structure |
|    starting at INLINE_ORDERED_TEAM_IDS_OFF, when the request fits there, |
|    or allocates cleared memory and stores it in ordered_team_ids.  Later |
|    threads read ordered_team_ids, rounding it up to the alignment of |
|    long long when the inline array is not already aligned. |
| |
|    Returns the result of GOMP_loop_ull_runtime_next for the first |
|    block.  */ |
| bool |
| GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end, |
|                      gomp_ull incr, long sched, gomp_ull chunk_size, |
|                      gomp_ull *istart, gomp_ull *iend, |
|                      uintptr_t *reductions, void **mem) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
| |
|   self->ts.static_trip = 0; |
|   if (reductions) |
|     gomp_workshare_taskgroup_start (); |
| |
|   if (!gomp_work_share_start (0)) |
|     { |
|       /* Another thread created the work share; only attach to it.  */ |
|       if (reductions) |
|         gomp_workshare_task_reduction_register |
|           (reductions, self->ts.work_share->task_reductions); |
| |
|       if (mem) |
|         { |
|           if ((offsetof (struct gomp_work_share, inline_ordered_team_ids) |
|                & (__alignof__ (long long) - 1)) != 0) |
|             { |
|               uintptr_t addr |
|                 = (uintptr_t) self->ts.work_share->ordered_team_ids; |
| |
|               addr += __alignof__ (long long) - 1; |
|               addr &= ~(__alignof__ (long long) - 1); |
|               *mem = (void *) addr; |
|             } |
|           else |
|             *mem = (void *) self->ts.work_share->ordered_team_ids; |
|         } |
|     } |
|   else |
|     { |
|       /* This thread created the work share and must set it up.  */ |
|       sched = gomp_adjust_sched (sched, &chunk_size); |
|       gomp_loop_ull_init (self->ts.work_share, up, start, end, incr, |
|                           sched, chunk_size); |
| |
|       if (reductions) |
|         { |
|           GOMP_taskgroup_reduction_register (reductions); |
|           self->task->taskgroup->workshare = true; |
|           self->ts.work_share->task_reductions = reductions; |
|         } |
| |
|       if (mem) |
|         { |
|           uintptr_t bytes = (uintptr_t) *mem; |
|           bool need_heap; |
| #define INLINE_ORDERED_TEAM_IDS_OFF \ |
|   ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \ |
|     + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1)) |
|           /* The inline area is unusable when it is empty, when the work |
|              share is less aligned than long long, or when the request |
|              does not fit.  */ |
|           need_heap = (sizeof (struct gomp_work_share) |
|                        <= INLINE_ORDERED_TEAM_IDS_OFF |
|                        || (__alignof__ (struct gomp_work_share) |
|                            < __alignof__ (long long)) |
|                        || bytes > (sizeof (struct gomp_work_share) |
|                                    - INLINE_ORDERED_TEAM_IDS_OFF)); |
| |
|           if (!need_heap) |
|             *mem = memset (((char *) self->ts.work_share) |
|                            + INLINE_ORDERED_TEAM_IDS_OFF, '\0', bytes); |
|           else |
|             { |
|               self->ts.work_share->ordered_team_ids |
|                 = gomp_malloc_cleared (bytes); |
|               *mem = (void *) self->ts.work_share->ordered_team_ids; |
|             } |
|         } |
| |
|       gomp_work_share_init_done (); |
|     } |
| |
|   return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); |
| } |
| |
| /* The *_ordered_*_start routines are similar. The only difference is that |
| this work-share construct is initialized to expect an ORDERED section. */ |
| |
| /* Begin a statically scheduled loop that contains an ORDERED section. |
|    The thread for which gomp_work_share_start (1) returns true |
|    initializes the work share and the static ordered state.  Returns |
|    true when the static iterator assigned a block to the caller.  */ |
| static bool |
| gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, |
|                                     gomp_ull incr, gomp_ull chunk_size, |
|                                     gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   int status; |
| |
|   self->ts.static_trip = 0; |
| |
|   if (gomp_work_share_start (1)) |
|     { |
|       struct gomp_work_share *share = self->ts.work_share; |
| |
|       gomp_loop_ull_init (share, up, start, end, incr, GFS_STATIC, |
|                           chunk_size); |
|       gomp_ordered_static_init (); |
|       gomp_work_share_init_done (); |
|     } |
| |
|   status = gomp_iter_ull_static_next (istart, iend); |
|   return status == 0; |
| } |
| |
| /* Begin a dynamically scheduled loop that contains an ORDERED section. |
|    The creating thread initializes the work share and takes the work |
|    share lock before announcing that initialization is done; all other |
|    threads just take the lock.  The first chunk is fetched under the |
|    lock and, when one was obtained, gomp_ordered_first is called before |
|    the lock is released.  Returns whether a chunk was obtained.  */ |
| static bool |
| gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, |
|                                      gomp_ull incr, gomp_ull chunk_size, |
|                                      gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool creator = gomp_work_share_start (1); |
|   bool got_block; |
| |
|   if (creator) |
|     gomp_loop_ull_init (self->ts.work_share, up, start, end, incr, |
|                         GFS_DYNAMIC, chunk_size); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
| |
|   if (creator) |
|     gomp_work_share_init_done (); |
| |
|   got_block = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|   if (got_block) |
|     gomp_ordered_first (); |
| |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
|   return got_block; |
| } |
| |
| /* Begin a guided loop that contains an ORDERED section.  The creating |
|    thread initializes the work share and takes the work share lock |
|    before announcing that initialization is done; all other threads just |
|    take the lock.  The first chunk is fetched under the lock and, when |
|    one was obtained, gomp_ordered_first is called before the lock is |
|    released.  Returns whether a chunk was obtained.  */ |
| static bool |
| gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, |
|                                     gomp_ull incr, gomp_ull chunk_size, |
|                                     gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool creator = gomp_work_share_start (1); |
|   bool got_block; |
| |
|   if (creator) |
|     gomp_loop_ull_init (self->ts.work_share, up, start, end, incr, |
|                         GFS_GUIDED, chunk_size); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
| |
|   if (creator) |
|     gomp_work_share_init_done (); |
| |
|   got_block = gomp_iter_ull_guided_next_locked (istart, iend); |
|   if (got_block) |
|     gomp_ordered_first (); |
| |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
|   return got_block; |
| } |
| |
| /* Begin an ordered loop whose schedule is taken from the run-sched ICV. |
|    The GFS_MONOTONIC bit is masked off before the schedule kind is |
|    examined.  Static, dynamic and guided kinds use the ICV chunk size; |
|    GFS_AUTO is mapped to a static schedule with chunk size 0.  Any other |
|    kind aborts.  */ |
| bool |
| GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, |
|                                      gomp_ull incr, gomp_ull *istart, |
|                                      gomp_ull *iend) |
| { |
|   struct gomp_task_icv *icv = gomp_icv (false); |
|   long kind = icv->run_sched_var & ~GFS_MONOTONIC; |
|   gomp_ull chunk = icv->run_sched_chunk_size; |
| |
|   switch (kind) |
|     { |
|     case GFS_AUTO: |
|       /* For now map to schedule(static), later on we could play with |
|          feedback driven choice.  */ |
|       chunk = 0; |
|       /* FALLTHRU */ |
|     case GFS_STATIC: |
|       return gomp_loop_ull_ordered_static_start (up, start, end, incr, |
|                                                  chunk, istart, iend); |
|     case GFS_DYNAMIC: |
|       return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, |
|                                                   chunk, istart, iend); |
|     case GFS_GUIDED: |
|       return gomp_loop_ull_ordered_guided_start (up, start, end, incr, |
|                                                  chunk, istart, iend); |
|     default: |
|       abort (); |
|     } |
| } |
| |
| /* Combined entry point for starting an ordered loop with an arbitrary |
|    schedule, optional task reductions and optional extra memory. |
| |
|    The value passed to gomp_work_share_start is 1 plus, when MEM is |
|    non-null, the size found in *MEM.  The creating thread resolves the |
|    schedule, initializes the work share, registers REDUCTIONS and either |
|    sets up the static ordered state or takes the work share lock.  Later |
|    threads register their reductions against the stored array, read the |
|    schedule back from the work share and take the lock unless the |
|    schedule is static. |
| |
|    When MEM is non-null it receives the address just past NTHREADS |
|    entries of ordered_team_ids (one entry when there is no team), |
|    rounded up to the alignment of long long. |
| |
|    Static schedules return straight from the static iterator.  For |
|    dynamic and guided schedules the first chunk is fetched with the lock |
|    held, gomp_ordered_first is called when a chunk was obtained, and the |
|    lock is released before returning.  */ |
| bool |
| GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end, |
|                              gomp_ull incr, long sched, gomp_ull chunk_size, |
|                              gomp_ull *istart, gomp_ull *iend, |
|                              uintptr_t *reductions, void **mem) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   size_t ordered_arg = 1; |
|   bool got_block; |
| |
|   self->ts.static_trip = 0; |
|   if (reductions) |
|     gomp_workshare_taskgroup_start (); |
|   if (mem) |
|     ordered_arg += (uintptr_t) *mem; |
| |
|   if (!gomp_work_share_start (ordered_arg)) |
|     { |
|       /* Another thread created the work share.  */ |
|       if (reductions) |
|         gomp_workshare_task_reduction_register |
|           (reductions, self->ts.work_share->task_reductions); |
| |
|       sched = self->ts.work_share->sched; |
|       if (sched != GFS_STATIC) |
|         gomp_mutex_lock (&self->ts.work_share->lock); |
|     } |
|   else |
|     { |
|       /* This thread created the work share and must set it up.  */ |
|       sched = gomp_adjust_sched (sched, &chunk_size); |
|       gomp_loop_ull_init (self->ts.work_share, up, start, end, incr, |
|                           sched, chunk_size); |
| |
|       if (reductions) |
|         { |
|           GOMP_taskgroup_reduction_register (reductions); |
|           self->task->taskgroup->workshare = true; |
|           self->ts.work_share->task_reductions = reductions; |
|         } |
| |
|       if (sched != GFS_STATIC) |
|         gomp_mutex_lock (&self->ts.work_share->lock); |
|       else |
|         gomp_ordered_static_init (); |
| |
|       gomp_work_share_init_done (); |
|     } |
| |
|   if (mem) |
|     { |
|       uintptr_t addr |
|         = (uintptr_t) (self->ts.work_share->ordered_team_ids |
|                        + (self->ts.team ? self->ts.team->nthreads : 1)); |
| |
|       addr += __alignof__ (long long) - 1; |
|       addr &= ~(__alignof__ (long long) - 1); |
|       *mem = (void *) addr; |
|     } |
| |
|   if (sched == GFS_STATIC || sched == GFS_AUTO) |
|     return !gomp_iter_ull_static_next (istart, iend); |
| |
|   if (sched == GFS_DYNAMIC) |
|     got_block = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|   else if (sched == GFS_GUIDED) |
|     got_block = gomp_iter_ull_guided_next_locked (istart, iend); |
|   else |
|     abort (); |
| |
|   if (got_block) |
|     gomp_ordered_first (); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
|   return got_block; |
| } |
| |
| /* The *_doacross_*_start routines are similar. The only difference is that |
| this work-share construct is initialized to expect an ORDERED(N) - DOACROSS |
| section; the worksharing loop always iterates from 0 to COUNTS[0] - 1, |
| and the other COUNTS array elements tell the library the number of |
| iterations in the ordered inner loops. */ |
| |
| /* Begin a statically scheduled doacross loop.  The creating thread |
|    initializes the work share as an upward loop from 0 to COUNTS[0] with |
|    increment 1 and then sets up the doacross state with no extra memory. |
|    Returns true when the static iterator assigned a block to the |
|    caller.  */ |
| static bool |
| gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, |
|                                      gomp_ull chunk_size, gomp_ull *istart, |
|                                      gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   int status; |
| |
|   self->ts.static_trip = 0; |
| |
|   if (gomp_work_share_start (0)) |
|     { |
|       const gomp_ull outer_count = counts[0]; |
| |
|       gomp_loop_ull_init (self->ts.work_share, true, 0, outer_count, 1, |
|                           GFS_STATIC, chunk_size); |
|       gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); |
|       gomp_work_share_init_done (); |
|     } |
| |
|   status = gomp_iter_ull_static_next (istart, iend); |
|   return status == 0; |
| } |
| |
| /* Begin a dynamically scheduled doacross loop.  The creating thread |
|    initializes the work share as an upward loop from 0 to COUNTS[0] with |
|    increment 1 and then sets up the doacross state with no extra memory. |
|    Every thread then requests its first chunk, through the plain |
|    iterator when HAVE_SYNC_BUILTINS and __LP64__ are both defined and |
|    through the locked iterator otherwise.  Returns the iterator's |
|    result.  */ |
| static bool |
| gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, |
|                                       gomp_ull chunk_size, gomp_ull *istart, |
|                                       gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool got_block; |
| |
|   if (gomp_work_share_start (0)) |
|     { |
|       const gomp_ull outer_count = counts[0]; |
| |
|       gomp_loop_ull_init (self->ts.work_share, true, 0, outer_count, 1, |
|                           GFS_DYNAMIC, chunk_size); |
|       gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); |
|       gomp_work_share_init_done (); |
|     } |
| |
| #if !(defined HAVE_SYNC_BUILTINS && defined __LP64__) |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| #else |
|   got_block = gomp_iter_ull_dynamic_next (istart, iend); |
| #endif |
| |
|   return got_block; |
| } |
| |
| /* Begin a guided doacross loop.  The creating thread initializes the |
|    work share as an upward loop from 0 to COUNTS[0] with increment 1 and |
|    then sets up the doacross state with no extra memory.  Every thread |
|    then requests its first chunk, through the plain iterator when |
|    HAVE_SYNC_BUILTINS and __LP64__ are both defined and through the |
|    locked iterator otherwise.  Returns the iterator's result.  */ |
| static bool |
| gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, |
|                                      gomp_ull chunk_size, gomp_ull *istart, |
|                                      gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool got_block; |
| |
|   if (gomp_work_share_start (0)) |
|     { |
|       const gomp_ull outer_count = counts[0]; |
| |
|       gomp_loop_ull_init (self->ts.work_share, true, 0, outer_count, 1, |
|                           GFS_GUIDED, chunk_size); |
|       gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); |
|       gomp_work_share_init_done (); |
|     } |
| |
| #if !(defined HAVE_SYNC_BUILTINS && defined __LP64__) |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_guided_next_locked (istart, iend); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| #else |
|   got_block = gomp_iter_ull_guided_next (istart, iend); |
| #endif |
| |
|   return got_block; |
| } |
| |
| /* Begin a doacross loop whose schedule is taken from the run-sched ICV. |
|    The GFS_MONOTONIC bit is masked off before the schedule kind is |
|    examined.  Static, dynamic and guided kinds use the ICV chunk size; |
|    GFS_AUTO is mapped to a static schedule with chunk size 0.  Any other |
|    kind aborts.  */ |
| bool |
| GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, |
|                                       gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_task_icv *icv = gomp_icv (false); |
|   long kind = icv->run_sched_var & ~GFS_MONOTONIC; |
|   gomp_ull chunk = icv->run_sched_chunk_size; |
| |
|   switch (kind) |
|     { |
|     case GFS_AUTO: |
|       /* For now map to schedule(static), later on we could play with |
|          feedback driven choice.  */ |
|       chunk = 0; |
|       /* FALLTHRU */ |
|     case GFS_STATIC: |
|       return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk, |
|                                                   istart, iend); |
|     case GFS_DYNAMIC: |
|       return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk, |
|                                                    istart, iend); |
|     case GFS_GUIDED: |
|       return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk, |
|                                                   istart, iend); |
|     default: |
|       abort (); |
|     } |
| } |
| |
| /* Combined entry point for starting a doacross loop with an arbitrary |
|    schedule, optional task reductions and optional extra memory. |
| |
|    The creating thread resolves the schedule, initializes the work share |
|    as an upward loop from 0 to COUNTS[0] with increment 1, sets up the |
|    doacross state (passing the size found in *MEM as the extra amount, or |
|    0 when MEM is null) and registers REDUCTIONS.  Later threads register |
|    their reductions against the array stored in the work share. |
| |
|    When MEM is non-null it receives the doacross "extra" pointer. |
|    Returns the result of GOMP_loop_ull_runtime_next for the first |
|    block.  */ |
| bool |
| GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts, |
|                               long sched, gomp_ull chunk_size, |
|                               gomp_ull *istart, gomp_ull *iend, |
|                               uintptr_t *reductions, void **mem) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
| |
|   self->ts.static_trip = 0; |
|   if (reductions) |
|     gomp_workshare_taskgroup_start (); |
| |
|   if (!gomp_work_share_start (0)) |
|     { |
|       /* Another thread created the work share.  */ |
|       if (reductions) |
|         gomp_workshare_task_reduction_register |
|           (reductions, self->ts.work_share->task_reductions); |
| |
|       /* Kept from the original; SCHED is not read again in this |
|          function after this point.  */ |
|       sched = self->ts.work_share->sched; |
|     } |
|   else |
|     { |
|       /* This thread created the work share and must set it up.  */ |
|       size_t extra_bytes = mem ? (uintptr_t) *mem : 0; |
| |
|       sched = gomp_adjust_sched (sched, &chunk_size); |
|       gomp_loop_ull_init (self->ts.work_share, true, 0, counts[0], 1, |
|                           sched, chunk_size); |
|       gomp_doacross_ull_init (ncounts, counts, chunk_size, extra_bytes); |
| |
|       if (reductions) |
|         { |
|           GOMP_taskgroup_reduction_register (reductions); |
|           self->task->taskgroup->workshare = true; |
|           self->ts.work_share->task_reductions = reductions; |
|         } |
| |
|       gomp_work_share_init_done (); |
|     } |
| |
|   if (mem) |
|     *mem = self->ts.work_share->doacross->extra; |
| |
|   return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); |
| } |
| |
| /* The *_next routines are called when the thread completes processing of |
| the iteration block currently assigned to it. If the work-share |
| construct is bound directly to a parallel construct, then the iteration |
| bounds may have been set up before the parallel. In which case, this |
| may be the first iteration for the thread. |
| |
| Returns true if there is work remaining to be performed; *ISTART and |
| *IEND are filled with a new iteration block. Returns false if all work |
| has been assigned. */ |
| |
| /* Fetch the caller's next block of a statically scheduled loop. |
|    Returns true when the static iterator reported a block (result |
|    zero).  */ |
| static bool |
| gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   int status = gomp_iter_ull_static_next (istart, iend); |
| |
|   return status == 0; |
| } |
| |
| /* Fetch the next chunk of a dynamically scheduled loop.  Uses the plain |
|    iterator when HAVE_SYNC_BUILTINS and __LP64__ are both defined; |
|    otherwise the locked iterator is called with the work share lock |
|    held.  Returns the iterator's result.  */ |
| static bool |
| gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool got_block; |
| |
| #if !(defined HAVE_SYNC_BUILTINS && defined __LP64__) |
|   struct gomp_thread *self = gomp_thread (); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| #else |
|   got_block = gomp_iter_ull_dynamic_next (istart, iend); |
| #endif |
| |
|   return got_block; |
| } |
| |
| /* Fetch the next chunk of a guided loop.  Uses the plain iterator when |
|    HAVE_SYNC_BUILTINS and __LP64__ are both defined; otherwise the locked |
|    iterator is called with the work share lock held.  Returns the |
|    iterator's result.  */ |
| static bool |
| gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool got_block; |
| |
| #if !(defined HAVE_SYNC_BUILTINS && defined __LP64__) |
|   struct gomp_thread *self = gomp_thread (); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_guided_next_locked (istart, iend); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| #else |
|   got_block = gomp_iter_ull_guided_next (istart, iend); |
| #endif |
| |
|   return got_block; |
| } |
| |
| /* Fetch the next block of a loop whose schedule was chosen at run time, |
|    dispatching on the schedule kind stored in the current work share. |
|    GFS_STATIC and GFS_AUTO use the static routine, GFS_DYNAMIC and |
|    GFS_GUIDED their respective routines; any other kind aborts.  */ |
| bool |
| GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   const long kind = self->ts.work_share->sched; |
| |
|   if (kind == GFS_STATIC || kind == GFS_AUTO) |
|     return gomp_loop_ull_static_next (istart, iend); |
|   if (kind == GFS_DYNAMIC) |
|     return gomp_loop_ull_dynamic_next (istart, iend); |
|   if (kind == GFS_GUIDED) |
|     return gomp_loop_ull_guided_next (istart, iend); |
| |
|   abort (); |
| } |
| |
| /* The *_ordered_*_next routines are called when the thread completes |
| processing of the iteration block currently assigned to it. |
| |
| Returns true if there is work remaining to be performed; *ISTART and |
| *IEND are filled with a new iteration block. Returns false if all work |
| has been assigned. */ |
| |
| /* Fetch the caller's next block of a statically scheduled ordered loop. |
|    After gomp_ordered_sync, the static iterator is queried with the work |
|    share lock held; gomp_ordered_static_next is called unless the |
|    iterator returned a negative value.  Returns true when the iterator |
|    returned zero.  */ |
| static bool |
| gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   int status; |
| |
|   gomp_ordered_sync (); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   status = gomp_iter_ull_static_next (istart, iend); |
|   if (!(status < 0)) |
|     gomp_ordered_static_next (); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| |
|   return !status; |
| } |
| |
| /* Fetch the next chunk of a dynamically scheduled ordered loop.  After |
|    gomp_ordered_sync, the locked iterator is called with the work share |
|    lock held; gomp_ordered_next is called when a chunk was obtained and |
|    gomp_ordered_last when none was.  Returns whether a chunk was |
|    obtained.  */ |
| static bool |
| gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool got_block; |
| |
|   gomp_ordered_sync (); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_dynamic_next_locked (istart, iend); |
|   if (!got_block) |
|     gomp_ordered_last (); |
|   else |
|     gomp_ordered_next (); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| |
|   return got_block; |
| } |
| |
| /* Fetch the next chunk of a guided ordered loop.  After |
|    gomp_ordered_sync, the locked iterator is called with the work share |
|    lock held; gomp_ordered_next is called when a chunk was obtained and |
|    gomp_ordered_last when none was.  Returns whether a chunk was |
|    obtained.  */ |
| static bool |
| gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   bool got_block; |
| |
|   gomp_ordered_sync (); |
| |
|   gomp_mutex_lock (&self->ts.work_share->lock); |
|   got_block = gomp_iter_ull_guided_next_locked (istart, iend); |
|   if (!got_block) |
|     gomp_ordered_last (); |
|   else |
|     gomp_ordered_next (); |
|   gomp_mutex_unlock (&self->ts.work_share->lock); |
| |
|   return got_block; |
| } |
| |
| /* Fetch the next block of an ordered loop whose schedule was chosen at |
|    run time, dispatching on the schedule kind stored in the current work |
|    share.  GFS_STATIC and GFS_AUTO use the ordered static routine, |
|    GFS_DYNAMIC and GFS_GUIDED their respective ordered routines; any |
|    other kind aborts.  */ |
| bool |
| GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   struct gomp_thread *self = gomp_thread (); |
|   const long kind = self->ts.work_share->sched; |
| |
|   if (kind == GFS_STATIC || kind == GFS_AUTO) |
|     return gomp_loop_ull_ordered_static_next (istart, iend); |
|   if (kind == GFS_DYNAMIC) |
|     return gomp_loop_ull_ordered_dynamic_next (istart, iend); |
|   if (kind == GFS_GUIDED) |
|     return gomp_loop_ull_ordered_guided_next (istart, iend); |
| |
|   abort (); |
| } |
| |
| /* We use static functions above so that we're sure that the "runtime" |
| function can defer to the proper routine without interposition. We |
| export the static function with a strong alias when possible, or with |
| a wrapper function otherwise. */ |
| |
| #ifdef HAVE_ATTRIBUTE_ALIAS /* Alias attribute available: every public GOMP_* symbol below is declared as an alias of the named implementation. First group: plain loop start entry points; the nonmonotonic and maybe_nonmonotonic names alias the same routines as the plain names. */ |
| extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start |
| __attribute__((alias ("gomp_loop_ull_static_start"))); |
| extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start |
| __attribute__((alias ("gomp_loop_ull_dynamic_start"))); |
| extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start |
| __attribute__((alias ("gomp_loop_ull_guided_start"))); |
| extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start |
| __attribute__((alias ("gomp_loop_ull_dynamic_start"))); |
| extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start |
| __attribute__((alias ("gomp_loop_ull_guided_start"))); |
| extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start |
| __attribute__((alias ("GOMP_loop_ull_runtime_start"))); |
| extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start |
| __attribute__((alias ("GOMP_loop_ull_runtime_start"))); |
| /* Ordered loop start entry points. */ |
| extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start |
| __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); |
| extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start |
| __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start"))); |
| extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start |
| __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); |
| /* Doacross loop start entry points. */ |
| extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start |
| __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); |
| extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start |
| __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); |
| extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start |
| __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); |
| /* Loop next entry points; the nonmonotonic and maybe_nonmonotonic names alias the same routines as the plain names. */ |
| extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next |
| __attribute__((alias ("gomp_loop_ull_static_next"))); |
| extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next |
| __attribute__((alias ("gomp_loop_ull_dynamic_next"))); |
| extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next |
| __attribute__((alias ("gomp_loop_ull_guided_next"))); |
| extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next |
| __attribute__((alias ("gomp_loop_ull_dynamic_next"))); |
| extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next |
| __attribute__((alias ("gomp_loop_ull_guided_next"))); |
| extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next |
| __attribute__((alias ("GOMP_loop_ull_runtime_next"))); |
| extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next |
| __attribute__((alias ("GOMP_loop_ull_runtime_next"))); |
| /* Ordered loop next entry points. */ |
| extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next |
| __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); |
| extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next |
| __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next"))); |
| extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next |
| __attribute__((alias ("gomp_loop_ull_ordered_guided_next"))); |
| #else /* HAVE_ATTRIBUTE_ALIAS not defined: the same public names are provided as wrapper functions instead. */ |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_static_start and returns its result.  */ |
| bool |
| GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, |
|                             gomp_ull incr, gomp_ull chunk_size, |
|                             gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_static_start (up, start, end, incr, chunk_size, |
|                                          istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_dynamic_start and returns its result.  */ |
| bool |
| GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, |
|                              gomp_ull incr, gomp_ull chunk_size, |
|                              gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, |
|                                           istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_guided_start and returns its result.  */ |
| bool |
| GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, |
|                             gomp_ull incr, gomp_ull chunk_size, |
|                             gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, |
|                                          istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    nonmonotonic dynamic start uses the same implementation as the plain |
|    one, gomp_loop_ull_dynamic_start.  */ |
| bool |
| GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start, |
|                                           gomp_ull end, gomp_ull incr, |
|                                           gomp_ull chunk_size, |
|                                           gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, |
|                                           istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    nonmonotonic guided start uses the same implementation as the plain |
|    one, gomp_loop_ull_guided_start.  */ |
| bool |
| GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, |
|                                          gomp_ull incr, gomp_ull chunk_size, |
|                                          gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, |
|                                          istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    nonmonotonic runtime start forwards to GOMP_loop_ull_runtime_start.  */ |
| bool |
| GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start, |
|                                           gomp_ull end, gomp_ull incr, |
|                                           gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = GOMP_loop_ull_runtime_start (up, start, end, incr, istart, |
|                                           iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    maybe-nonmonotonic runtime start forwards to |
|    GOMP_loop_ull_runtime_start.  */ |
| bool |
| GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start, |
|                                                 gomp_ull end, gomp_ull incr, |
|                                                 gomp_ull *istart, |
|                                                 gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = GOMP_loop_ull_runtime_start (up, start, end, incr, istart, |
|                                           iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_ordered_static_start and returns its |
|    result.  */ |
| bool |
| GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, |
|                                     gomp_ull incr, gomp_ull chunk_size, |
|                                     gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_ordered_static_start (up, start, end, incr, |
|                                                  chunk_size, istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_ordered_dynamic_start and returns its |
|    result.  */ |
| bool |
| GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, |
|                                      gomp_ull incr, gomp_ull chunk_size, |
|                                      gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, |
|                                                   chunk_size, istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_ordered_guided_start and returns its |
|    result.  */ |
| bool |
| GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, |
|                                     gomp_ull incr, gomp_ull chunk_size, |
|                                     gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_ordered_guided_start (up, start, end, incr, |
|                                                  chunk_size, istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_doacross_static_start and returns its |
|    result.  */ |
| bool |
| GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, |
|                                      gomp_ull chunk_size, gomp_ull *istart, |
|                                      gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_doacross_static_start (ncounts, counts, |
|                                                   chunk_size, istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_doacross_dynamic_start and returns its |
|    result.  */ |
| bool |
| GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, |
|                                       gomp_ull chunk_size, gomp_ull *istart, |
|                                       gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_doacross_dynamic_start (ncounts, counts, |
|                                                    chunk_size, istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: passes every |
|    argument to gomp_loop_ull_doacross_guided_start and returns its |
|    result.  */ |
| bool |
| GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, |
|                                      gomp_ull chunk_size, gomp_ull *istart, |
|                                      gomp_ull *iend) |
| { |
|   bool has_work; |
| |
|   has_work = gomp_loop_ull_doacross_guided_start (ncounts, counts, |
|                                                   chunk_size, istart, iend); |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: forwards to |
|    gomp_loop_ull_static_next.  */ |
| bool |
| GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_static_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: forwards to |
|    gomp_loop_ull_dynamic_next.  */ |
| bool |
| GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_dynamic_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: forwards to |
|    gomp_loop_ull_guided_next.  */ |
| bool |
| GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_guided_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    nonmonotonic dynamic next uses the same implementation as the plain |
|    one, gomp_loop_ull_dynamic_next.  */ |
| bool |
| GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_dynamic_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    nonmonotonic guided next uses the same implementation as the plain |
|    one, gomp_loop_ull_guided_next.  */ |
| bool |
| GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_guided_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    nonmonotonic runtime next forwards to GOMP_loop_ull_runtime_next.  */ |
| bool |
| GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = GOMP_loop_ull_runtime_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: the |
|    maybe-nonmonotonic runtime next forwards to |
|    GOMP_loop_ull_runtime_next.  */ |
| bool |
| GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart, |
|                                                gomp_ull *iend) |
| { |
|   bool has_work = GOMP_loop_ull_runtime_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: forwards to |
|    gomp_loop_ull_ordered_static_next.  */ |
| bool |
| GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_ordered_static_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: forwards to |
|    gomp_loop_ull_ordered_dynamic_next.  */ |
| bool |
| GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_ordered_dynamic_next (istart, iend); |
| |
|   return has_work; |
| } |
| |
| /* Wrapper compiled when HAVE_ATTRIBUTE_ALIAS is not defined: forwards to |
|    gomp_loop_ull_ordered_guided_next.  */ |
| bool |
| GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) |
| { |
|   bool has_work = gomp_loop_ull_ordered_guided_next (istart, iend); |
| |
|   return has_work; |
| } |
| #endif |