| /* Copyright (C) 2019-2020 Free Software Foundation, Inc. |
| Contributed by Nicolas Koenig |
| |
| This file is part of the GNU Fortran Native Coarray Library (libnca). |
| |
| Libnca is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| Libnca is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "libgfortran.h" |
| #include "libcoarraynative.h" |
| #include "allocator.h" |
| #include "hashmap.h" |
| #include "lock.h" |
| #include "collective_subroutine.h" |
| |
| #include <unistd.h> |
| #include <sys/mman.h> |
| #include <sys/wait.h> |
| #include <string.h> |
| |
| #define GFORTRAN_ENV_NUM_IMAGES "GFORTRAN_NUM_IMAGES" |
| #define GFORTRAN_ENV_SHARED_MEMORY_SIZE "GFORTRAN_SHARED_MEMORY_SIZE" |
| |
| nca_local_data *local = NULL; |
| |
| image this_image = { -1, NULL }; |
| |
| /* Get image number from environment or sysconf. */ |
| |
| static int |
| get_environ_image_num (void) |
| { |
| char *num_images_char; |
| int nimages; |
| num_images_char = getenv (GFORTRAN_ENV_NUM_IMAGES); |
| if (!num_images_char) |
| return sysconf (_SC_NPROCESSORS_ONLN); /* TODO: Make portable. */ |
| /* TODO: Error checking. */ |
| nimages = atoi (num_images_char); |
| return nimages; |
| } |
| |
| /* Get the amount of memory for the shared memory block. This is picked from |
| an environment variable. If that is not there, pick a reasonable default. |
| Note that on a 64-bit system which allows overcommit, there is no penalty in |
| reserving a large space and then not using it. */ |
| |
| static size_t |
| get_memory_size (void) |
| { |
| char *e; |
| size_t sz = 0; |
| e = getenv (GFORTRAN_ENV_SHARED_MEMORY_SIZE); |
| if (e) |
| { |
| char *num, suffix; |
| int rv; |
| rv = sscanf (e, "%zu%1s",&sz, &suffix); |
| if (rv == 2) |
| { |
| switch (suffix) |
| { |
| case 'k': |
| case 'K': |
| sz *= ((size_t) 1) << 10; |
| break; |
| case 'm': |
| case 'M': |
| sz *= ((size_t) 1) << 20; |
| break; |
| case 'g': |
| case 'G': |
| sz *= ((size_t) 1) << 30; |
| break; |
| default: |
| sz = 0; |
| } |
| } |
| } |
| if (sz == 0) |
| { |
| /* Use 256 MB for 32-bit systems and 4 GB for 64-bit systems. */ |
| if (sizeof (size_t) == 4) |
| sz = ((size_t) 1) << 28; |
| else |
| sz = ((size_t) 1) << 34; |
| } |
| return sz; |
| } |
| |
| /* Get a master. */ |
| |
| static master * |
| get_master (void) |
| { |
| master *m; |
| m = SHMPTR_AS ( |
| master *, |
| shared_memory_get_mem_with_alignment ( |
| &local->sm, |
| sizeof (master) + sizeof (image_tracker) * local->total_num_images, |
| __alignof__(master)), |
| &local->sm); |
| m->has_failed_image = 0; |
| m->finished_images = 0; |
| waitable_counter_init (&m->num_active_images, local->total_num_images); |
| return m; |
| } |
| |
| /* Ensure things are initialized. */ |
| |
| void |
| ensure_initialization (void) |
| { |
| size_t shmem_size; |
| |
| if (local) |
| return; |
| |
| local = malloc (sizeof (nca_local_data)); // Is malloc already init'ed at |
| // that point? Maybe use |
| // mmap(MAP_ANON) instead |
| pagesize = sysconf (_SC_PAGE_SIZE); |
| shmem_size = round_to_pagesize (get_memory_size()); |
| local->total_num_images = get_environ_image_num (); |
| shared_memory_init (&local->sm, shmem_size); |
| shared_memory_prepare (&local->sm); |
| if (this_image.m == NULL) /* A bit of a hack, but we |
| need the master early. */ |
| this_image.m = get_master (); |
| alloc_iface_init (&local->ai, &local->sm); |
| collsub_iface_init (&local->ci, &local->ai, &local->sm); |
| sync_iface_init (&local->si, &local->ai, &local->sm); |
| } |
| |
| /* Test for failed or stopped images. */ |
| |
| int |
| test_for_cas_errors (int *stat, char *errmsg, size_t errmsg_length) |
| { |
| size_t errmsg_written_bytes; |
| |
| /* This rather strange ordering is mandated by the standard. */ |
| if (this_image.m->finished_images) |
| { |
| if (stat) |
| { |
| *stat = CAS_STAT_STOPPED_IMAGE; |
| if (errmsg) |
| { |
| errmsg_written_bytes |
| = snprintf (errmsg, errmsg_length, |
| "Stopped images present (currently %d)", |
| this_image.m->finished_images); |
| if (errmsg_written_bytes > errmsg_length - 1) |
| errmsg_written_bytes = errmsg_length - 1; |
| |
| memset (errmsg + errmsg_written_bytes, ' ', |
| errmsg_length - errmsg_written_bytes); |
| } |
| } |
| else |
| { |
| fprintf (stderr, "Stopped images present (currently %d)", |
| this_image.m->finished_images); |
| exit(1); |
| } |
| } |
| else if (this_image.m->has_failed_image) |
| { |
| if (stat) |
| { |
| *stat = CAS_STAT_FAILED_IMAGE; |
| if (errmsg) |
| { |
| errmsg_written_bytes |
| = snprintf (errmsg, errmsg_length, |
| "Failed images present (currently %d)", |
| this_image.m->has_failed_image); |
| if (errmsg_written_bytes > errmsg_length - 1) |
| errmsg_written_bytes = errmsg_length - 1; |
| |
| memset (errmsg + errmsg_written_bytes, ' ', |
| errmsg_length - errmsg_written_bytes); |
| } |
| } |
| else |
| { |
| fprintf (stderr, "Failed images present (currently %d)\n", |
| this_image.m->has_failed_image); |
| exit(1); |
| } |
| } |
| else |
| { |
| if (stat) |
| *stat = 0; |
| |
| return 0; |
| } |
| return 1; |
| } |
| |
| /* Check if an image is active. */ |
| |
| int |
| master_is_image_active (master *m, int image_num) |
| { |
| return m->images[image_num].status == IMAGE_OK; |
| } |
| |
| /* Get number of active images. */ |
| |
| int |
| master_get_num_active_images (master *m) |
| { |
| return waitable_counter_get_val (&m->num_active_images); |
| } |
| |
| /* Bind barrier to counter. */ |
| |
| void |
| master_bind_active_image_barrier (master *m, counter_barrier *b) |
| { |
| bind_counter_barrier (b, &m->num_active_images); |
| } |
| |
| /* Main wrapper. */ |
| |
| static void __attribute__ ((noreturn)) |
| image_main_wrapper (void (*image_main) (void), image *this) |
| { |
| this_image = *this; |
| |
| sync_all (&local->si); |
| |
| image_main (); |
| |
| exit (0); |
| } |
| |
| void |
| error_on_missing_images (void) |
| { |
| if (master_get_num_active_images (this_image.m) != local->total_num_images) |
| exit (1); |
| } |
| |
| /* This is called from main, with a pointer to the user's program as |
| argument. It forks the images and waits for their completion. */ |
| |
| void |
| cas_master (void (*image_main) (void)) |
| { |
| master *m; |
| int i, j; |
| pid_t new; |
| image im; |
| int exit_code = 0; |
| int chstatus; |
| ensure_initialization (); |
| |
| m = this_image.m; |
| im.m = m; |
| |
| for (im.image_num = 0; im.image_num < local->total_num_images; |
| im.image_num++) |
| { |
| if ((new = fork ())) |
| { |
| if (new == -1) |
| { |
| dprintf (2, "error spawning child\n"); |
| exit_code = 1; |
| } |
| m->images[im.image_num].pid = new; |
| m->images[im.image_num].status = IMAGE_OK; |
| } |
| else |
| image_main_wrapper (image_main, &im); |
| } |
| for (i = 0; i < local->total_num_images; i++) |
| { |
| new = wait (&chstatus); |
| if (WIFEXITED (chstatus) && !WEXITSTATUS (chstatus)) |
| { |
| j = 0; |
| for (; j < local->total_num_images && m->images[j].pid != new; j++) |
| ; |
| m->images[j].status = IMAGE_SUCCESS; |
| m->finished_images++; /* FIXME: Needs to be atomic, probably. */ |
| } |
| else if (!WIFEXITED (chstatus) || WEXITSTATUS (chstatus)) |
| { |
| j = 0; |
| for (; j < local->total_num_images && m->images[j].pid != new; j++) |
| ; |
| m->images[j].status = IMAGE_FAILED; |
| m->has_failed_image++; /* FIXME: Needs to be atomic, probably. */ |
| for (; j < local->total_num_images; j++) |
| m->images[j].active_image_index--; |
| dprintf (2, "ERROR: Image %d(%#x) failed\n", j, new); |
| exit_code = 1; |
| } |
| waitable_counter_add (&m->num_active_images, -1); |
| } |
| exit (exit_code); |
| } |