blob: e92f617260caad8066d3f6216fb5e5809783531f [file] [log] [blame]
/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
Contributed by Nicolas Koenig
This file is part of the GNU Fortran Native Coarray Library (libnca).
Libnca is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libnca is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgfortran.h"
#include "libcoarraynative.h"
#include "lock.h"
#include <string.h>
static inline int
div_ru (int divident, int divisor)
{
return (divident + divisor - 1) / divisor;
}
/* Need to keep this in sync with
trans-array.h:gfc_coarray_allocation_type. */
enum gfc_coarray_allocation_type
{
GFC_NCA_NORMAL_COARRAY = 1,
GFC_NCA_LOCK_COARRAY,
GFC_NCA_EVENT_COARRAY,
};
void cas_coarray_alloc (gfc_array_void *, size_t, int, int);
export_proto (cas_coarray_alloc);
void cas_coarray_alloc_chk (gfc_array_void *, size_t, int, int, int *,
char *, size_t);
export_proto (cas_coarray_alloc_chk);
void cas_coarray_free (gfc_array_void *, int);
export_proto (cas_coarray_free);
int cas_coarray_this_image (int);
export_proto (cas_coarray_this_image);
int cas_coarray_num_images (int);
export_proto (cas_coarray_num_images);
void cas_coarray_sync_all (int *);
export_proto (cas_coarray_sync_all);
void cas_sync_images (int, int *, int *, char *, size_t);
export_proto (cas_sync_images);
void cas_lock (void *);
export_proto (cas_lock);
void cas_unlock (void *);
export_proto (cas_unlock);
void cas_collsub_reduce_array (gfc_array_char *, void (*) (void *, void *),
int *, int *, char *, size_t);
export_proto (cas_collsub_reduce_array);
void cas_collsub_reduce_scalar (void *, index_type, void (*) (void *, void *),
int *, int *, char *, size_t);
export_proto (cas_collsub_reduce_scalar);
void cas_collsub_broadcast_array (gfc_array_char *restrict, int, int *, char *,
size_t);
export_proto (cas_collsub_broadcast_array);
void cas_collsub_broadcast_scalar (void *restrict, size_t, int, int *, char *,
size_t);
export_proto (cas_collsub_broadcast_scalar);
static void
cas_coarray_alloc_work (gfc_array_void *desc, size_t elem_size, int corank,
int alloc_type)
{
int i, last_rank_index;
int num_coarray_elems, num_elems; /* Excludes the last dimension, because it
will have to be determined later. */
int extent_last_codimen;
size_t last_lbound;
size_t size_in_bytes;
if (alloc_type == GFC_NCA_LOCK_COARRAY)
elem_size = sizeof (pthread_mutex_t);
else if (alloc_type == GFC_NCA_EVENT_COARRAY)
elem_size = sizeof (char); /* replace with proper type. */
last_rank_index = GFC_DESCRIPTOR_RANK (desc) + corank - 1;
num_elems = 1;
num_coarray_elems = 1;
for (i = 0; i < GFC_DESCRIPTOR_RANK (desc); i++)
num_elems *= GFC_DESCRIPTOR_EXTENT (desc, i);
for (i = GFC_DESCRIPTOR_RANK (desc); i < last_rank_index; i++)
{
num_elems *= GFC_DESCRIPTOR_EXTENT (desc, i);
num_coarray_elems *= GFC_DESCRIPTOR_EXTENT (desc, i);
}
extent_last_codimen = div_ru (local->total_num_images, num_coarray_elems);
last_lbound = GFC_DIMENSION_LBOUND (desc->dim[last_rank_index]);
GFC_DIMENSION_SET (desc->dim[last_rank_index], last_lbound,
last_lbound + extent_last_codimen - 1, num_elems);
size_in_bytes = elem_size * num_elems * extent_last_codimen;
if (alloc_type == GFC_NCA_LOCK_COARRAY)
{
lock_array *addr;
int expected = 0;
/* Allocate enough space for the metadata infront of the lock
array. */
addr = get_memory_by_id_zero (
&local->ai, size_in_bytes + sizeof (lock_array), (intptr_t)desc);
/* Use of a traditional spin lock to avoid race conditions with
the initization of the mutex. We could alternatively put a
global lock around allocate, but that would probably be
slower. */
while (!__atomic_compare_exchange_n (&addr->owner, &expected,
this_image.image_num + 1, false,
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
;
if (!addr->initialized++)
{
for (i = 0; i < local->total_num_images; i++)
initialize_shared_mutex (&addr->arr[i]);
}
__atomic_store_n (&addr->owner, 0, __ATOMIC_SEQ_CST);
desc->base_addr = &addr->arr;
}
else if (alloc_type == GFC_NCA_EVENT_COARRAY)
(void)0; // TODO
else
desc->base_addr =
get_memory_by_id (&local->ai, size_in_bytes, (intptr_t) desc);
}
void
cas_coarray_alloc (gfc_array_void *desc, size_t elem_size, int corank,
int alloc_type)
{
ensure_initialization (); /* This function might be the first one to be
called, if it is called in a constructor. */
cas_coarray_alloc_work (desc, elem_size, corank, alloc_type);
}
void
cas_coarray_alloc_chk (gfc_array_void *desc, size_t elem_size, int corank,
int alloc_type, int *status, char *errmsg,
size_t errmsg_len)
{
STAT_ERRMSG_ENTRY_CHECK (status, errmsg, errmsg_len);
if (unlikely(GFC_DESCRIPTOR_DATA (desc) != NULL))
{
if (status == NULL)
{
fprintf (stderr,"Image %d: Attempting to allocate already allocated "
"variable at %p %p\n", this_image.image_num + 1, (void *) desc,
desc->base_addr);
exit (1);
}
else
{
*status = LIBERROR_ALLOCATION;
if (errmsg)
{
size_t errmsg_written_bytes;
errmsg_written_bytes
= snprintf (errmsg, errmsg_len, "Attempting to allocate already "
"allocated variable");
if (errmsg_written_bytes > errmsg_len - 1)
errmsg_written_bytes = errmsg_len - 1;
memset (errmsg + errmsg_written_bytes, ' ',
errmsg_len - errmsg_written_bytes);
}
return;
}
}
cas_coarray_alloc_work (desc, elem_size, corank, alloc_type);
sync_all (&local->si);
}
void
cas_coarray_free (gfc_array_void *desc, int alloc_type)
{
int i;
if (alloc_type == GFC_NCA_LOCK_COARRAY)
{
lock_array *la;
int expected = 0;
la = desc->base_addr - offsetof (lock_array, arr);
/* TODO: Fix this, replace with some kind of atomic initilization. */
while (!__atomic_compare_exchange_n (&la->owner, &expected,
this_image.image_num + 1, false,
__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
;
if (!--la->initialized)
{
/* Coarray locks can be removed and just normal
pthread_mutex can be used. */
for (i = 0; i < local->total_num_images; i++)
pthread_mutex_destroy (&la->arr[i]);
}
__atomic_store_n (&la->owner, 0, __ATOMIC_SEQ_CST);
}
else if (alloc_type == GFC_NCA_EVENT_COARRAY)
(void)0; // TODO
free_memory_with_id (&local->ai, (intptr_t)desc);
desc->base_addr = NULL;
}
int
cas_coarray_this_image (int distance __attribute__ ((unused)))
{
return this_image.image_num + 1;
}
int
cas_coarray_num_images (int distance __attribute__ ((unused)))
{
return local->total_num_images;
}
void
cas_coarray_sync_all (int *stat)
{
STAT_ERRMSG_ENTRY_CHECK (stat, NULL, 0);
sync_all (&local->si);
}
void
cas_sync_images (int s, int *images, int *stat, char *error,
size_t err_size)
{
STAT_ERRMSG_ENTRY_CHECK (stat, error, err_size);
sync_table (&local->si, images, s);
}
void
cas_lock (void *lock)
{
pthread_mutex_lock (lock);
}
void
cas_unlock (void *lock)
{
pthread_mutex_unlock (lock);
}
void
cas_collsub_reduce_array (gfc_array_char *desc,
void (*assign_function) (void *, void *),
int *result_image, int *stat, char *errmsg,
size_t errmsg_len)
{
STAT_ERRMSG_ENTRY_CHECK (stat, errmsg, errmsg_len);
collsub_reduce_array (&local->ci, desc, result_image, assign_function);
}
void
cas_collsub_reduce_scalar (void *obj, index_type elem_size,
void (*assign_function) (void *, void *),
int *result_image, int *stat, char *errmsg,
size_t errmsg_len)
{
STAT_ERRMSG_ENTRY_CHECK (stat, errmsg, errmsg_len);
collsub_reduce_scalar (&local->ci, obj, elem_size, result_image,
assign_function);
}
void
cas_collsub_broadcast_array (gfc_array_char *restrict a, int source_image,
int *stat, char *errmsg, size_t errmsg_len)
{
STAT_ERRMSG_ENTRY_CHECK (stat, errmsg, errmsg_len);
collsub_broadcast_array (&local->ci, a, source_image - 1);
}
void
cas_collsub_broadcast_scalar (void *restrict obj, size_t size,
int source_image, int *stat, char *errmsg,
size_t errmsg_len)
{
STAT_ERRMSG_ENTRY_CHECK (stat, errmsg, errmsg_len);
collsub_broadcast_scalar (&local->ci, obj, size, source_image - 1);
}