blob: a39f0ae390f5b8693708b9e7b921df33e621cde3 [file] [log] [blame]
/* Copyright (C) 2020 Free Software Foundation, Inc.
Contributed by Nicolas Koenig
This file is part of the GNU Fortran Native Coarray Library (libnca).
Libnca is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
Libnca is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libgfortran.h"
#include <string.h>
#include "libcoarraynative.h"
#include "collective_subroutine.h"
#include "allocator.h"
#include "counter_barrier.h"
#include <string.h>
void *
get_collsub_buf (collsub_iface *ci, size_t size)
{
void *ret;
pthread_mutex_lock (&ci->s->mutex);
/* curr_size is always at least sizeof(double), so we don't need to worry
about size == 0. */
if (size > ci->s->curr_size)
{
shared_free (ci->a, ci->s->collsub_buf, ci->s->curr_size);
ci->s->collsub_buf = shared_malloc (ci->a, size);
ci->s->curr_size = size;
}
ret = SHMPTR_AS (void *, ci->s->collsub_buf, ci->sm);
pthread_mutex_unlock (&ci->s->mutex);
return ret;
}
/* This function syncs all images with one another. It will only return once
all images have called it. */
void
collsub_sync (collsub_iface *ci)
{
counter_barrier_wait (&ci->s->barrier);
}
/* assign_function is needed since we only know how to assign the type inside
the compiler. It should be implemented as follows:
void assign_function (void *a, void *b)
{
*((t *) a) = reduction_operation ((t *) a, (t *) b);
}
*/
void
collsub_reduce_array (collsub_iface *ci, gfc_array_char *desc,
int *result_image,
void (*assign_function) (void *, void *))
{
void *buffer;
pack_info pi;
bool packed;
int cbit = 0;
int imoffset;
index_type elem_size;
index_type this_image_size_bytes;
char *this_image_buf;
error_on_missing_images ();
packed = pack_array_prepare (&pi, desc);
if (pi.num_elem == 0)
return;
elem_size = GFC_DESCRIPTOR_SIZE (desc);
this_image_size_bytes = elem_size * pi.num_elem;
buffer
= get_collsub_buf (ci, this_image_size_bytes * local->total_num_images);
this_image_buf = buffer + this_image_size_bytes * this_image.image_num;
if (packed)
memcpy (this_image_buf, GFC_DESCRIPTOR_DATA (desc), this_image_size_bytes);
else
pack_array_finish (&pi, desc, this_image_buf);
collsub_sync (ci);
for (; ((this_image.image_num >> cbit) & 1) == 0
&& (local->total_num_images >> cbit) != 0;
cbit++)
{
imoffset = 1 << cbit;
if (this_image.image_num + imoffset < local->total_num_images)
/* Reduce arrays elementwise. */
for (ssize_t i = 0; i < pi.num_elem; i++)
assign_function (this_image_buf + elem_size * i,
this_image_buf + this_image_size_bytes * imoffset
+ elem_size * i);
collsub_sync (ci);
}
for (; (local->total_num_images >> cbit) != 0; cbit++)
collsub_sync (ci);
if (!result_image || (*result_image - 1 ) == this_image.image_num)
{
if (packed)
memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, this_image_size_bytes);
else
unpack_array_finish (&pi, desc, buffer);
}
finish_collective_subroutine (ci);
}
void
collsub_reduce_scalar (collsub_iface *ci, void *obj, index_type elem_size,
int *result_image,
void (*assign_function) (void *, void *))
{
void *buffer;
int cbit = 0;
int imoffset;
char *this_image_buf;
error_on_missing_images ();
buffer = get_collsub_buf (
ci, elem_size * master_get_num_active_images (this_image.m));
this_image_buf = buffer + elem_size * this_image.image_num;
memcpy (this_image_buf, obj, elem_size);
collsub_sync (ci);
for (; ((this_image.image_num >> cbit) & 1) == 0
&& (local->total_num_images >> cbit) != 0;
cbit++)
{
imoffset = 1 << cbit;
if (this_image.image_num + imoffset < local->total_num_images)
{
/* Reduce arrays elementwise. */
assign_function (this_image_buf,
this_image_buf + elem_size * imoffset);
}
collsub_sync (ci);
}
for (; (master_get_num_active_images (this_image.m) >> cbit) != 0; cbit++)
collsub_sync (ci);
if (!result_image || *result_image == this_image.image_num)
memcpy (obj, buffer, elem_size);
finish_collective_subroutine (ci);
}
/* Do not use sync_all(), because the program should deadlock in the case that
* some images are on a sync_all barrier while others are in a collective
* subroutine. */
void
collsub_iface_init (collsub_iface *ci, alloc_iface *ai, shared_memory *sm)
{
ci->s = SHARED_MEMORY_RAW_ALLOC_PTR (sm, collsub_iface_shared);
ci->s->collsub_buf = shared_malloc (
get_allocator (ai), sizeof (double) * local->total_num_images);
ci->s->curr_size = sizeof (double) * local->total_num_images;
ci->sm = sm;
ci->a = get_allocator (ai);
master_bind_active_image_barrier (this_image.m, &ci->s->barrier);
initialize_shared_mutex (&ci->s->mutex);
}
void
collsub_broadcast_scalar (collsub_iface *ci, void *obj, index_type elem_size,
int source_image /* Adjusted in the wrapper. */)
{
void *buffer;
buffer = get_collsub_buf (ci, elem_size);
if (source_image == this_image.image_num)
{
memcpy (buffer, obj, elem_size);
collsub_sync (ci);
}
else
{
collsub_sync (ci);
memcpy (obj, buffer, elem_size);
}
finish_collective_subroutine (ci);
}
void
collsub_broadcast_array (collsub_iface *ci, gfc_array_char *desc,
int source_image)
{
void *buffer;
pack_info pi;
bool packed;
index_type elem_size;
index_type size_bytes;
packed = pack_array_prepare (&pi, desc);
if (pi.num_elem == 0)
return;
elem_size = GFC_DESCRIPTOR_SIZE (desc);
size_bytes = elem_size * pi.num_elem;
buffer = get_collsub_buf (ci, size_bytes);
if (source_image == this_image.image_num)
{
if (packed)
memcpy (buffer, GFC_DESCRIPTOR_DATA (desc), size_bytes);
else
pack_array_finish (&pi, desc, buffer);
collsub_sync (ci);
}
else
{
collsub_sync (ci);
if (packed)
memcpy (GFC_DESCRIPTOR_DATA (desc), buffer, size_bytes);
else
unpack_array_finish (&pi, desc, buffer);
}
finish_collective_subroutine (ci);
}