/* blob: e48f3074d58f0eac4913e359ce34c5506a0ad2aa */
/* { dg-do run { target openacc_nvidia_accel_selected } } */
/* { dg-additional-options "-lcuda" } */
/* { dg-require-effective-target openacc_cuda } */
#include <openacc.h>
#include <stdlib.h>
#include "cuda.h"
#include <stdio.h>
#define n 128
/* Bind a user-created CUDA stream to OpenACC async queue 1, enqueue three
   parallel regions on that queue, wait for the queue, and verify the result
   computed by the second region.

   Both arrays start as a[i] = 3, c[i] = 0.  The second region stores
   a[ii] + a[N - ii - 1] into c, the third region overwrites a with 6.
   The host check expects c[i] == 6, i.e. 3 + 3, the sum of the initial
   contents of a.

   Returns 0 on success; calls abort () on any failure.  */
int
main (void)
{
  CUresult r;
  CUstream stream1;
  int N = n;
  int a[n];
  int c[n];

  acc_init (acc_device_nvidia);

  r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      abort ();
    }

  acc_set_cuda_stream (1, stream1);

  /* Read the binding back: if queue 1 is not actually using stream1, the
     regions below would not exercise the stream this test created.  */
  if (acc_get_cuda_stream (1) != stream1)
    {
      fprintf (stderr, "acc_set_cuda_stream did not bind stream1 to queue 1\n");
      abort ();
    }

  for (int i = 0; i < n; i++)
    {
      a[i] = 3;
      c[i] = 0;
    }

#pragma acc data copy (a, c) copyin (N)
  {
    /* Empty region enqueued on queue 1 ahead of the real work.  */
#pragma acc parallel async (1)
    ;

    /* c[ii] = a[ii] + a[N - ii - 1], gang-partitioned with 320 gangs
       requested (more gangs than the 128 loop iterations).  */
#pragma acc parallel async (1) num_gangs (320)
#pragma acc loop gang
    for (int ii = 0; ii < N; ii++)
      c[ii] = (a[ii] + a[N - ii - 1]);

    /* Overwrite a on the same queue, after the region that reads it was
       enqueued.  */
#pragma acc parallel async (1)
#pragma acc loop seq
    for (int ii = 0; ii < n; ii++)
      a[ii] = 6;

    /* Wait for queue 1 before the data region ends.  */
#pragma acc wait (1)
  }

  for (int i = 0; i < n; i++)
    if (c[i] != 6)
      abort ();

  return 0;
}