blob: d7cb174b9e15a43676efddee59a7727db1627514 [file] [log] [blame]
/* Check whether hipBlas' daxpy works with an interop object.
daxpy(N, DA, DX, INCX, DY, INCY)
calculates (for DX = DY = 1):
DY(1:N) = DY(1:N) + DA * DX(1:N)
and otherwise N array elements, taking every INCX-th or INCY-th one, repectively.
Based on the interop example in OpenMP's example document */
/* Minimal check whether HIP works - by checking whether the API routines
seem to work. This includes a fallback if the header is not
available. */
#if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__)
#error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
#endif
#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__)
#error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined"
#endif
#include <assert.h>
#include <omp.h>
#include "../libgomp.c-c++-common/on_device_arch.h"
#if __has_include(<hipblas/hipblas.h>) && (__has_include(<library_types.h>) || !defined(__HIP_PLATFORM_NVIDIA__)) && !defined(USE_HIP_FALLBACK_HEADER)
#ifdef __HIP_PLATFORM_NVIDIA__
/* There seems to be an issue with hip/library_types.h including
CUDA's "library_types.h". Include CUDA's one explicitly here.
Could possibly worked around by using -isystem vs. -I. */
#include <library_types.h>
/* For some reasons, the following symbols do not seem to get
mapped from HIP to CUDA, causing link errors. */
#define hipblasSetStream cublasSetStream_v2
#define hipblasDaxpy cublasDaxpy_v2
#define hipblasCreate cublasCreate_v2
#endif
#include <hipblas/hipblas.h>
#elif defined(__HIP_PLATFORM_AMD__)
/* Add a poor man's fallback declaration. */
#if !defined(USE_HIP_FALLBACK_HEADER)
#warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_AMD__"
#endif
typedef enum
{
HIPBLAS_STATUS_SUCCESS = 0
} hipblasStatus_t;
typedef struct ihipStream_t* hipStream_t;
typedef void* hipblasHandle_t;
hipblasStatus_t hipblasCreate (hipblasHandle_t*);
hipblasStatus_t hipblasSetStream (hipblasHandle_t, hipStream_t);
hipblasStatus_t hipblasDaxpy (hipblasHandle_t, int, const double*, const double*, int, double*, int);
#else
/* Add a poor man's fallback declaration. */
#if !defined(USE_HIP_FALLBACK_HEADER)
#warning "Using fallback declaration for <hipblas/hipblas.h> for __HIP_PLATFORM_NVIDA__"
#endif
#if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && __has_include(<cublas_v2.h>) && !defined(USE_CUDA_FALLBACK_HEADER)
#include <cuda.h>
#include <cudaTypedefs.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#else
/* Add a poor man's fallback declaration. */
#if defined(USE_CUDA_FALLBACK_HEADER)
// no warning
#elif !__has_include(<cuda.h>)
#warning "Using GCC's cuda.h as fallback for cuda.h"
#elif !__has_include(<cudaTypedefs.h>)
#warning "Using GCC's cuda.h as fallback for cudaTypedefs.h"
#elif !__has_include(<cuda_runtime.h>)
#warning "Using GCC's cuda.h as fallback for cuda_runtime.h"
#else
#warning "Using GCC's cuda.h as fallback for cublas_v2.h"
#endif
#include "../../../include/cuda/cuda.h"
typedef enum {
CUBLAS_STATUS_SUCCESS = 0,
} cublasStatus_t;
typedef CUstream cudaStream_t;
typedef struct cublasContext* cublasHandle_t;
#define cublasCreate cublasCreate_v2
cublasStatus_t cublasCreate_v2 (cublasHandle_t *);
#define cublasSetStream cublasSetStream_v2
cublasStatus_t cublasSetStream_v2 (cublasHandle_t, cudaStream_t);
#define cublasDaxpy cublasDaxpy_v2
cublasStatus_t cublasDaxpy_v2(cublasHandle_t, int, const double*, const double*, int, double*, int);
#endif
#define HIPBLAS_STATUS_SUCCESS CUBLAS_STATUS_SUCCESS
#define hipblasStatus_t cublasStatus_t
#define hipStream_t cudaStream_t
#define hipblasHandle_t cublasHandle_t
#define hipblasCreate cublasCreate
#define hipblasSetStream cublasSetStream
#define hipblasDaxpy cublasDaxpy
#endif
static int used_variant = 0;
void
run_hipBlasdaxpy (int n, double da, const double *dx, int incx, double *dy, int incy, omp_interop_t obj)
{
used_variant = 1;
omp_interop_rc_t res;
hipblasStatus_t stat;
omp_intptr_t fr = omp_get_interop_int(obj, omp_ipr_fr_id, &res);
assert (res == omp_irc_success && fr == omp_ifr_hip);
hipStream_t stream = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res);
assert (res == omp_irc_success);
hipblasHandle_t handle;
stat = hipblasCreate (&handle);
assert (stat == HIPBLAS_STATUS_SUCCESS);
stat = hipblasSetStream (handle, stream);
assert (stat == HIPBLAS_STATUS_SUCCESS);
/* 'da' can be in host or device space, 'dx' and 'dy' must be in device space. */
stat = hipblasDaxpy (handle, n, &da, dx, 1, dy, 1) ;
assert (stat == HIPBLAS_STATUS_SUCCESS);
}
#if defined(__HIP_PLATFORM_AMD__)
#pragma omp declare variant(run_hipBlasdaxpy) \
match(construct={dispatch}, target_device={kind(nohost), arch("amdgcn")}) \
adjust_args(need_device_ptr : dx, dy) \
append_args(interop(targetsync, prefer_type("hip")))
#elif defined(__HIP_PLATFORM_NVIDIA__)
#pragma omp declare variant(run_hipBlasdaxpy) \
match(construct={dispatch}, target_device={kind(nohost), arch("nvptx")}) \
adjust_args(need_device_ptr : dx, dy) \
append_args(interop(targetsync, prefer_type("hip")))
#else
#error "wrong platform"
#endif
void
run_daxpy (int n, double da, const double *dx, int incx, double *dy, int incy)
{
used_variant = 2;
if (incx == 1 && incy == 1)
#pragma omp simd
for (int i = 0; i < n; i++)
dy[i] += da * dx[i];
else
{
int ix = 0;
int iy = 0;
for (int i = 0; i < n; i++)
{
dy[iy] += da * dx[ix];
ix += incx;
iy += incy;
}
}
}
void
run_test (int dev)
{
constexpr int N = 1024;
// A = {1,2,...,N}
// B = {-1, -2, ..., N}
// B' = daxpy (N, 3, A, incx=1, B, incy=1)
// = B + 3*A
// -> B' = {0, 2, 4, 6, ... }
double A[N], B[N];
double factor = 3.0;
for (int i = 0; i < N; i++)
{
A[i] = i;
B[i] = -i;
}
if (dev != omp_initial_device && dev != omp_get_num_devices ())
{
#pragma omp target enter data device(dev) map(A, B)
}
used_variant = 99;
#pragma omp dispatch device(dev)
run_daxpy (N, factor, A, 1, B, 1);
if (dev != omp_initial_device && dev != omp_get_num_devices ())
{
#pragma omp target exit data device(dev) map(release: A) map(from: B)
int tmp = omp_get_default_device ();
omp_set_default_device (dev);
#if defined(__HIP_PLATFORM_AMD__)
if (on_device_arch_gcn ())
#else
if (on_device_arch_nvptx ())
#endif
assert (used_variant == 1);
else
assert (used_variant == 2);
omp_set_default_device (tmp);
}
else
assert (used_variant == 2);
for (int i = 0; i < N; i++)
assert (B[i] == 2*i);
}
int
main ()
{
int ndev = omp_get_num_devices ();
for (int dev = 0; dev <= ndev; dev++)
run_test (dev);
run_test (omp_initial_device);
return 0;
}