| /* Minimal check whether HIP works - by checking whether the API routines |
| seem to work. This includes various fallbacks if the header is not |
| available. */ |
| |
| #include <assert.h> |
| #include <omp.h> |
| |
| #if !defined(__HIP_PLATFORM_AMD__) && !defined(__HIP_PLATFORM_NVIDIA__) |
| #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" |
| #endif |
| |
| #if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_PLATFORM_NVIDIA__) |
| #error "Either __HIP_PLATFORM_AMD__ or __HIP_PLATFORM_NVIDIA__ must be defined" |
| #endif |
| |
| #if __has_include(<hip/hip_runtime_api.h>) && !defined(USE_HIP_FALLBACK_HEADER) |
| #include <hip/hip_runtime_api.h> |
| |
| #elif defined(__HIP_PLATFORM_AMD__) |
| /* Add a poor man's fallback declaration. */ |
| #if !defined(USE_HIP_FALLBACK_HEADER) |
| #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_AMD__" |
| #endif |
| |
| typedef struct ihipStream_t* hipStream_t; |
| typedef struct ihipCtx_t* hipCtx_t; |
| typedef int hipError_t; |
| typedef int hipDevice_t; |
| enum { |
| hipSuccess = 0, |
| hipErrorNotSupported = 801 |
| }; |
| |
| typedef enum hipDeviceAttribute_t { |
| hipDeviceAttributeClockRate = 5, |
| hipDeviceAttributeMaxGridDimX = 29 |
| } hipDeviceAttribute_t; |
| |
| hipError_t hipDeviceGetAttribute (int *, hipDeviceAttribute_t, hipDevice_t); |
| hipError_t hipCtxGetApiVersion (hipCtx_t, int *); |
| hipError_t hipStreamGetDevice (hipStream_t, hipDevice_t *); |
| hipError_t hipStreamQuery (hipStream_t); |
| |
| #elif defined(__HIP_PLATFORM_NVIDIA__) |
| /* Add a poor man's fallback declaration. */ |
| #if !defined(USE_HIP_FALLBACK_HEADER) |
| #warning "Using fallback declaration for <hip/hip_runtime_api.h> for __HIP_PLATFORM_NVIDIA__" |
| #endif |
| |
| #if __has_include(<cuda.h>) && __has_include(<cudaTypedefs.h>) && __has_include(<cuda_runtime.h>) && !defined(USE_CUDA_FALLBACK_HEADER) |
| #include <cuda.h> |
| #include <cudaTypedefs.h> |
| #include <cuda_runtime.h> |
| #else |
| #if defined(USE_CUDA_FALLBACK_HEADER) |
| // no warning |
| #elif !__has_include(<cuda.h>) |
| #warning "Using GCC's cuda.h as fallback for cuda.h" |
| #elif !__has_include(<cudaTypedefs.h>) |
| #warning "Using GCC's cuda.h as fallback for cudaTypedefs.h" |
| #else |
| #warning "Using GCC's cuda.h as fallback for cuda_runtime.h" |
| #endif |
| |
| #include "../../../include/cuda/cuda.h" |
| |
| typedef int cudaError_t; |
| enum { |
| cudaSuccess = 0 |
| }; |
| |
| enum cudaDeviceAttr { |
| cudaDevAttrClockRate = 13, |
| cudaDevAttrMaxGridDimX = 5 |
| }; |
| |
| cudaError_t cudaDeviceGetAttribute (int *, enum cudaDeviceAttr, int); |
| CUresult cuCtxGetApiVersion(CUcontext, unsigned int *); |
| CUresult cuStreamGetCtx (CUstream, CUcontext *); |
| #endif |
| |
| typedef CUstream hipStream_t; |
| typedef CUcontext hipCtx_t; |
| typedef CUdevice hipDevice_t; |
| |
| typedef int hipError_t; |
| typedef int hipDevice_t; |
| enum { |
| hipSuccess = 0, |
| hipErrorNotSupported = 801 |
| }; |
| |
| |
| typedef enum hipDeviceAttribute_t { |
| hipDeviceAttributeClockRate = 5, |
| hipDeviceAttributeMaxGridDimX = 29 |
| } hipDeviceAttribute_t; |
| |
| inline static hipError_t |
| hipDeviceGetAttribute (int *ival, hipDeviceAttribute_t attr, hipDevice_t dev) |
| { |
| enum cudaDeviceAttr cuattr; |
| switch (attr) |
| { |
| case hipDeviceAttributeClockRate: |
| cuattr = cudaDevAttrClockRate; |
| break; |
| case hipDeviceAttributeMaxGridDimX: |
| cuattr = cudaDevAttrMaxGridDimX; |
| break; |
| default: |
| assert (0); |
| } |
| return cudaDeviceGetAttribute (ival, cuattr, dev) != cudaSuccess; |
| } |
| |
| inline static hipError_t |
| hipCtxGetApiVersion (hipCtx_t ctx, int *ver) |
| { |
| unsigned uver; |
| hipError_t err; |
| err = cuCtxGetApiVersion (ctx, &uver) != CUDA_SUCCESS; |
| *ver = (int) uver; |
| return err; |
| } |
| |
| inline static hipError_t |
| hipStreamGetDevice (hipStream_t stream, hipDevice_t *dev) |
| { |
| #if CUDA_VERSION >= 12080 |
| return cudaStreamGetDevice (stream, dev); |
| #else |
| hipError_t err; |
| CUcontext ctx; |
| err = cuStreamGetCtx (stream, &ctx) != CUDA_SUCCESS; |
| if (err == hipSuccess) |
| err = cuCtxPushCurrent (ctx) != CUDA_SUCCESS; |
| if (err == hipSuccess) |
| err = cuCtxGetDevice (dev) != CUDA_SUCCESS; |
| if (err == hipSuccess) |
| err = cuCtxPopCurrent (&ctx) != CUDA_SUCCESS; |
| return err; |
| #endif |
| } |
| |
| inline static hipError_t |
| hipStreamQuery (hipStream_t stream) |
| { |
| return cuStreamQuery (stream) != CUDA_SUCCESS; |
| } |
| |
| #else |
| #error "should be unreachable" |
| #endif |
| |
| int |
| main () |
| { |
| int ivar; |
| omp_interop_rc_t res; |
| omp_interop_t obj = omp_interop_none; |
| hipError_t hip_err; |
| |
| #pragma omp interop init(target, targetsync, prefer_type("hip") : obj) |
| |
| omp_interop_fr_t fr = (omp_interop_fr_t) omp_get_interop_int (obj, omp_ipr_fr_id, &res); |
| assert (res == omp_irc_success); |
| assert (fr == omp_ifr_hip); |
| |
| ivar = (int) omp_get_interop_int (obj, omp_ipr_vendor, &res); |
| assert (res == omp_irc_success); |
| int vendor_is_amd = ivar == 1; |
| #if defined(__HIP_PLATFORM_AMD__) |
| assert (ivar == 1); |
| #elif defined(__HIP_PLATFORM_NVIDIA__) |
| assert (ivar == 11); |
| #else |
| assert (0); |
| #endif |
| |
| |
| /* Check whether the omp_ipr_device -> hipDevice_t yields a valid device. */ |
| |
| hipDevice_t hip_dev = (int) omp_get_interop_int (obj, omp_ipr_device, &res); |
| assert (res == omp_irc_success); |
| |
| /* Assume a clock size is available and > 1 GHz; value is in kHz. */ |
| hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeClockRate, hip_dev); |
| assert (hip_err == hipSuccess); |
| assert (ivar > 1000000 /* kHz */); |
| |
| /* Assume that the MaxGridDimX is available and > 1024. */ |
| hip_err = hipDeviceGetAttribute (&ivar, hipDeviceAttributeMaxGridDimX, hip_dev); |
| assert (hip_err == hipSuccess); |
| assert (ivar > 1024); |
| |
| |
| /* Check whether the omp_ipr_device_context -> hipCtx_t yields a context. */ |
| |
| hipCtx_t hip_ctx = (hipCtx_t) omp_get_interop_ptr (obj, omp_ipr_device_context, &res); |
| assert (res == omp_irc_success); |
| |
| /* Assume API Version > 0 for Nvidia, hipErrorNotSupported for AMD. */ |
| ivar = -99; |
| #pragma GCC diagnostic push |
| #pragma GCC diagnostic ignored "-Wdeprecated-declarations" |
| hip_err = hipCtxGetApiVersion (hip_ctx, &ivar); |
| #pragma GCC diagnostic pop |
| |
| if (vendor_is_amd) |
| assert (hip_err == hipErrorNotSupported && ivar == -99); |
| else |
| { |
| assert (hip_err == hipSuccess); |
| assert (ivar > 0); |
| } |
| |
| |
| /* Check whether the omp_ipr_targetsync -> hipStream_t yields a stream. */ |
| |
| hipStream_t hip_sm = (hipStream_t) omp_get_interop_ptr (obj, omp_ipr_targetsync, &res); |
| assert (res == omp_irc_success); |
| |
| hipDevice_t dev_stream = 99; |
| hip_err = hipStreamGetDevice (hip_sm, &dev_stream); |
| assert (hip_err == hipSuccess); |
| assert (dev_stream == hip_dev); |
| |
| /* All jobs should have been completed (as there were none none) */ |
| hip_err = hipStreamQuery (hip_sm); |
| assert (hip_err == hipSuccess); |
| |
| #pragma omp interop destroy(obj) |
| } |