diff --git a/projects/hip/Jenkinsfile b/projects/hip/Jenkinsfile index 6d37f10e3c..e6b60f398e 100644 --- a/projects/hip/Jenkinsfile +++ b/projects/hip/Jenkinsfile @@ -167,8 +167,6 @@ def docker_build_inside_image( def build_image, String inside_args, String platf } // Cap the maximum amount of testing, in case of hangs - // Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133 - // Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1 timeout(time: 1, unit: 'HOURS') { stage("${platform} unit testing") @@ -178,7 +176,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf cd ${build_dir_rel} make install -j\$(nproc) make build_tests -i -j\$(nproc) - ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)" + ctest """ // If unit tests output a junit or xunit file in the future, jenkins can parse that file // to display test results on the dashboard diff --git a/projects/hip/bin/hipcc b/projects/hip/bin/hipcc index cea6211a87..68e4a96721 100755 --- a/projects/hip/bin/hipcc +++ b/projects/hip/bin/hipcc @@ -498,6 +498,10 @@ foreach $arg (@ARGV) $obj = "$tmpdir/$obj"; my $fileType = `file $obj`; my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/); + if ($fileType =~ m/ELF/) { + my $sections = `readelf -e -W $obj`; + $isObj = !($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/); + } $allIsObj = ($allIsObj and $isObj); if ($isObj) { $realObjs = ($realObjs . " " . $obj); diff --git a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 9906ca6fa6..b4f379879b 100644 --- a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -22,8 +22,8 @@ | typedef |`CUDA_RESOURCE_VIEW_DESC_st` | | | struct |`CUDA_TEXTURE_DESC` | | | typedef |`CUDA_TEXTURE_DESC_st` | | -| struct |`CUdevprop` |`hipDeviceProp_t` | -| typedef |`CUdevprop_st` |`hipDeviceProp_t` | +| struct |`CUdevprop` | | +| typedef |`CUdevprop_st` | | | struct |`CUipcEventHandle` |`ihipIpcEventHandle_t` | | typedef |`CUipcEventHandle_st` |`ihipIpcEventHandle_t` | | struct |`CUipcMemHandle` |`hipIpcMemHandle_t` | @@ -763,6 +763,7 @@ | `cuDeviceGetName` | `hipDeviceGetName` | | `cuDeviceTotalMem` | `hipDeviceTotalMem` | | `cuDeviceGetLuid` | | +| `cuDeviceGetUuid` | | ## **6. Device Management [DEPRECATED]** @@ -792,9 +793,9 @@ | `cuCtxGetCurrent` | `hipCtxGetCurrent` | | `cuCtxGetDevice` | `hipCtxGetDevice` | | `cuCtxGetFlags` | `hipCtxGetFlags` | -| `cuCtxGetLimit` | | +| `cuCtxGetLimit` | `hipDeviceGetLimit` | | `cuCtxGetSharedMemConfig` | `hipCtxGetSharedMemConfig` | -| `cuCtxGetStreamPriorityRange` | | +| `cuCtxGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange`| | `cuCtxPopCurrent` | `hipCtxPopCurrent` | | `cuCtxPushCurrent` | `hipCtxPushCurrent` | | `cuCtxSetCacheConfig` | `hipCtxSetCacheConfig` | @@ -835,16 +836,16 @@ |-----------------------------------------------------------|-------------------------------| | `cuArray3DCreate` | `hipArray3DCreate` | | `cuArray3DGetDescriptor` | | -| `cuArrayCreate` | | +| `cuArrayCreate` | `hipArrayCreate` | | `cuArrayDestroy` | | | `cuArrayGetDescriptor` | | | `cuDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` | | `cuDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | -| `cuIpcCloseMemHandle` | | +| `cuIpcCloseMemHandle` | `hipIpcCloseMemHandle` | | `cuIpcGetEventHandle` | | -| `cuIpcGetMemHandle` | | +| `cuIpcGetMemHandle` | `hipIpcGetMemHandle` | | `cuIpcOpenEventHandle` | | -| `cuIpcOpenMemHandle` | | +| `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` | | `cuMemAlloc` | `hipMalloc` | | `cuMemAllocHost` | | | `cuMemAllocManaged` | | @@ -867,7 +868,7 @@ | `cuMemcpyDtoDAsync` | `hipMemcpyDtoDAsync` | | `cuMemcpyDtoH` | `hipMemcpyDtoH` | | `cuMemcpyDtoHAsync` | `hipMemcpyDtoHAsync` | -| `cuMemcpyHtoA` | | +| `cuMemcpyHtoA` | `hipMemcpyHtoA` | | `cuMemcpyHtoAAsync` | | | `cuMemcpyHtoD` | `hipMemcpyHtoD` | | `cuMemcpyHtoDAsync` | `hipMemcpyHtoDAsync` | @@ -875,11 +876,11 @@ | `cuMemcpyPeerAsync` | | | `cuMemFree` | `hipFree` | | `cuMemFreeHost` | `hipFreeHost` | -| `cuMemGetAddressRange` | | +| `cuMemGetAddressRange` | `hipMemGetAddressRange` | | `cuMemGetInfo` | `hipMemGetInfo` | | `cuMemHostAlloc` | `hipHostMalloc` | -| `cuMemHostGetDevicePointer` | | -| `cuMemHostGetFlags` | | +| `cuMemHostGetDevicePointer` | `hipHostGetDevicePointer` | +| `cuMemHostGetFlags` | `hipHostGetFlags` | | `cuMemHostRegister` | `hipHostRegister` | | `cuMemHostUnregister` | `hipHostUnregister` | | `cuMemsetD16` | | @@ -892,8 +893,8 @@ | `cuMemsetD2D8Async` | | | `cuMemsetD32` | `hipMemset` | | `cuMemsetD32Async` | `hipMemsetAsync` | -| `cuMemsetD2D8` | | -| `cuMemsetD2D8Async` | | +| `cuMemsetD8` | `hipMemsetD8` | +| `cuMemsetD8Async` | | | `cuMipmappedArrayCreate` | | | `cuMipmappedArrayDestroy` | | | `cuMipmappedArrayGetLevel` | | @@ -916,8 +917,8 @@ |-----------------------------------------------------------|-------------------------------| | `cuStreamAddCallback` | `hipStreamAddCallback` | | `cuStreamAttachMemAsync` | | -| `cuStreamCreate` | | -| `cuStreamCreateWithPriority` | | +| `cuStreamCreate` | `hipStreamCreateWithFlags` | +| `cuStreamCreateWithPriority` | `hipStreamCreateWithPriority` | | `cuStreamDestroy` | `hipStreamDestroy` | | `cuStreamGetFlags` | `hipStreamGetFlags` | | `cuStreamGetPriority` | `hipStreamGetPriority` | @@ -932,7 +933,7 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| -| `cuEventCreate` | `hipEventCreate` | +| `cuEventCreate` | `hipEventCreateWithFlags` | | `cuEventDestroy` | `hipEventDestroy` | | `cuEventElapsedTime` | `hipEventElapsedTime` | | `cuEventQuery` | `hipEventQuery` | @@ -967,10 +968,13 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| | `cuFuncGetAttribute` | | +| `cuFuncSetAttribute` | | | `cuFuncSetCacheConfig` | `hipFuncSetCacheConfig` | | `cuFuncSetSharedMemConfig` | | | `cuLaunchKernel` | `hipModuleLaunchKernel` | | `cuLaunchHostFunc` | | +| `cuLaunchCooperativeKernel` | | +| `cuLaunchCooperativeKernelMultiDevice` | | ## **18. Execution Control [DEPRECATED]** @@ -1047,8 +1051,8 @@ | `cuTexRefGetMipmapLevelBias` | | | `cuTexRefGetMipmapLevelClamp` | | | `cuTexRefGetMipmappedArray` | | -| `cuTexRefSetAddress` | | -| `cuTexRefSetAddress2D` | | +| `cuTexRefSetAddress` | `hipTexRefSetAddress` | +| `cuTexRefSetAddress2D` | `hipTexRefSetAddress2D` | | `cuTexRefSetAddressMode` | `hipTexRefSetAddressMode` | | `cuTexRefSetArray` | `hipTexRefSetArray` | | `cuTexRefSetBorderColor` | | @@ -1233,3 +1237,4 @@ | `cuEGLStreamProducerReturnFrame` | | | `cuGraphicsEGLRegisterImage` | | | `cuGraphicsResourceGetMappedEglFrame` | | +| `cuEventCreateFromEGLSync` | | diff --git a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 7e5cd6fc3d..087b49b977 100644 --- a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -11,7 +11,7 @@ | `cudaDeviceGetLimit` | `hipDeviceGetLimit` | | `cudaDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` | | `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` | -| `cudaDeviceGetStreamPriorityRange` | | +| `cudaDeviceGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange` | | `cudaDeviceReset` | `hipDeviceReset` | | `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` | | `cudaDeviceSetLimit` | `hipDeviceSetLimit` | @@ -19,7 +19,7 @@ | `cudaDeviceSynchronize` | `hipDeviceSynchronize` | | `cudaGetDevice` | `hipGetDevice` | | `cudaGetDeviceCount` | `hipGetDeviceCount` | -| `cudaGetDeviceFlags` | | +| `cudaGetDeviceFlags` | `hipCtxGetFlags` | | `cudaGetDeviceProperties` | `hipGetDeviceProperties` | | `cudaIpcCloseMemHandle` | `hipIpcCloseMemHandle` | | `cudaIpcGetEventHandle` | `hipIpcGetEventHandle` | @@ -56,12 +56,15 @@ |-----------------------------------------------------------|-------------------------------| | `cudaStreamAddCallback` | `hipStreamAddCallback` | | `cudaStreamAttachMemAsync` | | +| `cudaStreamBeginCapture` | | +| `cudaStreamEndCapture` | | +| `cudaStreamIsCapturing` | | | `cudaStreamCreate` | `hipStreamCreate` | | `cudaStreamCreateWithFlags` | `hipStreamCreateWithFlags` | -| `cudaStreamCreateWithPriority` | | +| `cudaStreamCreateWithPriority` | `hipStreamCreateWithPriority` | | `cudaStreamDestroy` | `hipStreamDestroy` | | `cudaStreamGetFlags` | `hipStreamGetFlags` | -| `cudaStreamGetPriority` | | +| `cudaStreamGetPriority` | `hipStreamGetPriority` | | `cudaStreamQuery` | `hipStreamQuery` | | `cudaStreamSynchronize` | `hipStreamSynchronize` | | `cudaStreamWaitEvent` | `hipStreamWaitEvent` | @@ -82,7 +85,14 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| - +| `cudaSignalExternalSemaphoresAsync` | | +| `cudaWaitExternalSemaphoresAsync` | | +| `cudaImportExternalMemory` | | +| `cudaExternalMemoryGetMappedBuffer` | | +| `cudaExternalMemoryGetMappedMipmappedArray` | | +| `cudaDestroyExternalMemory` | | +| `cudaImportExternalSemaphore` | | +| `cudaDestroyExternalSemaphore` | | ## **7. Execution Control** diff --git a/projects/hip/docs/markdown/hip-math-api.md b/projects/hip/docs/markdown/hip-math-api.md index 37efafbbbf..9b8a3f2f11 100644 --- a/projects/hip/docs/markdown/hip-math-api.md +++ b/projects/hip/docs/markdown/hip-math-api.md @@ -1433,7 +1433,7 @@ __device__ float __expf(float x); __device__ static float __fadd_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fadd_rn @@ -1441,7 +1441,7 @@ __device__ static float __fadd_rd(float x, float y); __device__ static float __fadd_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fadd_ru @@ -1449,7 +1449,7 @@ __device__ static float __fadd_rn(float x, float y); __device__ static float __fadd_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fadd_rz @@ -1457,7 +1457,7 @@ __device__ static float __fadd_ru(float x, float y); __device__ static float __fadd_rz(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_rd @@ -1465,7 +1465,7 @@ __device__ static float __fadd_rz(float x, float y); __device__ static float __fdiv_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_rn @@ -1473,7 +1473,7 @@ __device__ static float __fdiv_rd(float x, float y); __device__ static float __fdiv_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_ru @@ -1481,7 +1481,7 @@ __device__ static float __fdiv_rn(float x, float y); __device__ static float __fdiv_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdiv_rz @@ -1489,7 +1489,7 @@ __device__ static float __fdiv_ru(float x, float y); __device__ static float __fdiv_rz(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fdividef @@ -1505,7 +1505,7 @@ __device__ static float __fdividef(float x, float y); __device__ float __fmaf_rd(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmaf_rn @@ -1513,7 +1513,7 @@ __device__ float __fmaf_rd(float x, float y, float z); __device__ float __fmaf_rn(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmaf_ru @@ -1521,7 +1521,7 @@ __device__ float __fmaf_rn(float x, float y, float z); __device__ float __fmaf_ru(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmaf_rz @@ -1529,7 +1529,7 @@ __device__ float __fmaf_ru(float x, float y, float z); __device__ float __fmaf_rz(float x, float y, float z); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_rd @@ -1537,7 +1537,7 @@ __device__ float __fmaf_rz(float x, float y, float z); __device__ static float __fmul_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_rn @@ -1545,7 +1545,7 @@ __device__ static float __fmul_rd(float x, float y); __device__ static float __fmul_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_ru @@ -1553,7 +1553,7 @@ __device__ static float __fmul_rn(float x, float y); __device__ static float __fmul_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fmul_rz @@ -1561,7 +1561,7 @@ __device__ static float __fmul_ru(float x, float y); __device__ static float __fmul_rz(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_rd @@ -1569,7 +1569,7 @@ __device__ static float __fmul_rz(float x, float y); __device__ float __frcp_rd(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_rn @@ -1577,7 +1577,7 @@ __device__ float __frcp_rd(float x); __device__ float __frcp_rn(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_ru @@ -1585,7 +1585,7 @@ __device__ float __frcp_rn(float x); __device__ float __frcp_ru(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frcp_rz @@ -1593,7 +1593,7 @@ __device__ float __frcp_ru(float x); __device__ float __frcp_rz(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __frsqrt_rn @@ -1601,7 +1601,7 @@ __device__ float __frcp_rz(float x); __device__ float __frsqrt_rn(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_rd @@ -1609,7 +1609,7 @@ __device__ float __frsqrt_rn(float x); __device__ float __fsqrt_rd(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_rn @@ -1617,7 +1617,7 @@ __device__ float __fsqrt_rd(float x); __device__ float __fsqrt_rn(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_ru @@ -1625,7 +1625,7 @@ __device__ float __fsqrt_rn(float x); __device__ float __fsqrt_ru(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsqrt_rz @@ -1633,7 +1633,7 @@ __device__ float __fsqrt_ru(float x); __device__ float __fsqrt_rz(float x); ``` -**Description:** Supported +**Description:** Unsupported ### __fsub_rd @@ -1641,7 +1641,7 @@ __device__ float __fsqrt_rz(float x); __device__ static float __fsub_rd(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fsub_rn @@ -1649,7 +1649,7 @@ __device__ static float __fsub_rd(float x, float y); __device__ static float __fsub_rn(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported ### __fsub_ru @@ -1657,7 +1657,15 @@ __device__ static float __fsub_rn(float x, float y); __device__ static float __fsub_ru(float x, float y); ``` -**Description:** Supported +**Description:** Unsupported + + +### __fsub_rz +```cpp +__device__ static float __fsub_rz(float x, float y); + +``` +**Description:** Unsupported ### __log10f @@ -1729,7 +1737,7 @@ __device__ float __tanf(float x); __device__ static double __dadd_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dadd_rn @@ -1737,7 +1745,7 @@ __device__ static double __dadd_rd(double x, double y); __device__ static double __dadd_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dadd_ru @@ -1745,7 +1753,7 @@ __device__ static double __dadd_rn(double x, double y); __device__ static double __dadd_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dadd_rz @@ -1753,7 +1761,7 @@ __device__ static double __dadd_ru(double x, double y); __device__ static double __dadd_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_rd @@ -1761,7 +1769,7 @@ __device__ static double __dadd_rz(double x, double y); __device__ static double __ddiv_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_rn @@ -1769,7 +1777,7 @@ __device__ static double __ddiv_rd(double x, double y); __device__ static double __ddiv_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_ru @@ -1777,7 +1785,7 @@ __device__ static double __ddiv_rn(double x, double y); __device__ static double __ddiv_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __ddiv_rz @@ -1785,7 +1793,7 @@ __device__ static double __ddiv_ru(double x, double y); __device__ static double __ddiv_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_rd @@ -1793,7 +1801,7 @@ __device__ static double __ddiv_rz(double x, double y); __device__ static double __dmul_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_rn @@ -1801,7 +1809,7 @@ __device__ static double __dmul_rd(double x, double y); __device__ static double __dmul_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_ru @@ -1809,7 +1817,7 @@ __device__ static double __dmul_rn(double x, double y); __device__ static double __dmul_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dmul_rz @@ -1817,7 +1825,7 @@ __device__ static double __dmul_ru(double x, double y); __device__ static double __dmul_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_rd @@ -1825,7 +1833,7 @@ __device__ static double __dmul_rz(double x, double y); __device__ double __drcp_rd(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_rn @@ -1833,7 +1841,7 @@ __device__ double __drcp_rd(double x); __device__ double __drcp_rn(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_ru @@ -1841,7 +1849,7 @@ __device__ double __drcp_rn(double x); __device__ double __drcp_ru(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __drcp_rz @@ -1849,7 +1857,7 @@ __device__ double __drcp_ru(double x); __device__ double __drcp_rz(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_rd @@ -1857,7 +1865,7 @@ __device__ double __drcp_rz(double x); __device__ double __dsqrt_rd(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_rn @@ -1865,7 +1873,7 @@ __device__ double __dsqrt_rd(double x); __device__ double __dsqrt_rn(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_ru @@ -1873,7 +1881,7 @@ __device__ double __dsqrt_rn(double x); __device__ double __dsqrt_ru(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsqrt_rz @@ -1881,7 +1889,7 @@ __device__ double __dsqrt_ru(double x); __device__ double __dsqrt_rz(double x); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_rd @@ -1889,7 +1897,7 @@ __device__ double __dsqrt_rz(double x); __device__ static double __dsub_rd(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_rn @@ -1897,7 +1905,7 @@ __device__ static double __dsub_rd(double x, double y); __device__ static double __dsub_rn(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_ru @@ -1905,7 +1913,7 @@ __device__ static double __dsub_rn(double x, double y); __device__ static double __dsub_ru(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __dsub_rz @@ -1913,7 +1921,7 @@ __device__ static double __dsub_ru(double x, double y); __device__ static double __dsub_rz(double x, double y); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_rd @@ -1921,7 +1929,7 @@ __device__ static double __dsub_rz(double x, double y); __device__ double __fma_rd(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_rn @@ -1929,7 +1937,7 @@ __device__ double __fma_rd(double x, double y, double z); __device__ double __fma_rn(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_ru @@ -1937,7 +1945,7 @@ __device__ double __fma_rn(double x, double y, double z); __device__ double __fma_ru(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __fma_rz @@ -1945,7 +1953,7 @@ __device__ double __fma_ru(double x, double y, double z); __device__ double __fma_rz(double x, double y, double z); ``` -**Description:** Supported +**Description:** Unsupported ### __brev diff --git a/projects/hip/hipify-clang/src/CUDA2HIP.cpp b/projects/hip/hipify-clang/src/CUDA2HIP.cpp index 02f3ae0f12..c0879c1f98 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP.cpp @@ -51,6 +51,8 @@ const std::map& CUDA_RENAMES_MAP() { ret.insert(CUDA_DRIVER_FUNCTION_MAP.begin(), CUDA_DRIVER_FUNCTION_MAP.end()); ret.insert(CUDA_RUNTIME_TYPE_NAME_MAP.begin(), CUDA_RUNTIME_TYPE_NAME_MAP.end()); ret.insert(CUDA_RUNTIME_FUNCTION_MAP.begin(), CUDA_RUNTIME_FUNCTION_MAP.end()); + ret.insert(CUDA_COMPLEX_TYPE_NAME_MAP.begin(), CUDA_COMPLEX_TYPE_NAME_MAP.end()); + ret.insert(CUDA_COMPLEX_FUNCTION_MAP.begin(), CUDA_COMPLEX_FUNCTION_MAP.end()); ret.insert(CUDA_BLAS_TYPE_NAME_MAP.begin(), CUDA_BLAS_TYPE_NAME_MAP.end()); ret.insert(CUDA_BLAS_FUNCTION_MAP.begin(), CUDA_BLAS_FUNCTION_MAP.end()); ret.insert(CUDA_RAND_TYPE_NAME_MAP.begin(), CUDA_RAND_TYPE_NAME_MAP.end()); diff --git a/projects/hip/hipify-clang/src/CUDA2HIP.h b/projects/hip/hipify-clang/src/CUDA2HIP.h index 9593c216a4..5c3a6fa246 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP.h +++ b/projects/hip/hipify-clang/src/CUDA2HIP.h @@ -15,6 +15,10 @@ extern const std::map CUDA_DRIVER_TYPE_NAME_MAP; extern const std::map CUDA_DRIVER_FUNCTION_MAP; // Maps the names of CUDA RUNTIME API types to the corresponding HIP types extern const std::map CUDA_RUNTIME_TYPE_NAME_MAP; +// Maps the names of CUDA Complex API types to the corresponding HIP types +extern const std::map CUDA_COMPLEX_TYPE_NAME_MAP; +// Maps the names of CUDA Complex API functions to the corresponding HIP functions +extern const std::map CUDA_COMPLEX_FUNCTION_MAP; // Maps the names of CUDA RUNTIME API functions to the corresponding HIP functions extern const std::map CUDA_RUNTIME_FUNCTION_MAP; // Maps the names of CUDA BLAS API types to the corresponding HIP types diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp new file mode 100644 index 0000000000..3bc7c4f0a0 --- /dev/null +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_functions.cpp @@ -0,0 +1,28 @@ +#include "CUDA2HIP.h" + +// Maps the names of CUDA DRIVER API types to the corresponding HIP types +const std::map CUDA_COMPLEX_FUNCTION_MAP{ + {"cuCrealf", {"hipCrealf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCimagf", {"hipCimagf", CONV_COMPLEX, API_COMPLEX}}, + {"make_cuFloatComplex", {"make_hipFloatComplex", CONV_COMPLEX, API_COMPLEX}}, + {"cuConjf", {"hipConjf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCaddf", {"hipCaddf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCsubf", {"hipCsubf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCmulf", {"hipCmulf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCdivf", {"hipCdivf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCabsf", {"hipCabsf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCreal", {"hipCreal", CONV_COMPLEX, API_COMPLEX}}, + {"cuCimag", {"hipCimag", CONV_COMPLEX, API_COMPLEX}}, + {"make_cuDoubleComplex", {"make_hipDoubleComplex", CONV_COMPLEX, API_COMPLEX}}, + {"cuConj", {"hipConj", CONV_COMPLEX, API_COMPLEX}}, + {"cuCadd", {"hipCadd", CONV_COMPLEX, API_COMPLEX}}, + {"cuCsub", {"hipCsub", CONV_COMPLEX, API_COMPLEX}}, + {"cuCmul", {"hipCmul", CONV_COMPLEX, API_COMPLEX}}, + {"cuCdiv", {"hipCdiv", CONV_COMPLEX, API_COMPLEX}}, + {"cuCabs", {"hipCabs", CONV_COMPLEX, API_COMPLEX}}, + {"make_cuComplex", {"make_hipComplex", CONV_COMPLEX, API_COMPLEX}}, + {"cuComplexFloatToDouble", {"hipComplexFloatToDouble", CONV_COMPLEX, API_COMPLEX}}, + {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}}, + {"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}}, + {"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}}, +}; diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp new file mode 100644 index 0000000000..f371cf3b9a --- /dev/null +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Complex_API_types.cpp @@ -0,0 +1,8 @@ +#include "CUDA2HIP.h" + +// Maps the names of CUDA DRIVER API types to the corresponding HIP types +const std::map CUDA_COMPLEX_TYPE_NAME_MAP{ + {"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}}, + {"cuDoubleComplex", {"hipDoubleComplex", CONV_TYPE, API_COMPLEX}}, + {"cuComplex", {"hipComplex", CONV_TYPE, API_COMPLEX}}, +}; diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index d74c4d4f1a..6871be877b 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -1,426 +1,751 @@ #include "CUDA2HIP.h" - -// Map of all functions +// Map of all CUDA Driver API functions const std::map CUDA_DRIVER_FUNCTION_MAP{ + // 5.2. Error Handling + // no analogue + // NOTE: cudaGetErrorName and hipGetErrorName have different signature + {"cuGetErrorName", {"hipGetErrorName_", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: cudaGetErrorString and hipGetErrorString have different signature + {"cuGetErrorString", {"hipGetErrorString_", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, - ///////////////////////////// CUDA DRIVER API ///////////////////////////// + // 5.3. Initialization + // no analogue + {"cuInit", {"hipInit", CONV_INIT, API_DRIVER}}, - // Error Handling - {"cuGetErrorName", {"hipGetErrorName___", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, // cudaGetErrorName (hipGetErrorName) has different signature - {"cuGetErrorString", {"hipGetErrorString___", CONV_ERROR, API_DRIVER, HIP_UNSUPPORTED}}, // cudaGetErrorString (hipGetErrorString) has different signature + // 5.4 Version Management + // cudaDriverGetVersion + {"cuDriverGetVersion", {"hipDriverGetVersion", CONV_VERSION, API_DRIVER}}, - // Init - {"cuInit", {"hipInit", CONV_INIT, API_DRIVER}}, + // 5.5. Device Management + // cudaGetDevice + // NOTE: cudaGetDevice has additional attr: int ordinal + {"cuDeviceGet", {"hipGetDevice", CONV_DEVICE, API_DRIVER}}, + // cudaDeviceGetAttribute + {"cuDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_DRIVER}}, + // cudaGetDeviceCount + {"cuDeviceGetCount", {"hipGetDeviceCount", CONV_DEVICE, API_DRIVER}}, + // no analogue + {"cuDeviceGetLuid", {"hipDeviceGetLuid", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuDeviceGetName", {"hipDeviceGetName", CONV_DEVICE, API_DRIVER}}, + // no analogue + {"cuDeviceGetUuid", {"hipDeviceGetUuid", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuDeviceTotalMem", {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}}, + {"cuDeviceTotalMem_v2", {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}}, - // Driver - {"cuDriverGetVersion", {"hipDriverGetVersion", CONV_VERSION, API_DRIVER}}, + // 5.6. Device Management [DEPRECATED] + {"cuDeviceComputeCapability", {"hipDeviceComputeCapability", CONV_DEVICE, API_DRIVER}}, + {"cuDeviceGetProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_DRIVER}}, - // Context Management - {"cuCtxCreate_v2", {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxDestroy_v2", {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCacheConfig", {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetCurrent", {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetDevice", {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetFlags", {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetLimit", {"hipCtxGetLimit", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxGetSharedMemConfig", {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxGetStreamPriorityRange", {"hipCtxGetStreamPriorityRange", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxPopCurrent_v2", {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxPushCurrent_v2", {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCacheConfig", {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetCurrent", {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSetLimit", {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, - {"cuCtxSynchronize", {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER}}, - // Context Management [DEPRECATED] - {"cuCtxAttach", {"hipCtxAttach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuCtxDetach", {"hipCtxDetach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.7. Primary Context Management + // no analogues + {"cuDevicePrimaryCtxGetState", {"hipDevicePrimaryCtxGetState", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxRelease", {"hipDevicePrimaryCtxRelease", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxReset", {"hipDevicePrimaryCtxReset", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxRetain", {"hipDevicePrimaryCtxRetain", CONV_CONTEXT, API_DRIVER}}, + {"cuDevicePrimaryCtxSetFlags", {"hipDevicePrimaryCtxSetFlags", CONV_CONTEXT, API_DRIVER}}, - // Primary Context Management - {"cuDevicePrimaryCtxGetState", {"hipDevicePrimaryCtxGetState", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRelease", {"hipDevicePrimaryCtxRelease", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxReset", {"hipDevicePrimaryCtxReset", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxRetain", {"hipDevicePrimaryCtxRetain", CONV_CONTEXT, API_DRIVER}}, - {"cuDevicePrimaryCtxSetFlags", {"hipDevicePrimaryCtxSetFlags", CONV_CONTEXT, API_DRIVER}}, + // 5.8. Context Management + // no analogues, except a few + {"cuCtxCreate", {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxCreate_v2", {"hipCtxCreate", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxDestroy", {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxDestroy_v2", {"hipCtxDestroy", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetApiVersion", {"hipCtxGetApiVersion", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetCacheConfig", {"hipCtxGetCacheConfig", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetCurrent", {"hipCtxGetCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetDevice", {"hipCtxGetDevice", CONV_CONTEXT, API_DRIVER}}, + // cudaGetDeviceFlags + // TODO: rename to hipGetDeviceFlags + {"cuCtxGetFlags", {"hipCtxGetFlags", CONV_CONTEXT, API_DRIVER}}, + // cudaDeviceGetLimit + {"cuCtxGetLimit", {"hipDeviceGetLimit", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxGetSharedMemConfig", {"hipCtxGetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, + // cudaDeviceGetStreamPriorityRange + {"cuCtxGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPopCurrent", {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPopCurrent_v2", {"hipCtxPopCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPushCurrent", {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxPushCurrent_v2", {"hipCtxPushCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSetCacheConfig", {"hipCtxSetCacheConfig", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSetCurrent", {"hipCtxSetCurrent", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSetLimit", {"hipCtxSetLimit", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuCtxSetSharedMemConfig", {"hipCtxSetSharedMemConfig", CONV_CONTEXT, API_DRIVER}}, + {"cuCtxSynchronize", {"hipCtxSynchronize", CONV_CONTEXT, API_DRIVER}}, - // 1. Device Management - {"cuDeviceGet", {"hipGetDevice", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetName", {"hipDeviceGetName", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetCount", {"hipGetDeviceCount", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceTotalMem_v2", {"hipDeviceTotalMem", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetLuid", {"hipDeviceGetLuid", CONV_DEVICE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.9. Context Management [DEPRECATED] + // no analogues + {"cuCtxAttach", {"hipCtxAttach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuCtxDetach", {"hipCtxDetach", CONV_CONTEXT, API_DRIVER, HIP_UNSUPPORTED}}, - // 12. Peer Context Memory Access - {"cuCtxEnablePeerAccess", {"hipCtxEnablePeerAccess", CONV_PEER, API_DRIVER}}, - {"cuCtxDisablePeerAccess", {"hipCtxDisablePeerAccess", CONV_PEER, API_DRIVER}}, - {"cuDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", CONV_PEER, API_DRIVER}}, - {"cuDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaDeviceGetP2PAttribute) + // 5.10. Module Management + // no analogues + {"cuLinkAddData", {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkAddData_v2", {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkAddFile", {"hipLinkAddFile", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkAddFile_v2", {"hipLinkAddFile", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkComplete", {"hipLinkComplete", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkCreate", {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkCreate_v2", {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuLinkDestroy", {"hipLinkDestroy", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuModuleGetFunction", {"hipModuleGetFunction", CONV_MODULE, API_DRIVER}}, + {"cuModuleGetGlobal", {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}}, + {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}}, + {"cuModuleGetSurfRef", {"hipModuleGetSurfRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuModuleGetTexRef", {"hipModuleGetTexRef", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoad", {"hipModuleLoad", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoadData", {"hipModuleLoadData", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoadDataEx", {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}}, + {"cuModuleLoadFatBinary", {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuModuleUnload", {"hipModuleUnload", CONV_MODULE, API_DRIVER}}, - // Device Management [DEPRECATED] - {"cuDeviceComputeCapability", {"hipDeviceComputeCapability", CONV_DEVICE, API_DRIVER}}, - {"cuDeviceGetProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_DRIVER}}, + // 5.11. Memory Management + // no analogue + {"cuArray3DCreate", {"hipArray3DCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArray3DCreate_v2", {"hipArray3DCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArray3DGetDescriptor_v2", {"hipArray3DGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArrayCreate", {"hipArrayCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArrayCreate_v2", {"hipArrayCreate", CONV_MEMORY, API_DRIVER}}, + {"cuArrayDestroy", {"hipArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArrayGetDescriptor", {"hipArrayGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArrayGetDescriptor_v2", {"hipArrayGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDeviceGetByPCIBusId + {"cuDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_MEMORY, API_DRIVER}}, + // cudaDeviceGetPCIBusId + {"cuDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_MEMORY, API_DRIVER}}, + // cudaIpcCloseMemHandle + {"cuIpcCloseMemHandle", {"hipIpcCloseMemHandle", CONV_MEMORY, API_DRIVER}}, + // cudaIpcGetEventHandle + {"cuIpcGetEventHandle", {"hipIpcGetEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaIpcGetMemHandle + {"cuIpcGetMemHandle", {"hipIpcGetMemHandle", CONV_MEMORY, API_DRIVER}}, + // cudaIpcOpenEventHandle + {"cuIpcOpenEventHandle", {"hipIpcOpenEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaIpcOpenMemHandle + {"cuIpcOpenMemHandle", {"hipIpcOpenMemHandle", CONV_MEMORY, API_DRIVER}}, + // cudaMalloc + {"cuMemAlloc", {"hipMalloc", CONV_MEMORY, API_DRIVER}}, + {"cuMemAlloc_v2", {"hipMalloc", CONV_MEMORY, API_DRIVER}}, + // cudaHostAlloc + {"cuMemAllocHost", {"hipMemAllocHost", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemAllocHost_v2", {"hipMemAllocHost", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemAllocManaged", {"hipMemAllocManaged", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemAllocPitch", {"hipMemAllocPitch", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemAllocPitch_v2", {"hipMemAllocPitch", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy due to different signatures + {"cuMemcpy", {"hipMemcpy_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy2D due to different signatures + {"cuMemcpy2D", {"hipMemcpy2D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2D_v2", {"hipMemcpy2D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy2DAsync due to different signatures + {"cuMemcpy2DAsync", {"hipMemcpy2DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2DAsync_v2", {"hipMemcpy2DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpy2DUnaligned", {"hipMemcpy2DUnaligned", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy2DUnaligned_v2", {"hipMemcpy2DUnaligned", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3D due to different signatures + {"cuMemcpy3D", {"hipMemcpy3D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy3D_v2", {"hipMemcpy3D_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3DAsync due to different signatures + {"cuMemcpy3DAsync", {"hipMemcpy3DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpy3DAsync_v2", {"hipMemcpy3DAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3DPeer due to different signatures + {"cuMemcpy3DPeer", {"hipMemcpy3DPeer_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpy3DPeerAsync due to different signatures + {"cuMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpyAsync due to different signatures + {"cuMemcpyAsync", {"hipMemcpyAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpyArrayToArray due to different signatures + {"cuMemcpyAtoA", {"hipMemcpyAtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoA_v2", {"hipMemcpyAtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyAtoD", {"hipMemcpyAtoD", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoD_v2", {"hipMemcpyAtoD", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyAtoH", {"hipMemcpyAtoH", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoH_v2", {"hipMemcpyAtoH", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyAtoHAsync_v2", {"hipMemcpyAtoHAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyDtoA", {"hipMemcpyDtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyDtoA_v2", {"hipMemcpyDtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyDtoD", {"hipMemcpyDtoD", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyDtoDAsync", {"hipMemcpyDtoDAsync", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoDAsync_v2", {"hipMemcpyDtoDAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyDtoH", {"hipMemcpyDtoH", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoH_v2", {"hipMemcpyDtoH", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyDtoHAsync", {"hipMemcpyDtoHAsync", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyDtoHAsync_v2", {"hipMemcpyDtoHAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyHtoA", {"hipMemcpyHtoA", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyHtoA_v2", {"hipMemcpyHtoA", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemcpyHtoAAsync_v2", {"hipMemcpyHtoAAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemcpyHtoD", {"hipMemcpyHtoD", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemcpyHtoDAsync", {"hipMemcpyHtoDAsync", CONV_MEMORY, API_DRIVER}}, + {"cuMemcpyHtoDAsync_v2", {"hipMemcpyHtoDAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + // NOTE: Not equal to cudaMemcpyPeer due to different signatures + {"cuMemcpyPeer", {"hipMemcpyPeer_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMemcpyPeerAsync due to different signatures + {"cuMemcpyPeerAsync", {"hipMemcpyPeerAsync_", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaFree + {"cuMemFree", {"hipFree", CONV_MEMORY, API_DRIVER}}, + {"cuMemFree_v2", {"hipFree", CONV_MEMORY, API_DRIVER}}, + // cudaFreeHost + {"cuMemFreeHost", {"hipHostFree", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemGetAddressRange", {"hipMemGetAddressRange", CONV_MEMORY, API_DRIVER}}, + {"cuMemGetAddressRange_v2", {"hipMemGetAddressRange", CONV_MEMORY, API_DRIVER}}, + // cudaMemGetInfo + {"cuMemGetInfo", {"hipMemGetInfo", CONV_MEMORY, API_DRIVER}}, + {"cuMemGetInfo_v2", {"hipMemGetInfo", CONV_MEMORY, API_DRIVER}}, + // cudaHostAlloc + {"cuMemHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_DRIVER}}, + // cudaHostGetDevicePointer + {"cuMemHostGetDevicePointer", {"hipHostGetDevicePointer", CONV_MEMORY, API_DRIVER}}, + {"cuMemHostGetDevicePointer_v2", {"hipHostGetDevicePointer", CONV_MEMORY, API_DRIVER}}, + // cudaHostGetFlags + {"cuMemHostGetFlags", {"hipMemHostGetFlags", CONV_MEMORY, API_DRIVER}}, + // cudaHostRegister + {"cuMemHostRegister", {"hipHostRegister", CONV_MEMORY, API_DRIVER}}, + {"cuMemHostRegister_v2", {"hipHostRegister", CONV_MEMORY, API_DRIVER}}, + // cudaHostUnregister + {"cuMemHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemsetD16", {"hipMemsetD16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD16_v2", {"hipMemsetD16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD16Async", {"hipMemsetD16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D16", {"hipMemsetD2D16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD2D16_v2", {"hipMemsetD2D16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D16Async", {"hipMemsetD2D16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D32", {"hipMemsetD2D32", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD2D32_v2", {"hipMemsetD2D32", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D32Async", {"hipMemsetD2D32Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D8", {"hipMemsetD2D8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuMemsetD2D8_v2", {"hipMemsetD2D8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuMemsetD2D8Async", {"hipMemsetD2D8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaMemset + {"cuMemsetD32", {"hipMemset", CONV_MEMORY, API_DRIVER}}, + {"cuMemsetD32_v2", {"hipMemset", CONV_MEMORY, API_DRIVER}}, + // cudaMemsetAsync + {"cuMemsetD32Async", {"hipMemsetAsync", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemsetD8", {"hipMemsetD8", CONV_MEMORY, API_DRIVER}}, + {"cuMemsetD8_v2", {"hipMemsetD8", CONV_MEMORY, API_DRIVER}}, + // no analogue + {"cuMemsetD8Async", {"hipMemsetD8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaMallocMipmappedArray due to different signatures + {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFreeMipmappedArray due to different signatures + {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetMipmappedArrayLevel due to different signatures + {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // Module Management - {"cuLinkAddData", {"hipLinkAddData", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkAddFile", {"hipLinkAddFile", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkComplete", {"hipLinkComplete", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkCreate", {"hipLinkCreate", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLinkDestroy", {"hipLinkDestroy", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetFunction", {"hipModuleGetFunction", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetGlobal_v2", {"hipModuleGetGlobal", CONV_MODULE, API_DRIVER}}, - {"cuModuleGetSurfRef", {"hipModuleGetSurfRef", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleGetTexRef", {"hipModuleGetTexRef", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoad", {"hipModuleLoad", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadData", {"hipModuleLoadData", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadDataEx", {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}}, - {"cuModuleLoadFatBinary", {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuModuleUnload", {"hipModuleUnload", CONV_MODULE, API_DRIVER}}, + // 5.12. Unified Addressing + // cudaMemAdvise + {"cuMemAdvise", {"hipMemAdvise", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // TODO: double check cudaMemPrefetchAsync + {"cuMemPrefetchAsync", {"hipMemPrefetchAsync_", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaMemRangeGetAttribute + {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaMemRangeGetAttributes + {"cuMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuPointerGetAttribute", {"hipPointerGetAttribute", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaPointerGetAttributes due to different signatures + {"cuPointerGetAttributes", {"hipPointerGetAttributes", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuPointerSetAttribute", {"hipPointerSetAttribute", CONV_ADDRESSING, API_DRIVER, HIP_UNSUPPORTED}}, - // Event functions - {"cuEventCreate", {"hipEventCreate", CONV_EVENT, API_DRIVER}}, - {"cuEventDestroy_v2", {"hipEventDestroy", CONV_EVENT, API_DRIVER}}, - {"cuEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_DRIVER}}, - {"cuEventQuery", {"hipEventQuery", CONV_EVENT, API_DRIVER}}, - {"cuEventRecord", {"hipEventRecord", CONV_EVENT, API_DRIVER}}, - {"cuEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_DRIVER}}, + // 5.13. Stream Management + // cudaStreamAddCallback + {"cuStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_DRIVER}}, + // cudaStreamAttachMemAsync + {"cuStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamBeginCapture + {"cuStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamCreateWithFlags + {"cuStreamCreate", {"hipStreamCreateWithFlags", CONV_STREAM, API_DRIVER}}, + // cudaStreamCreateWithPriority + {"cuStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_DRIVER}}, + // cudaStreamDestroy + {"cuStreamDestroy", {"hipStreamDestroy", CONV_STREAM, API_DRIVER}}, + {"cuStreamDestroy_v2", {"hipStreamDestroy", CONV_STREAM, API_DRIVER}}, + // cudaStreamEndCapture + {"cuStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuStreamGetCtx", {"hipStreamGetContext", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamGetFlags + {"cuStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_DRIVER}}, + // cudaStreamGetPriority + {"cuStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_DRIVER}}, + // cudaStreamIsCapturing + {"cuStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaStreamQuery + {"cuStreamQuery", {"hipStreamQuery", CONV_STREAM, API_DRIVER}}, + // cudaStreamSynchronize + {"cuStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_DRIVER}}, + // cudaStreamWaitEvent + {"cuStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER}}, - // External Resource Interoperability - {"cuSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.14. Event Management + // cudaEventCreateWithFlags + {"cuEventCreate", {"hipEventCreateWithFlags", CONV_EVENT, API_DRIVER}}, + // cudaEventDestroy + {"cuEventDestroy", {"hipEventDestroy", CONV_EVENT, API_DRIVER}}, + {"cuEventDestroy_v2", {"hipEventDestroy", CONV_EVENT, API_DRIVER}}, + // cudaEventElapsedTime + {"cuEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_DRIVER}}, + // cudaEventQuery + {"cuEventQuery", {"hipEventQuery", CONV_EVENT, API_DRIVER}}, + // cudaEventRecord + {"cuEventRecord", {"hipEventRecord", CONV_EVENT, API_DRIVER}}, + // cudaEventSynchronize + {"cuEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_DRIVER}}, - // Execution Control - {"cuFuncGetAttribute", {"hipFuncGetAttribute", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuFuncSetCacheConfig", {"hipFuncSetCacheConfig", CONV_EXECUTION, API_DRIVER}}, - {"cuFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLaunchKernel", {"hipModuleLaunchKernel", CONV_EXECUTION, API_DRIVER}}, - {"cuLaunchHostFunc", {"hipLaunchHostFunc", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.15. External Resource Interoperability + // cudaDestroyExternalMemory + {"cuDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDestroyExternalSemaphore + {"cuDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaExternalMemoryGetMappedBuffer + {"cuExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaExternalMemoryGetMappedMipmappedArray + {"cuExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaImportExternalMemory + {"cuImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaImportExternalSemaphore + {"cuImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaSignalExternalSemaphoresAsync + {"cuSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaWaitExternalSemaphoresAsync + {"cuWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_DRIVER, HIP_UNSUPPORTED}}, - // Execution Control [DEPRECATED] - {"cuFuncSetBlockShape", {"hipFuncSetBlockShape", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuFuncSetSharedSize", {"hipFuncSetSharedSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLaunch", {"hipLaunch", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaLaunch) - {"cuLaunchGrid", {"hipLaunchGrid", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuLaunchGridAsync", {"hipLaunchGridAsync", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetf", {"hipParamSetf", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSeti", {"hipParamSeti", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetSize", {"hipParamSetSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetSize", {"hipParamSetSize", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuParamSetv", {"hipParamSetv", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.16. Stream Memory Operations + // no analogues + {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWaitValue32", {"hipStreamWaitValue32", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWaitValue64", {"hipStreamWaitValue64", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWriteValue32", {"hipStreamWriteValue32", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuStreamWriteValue64", {"hipStreamWriteValue64", CONV_STREAM_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - // Graph Management - {"cuGraphCreate", {"hipGraphCreate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphLaunch", {"hipGraphLaunch", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddKernelNode", {"hipGraphAddKernelNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddMemsetNode", {"hipGraphAddMemsetNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddHostNode", {"hipGraphAddHostNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddEmptyNode", {"hipGraphAddEmptyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphClone", {"hipGraphClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeFindInClone", {"hipGraphNodeFindInClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeGetType", {"hipGraphNodeGetType", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphGetNodes", {"hipGraphGetNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphGetRootNodes", {"hipGraphGetRootNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphGetEdges", {"hipGraphGetEdges", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphAddDependencies", {"hipGraphAddDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphRemoveDependencies", {"hipGraphRemoveDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphDestroyNode", {"hipGraphDestroyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphInstantiate", {"hipGraphInstantiate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphExecDestroy", {"hipGraphExecDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGraphDestroy", {"hipGraphDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.17.Execution Control + // no analogue + {"cuFuncGetAttribute", {"hipFuncGetAttribute", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFuncSetAttribute due to different signatures + {"cuFuncSetAttribute", {"hipFuncSetAttribute", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFuncSetCacheConfig due to different signatures + {"cuFuncSetCacheConfig", {"hipFuncSetCacheConfig", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaFuncSetCacheConfig due to different signatures + {"cuFuncSetSharedMemConfig", {"hipFuncSetSharedMemConfig", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunchCooperativeKernel due to different signatures + {"cuLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunchCooperativeKernelMultiDevice due to different signatures + {"cuLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaLaunchHostFunc + {"cuLaunchHostFunc", {"hipLaunchHostFunc", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunchKernel due to different signatures + {"cuLaunchKernel", {"hipModuleLaunchKernel", CONV_EXECUTION, API_DRIVER}}, - // Occupancy - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaOccupancyMaxActiveBlocksPerMultiprocessor) - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags) - {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaOccupancyMaxPotentialBlockSize) - {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaOccupancyMaxPotentialBlockSizeWithFlags) + // 5.18.Execution Control [DEPRECATED] + // no analogue + {"cuFuncSetBlockShape", {"hipFuncSetBlockShape", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuFuncSetSharedSize", {"hipFuncSetSharedSize", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaLaunch due to different signatures + {"cuLaunch", {"hipLaunch", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuLaunchGrid", {"hipLaunchGrid", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuLaunchGridAsync", {"hipLaunchGridAsync", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetf", {"hipParamSetf", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSeti", {"hipParamSeti", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetSize", {"hipParamSetSize", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetTexRef", {"hipParamSetTexRef", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuParamSetv", {"hipParamSetv", CONV_EXECUTION, API_DRIVER, HIP_UNSUPPORTED}}, - // Streams - {"cuStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_DRIVER}}, - {"cuStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamCreate", {"hipStreamCreate__", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaStreamCreate due to different signatures - {"cuStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamDestroy_v2", {"hipStreamDestroy", CONV_STREAM, API_DRIVER}}, - {"cuStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_DRIVER}}, - {"cuStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamQuery", {"hipStreamQuery", CONV_STREAM, API_DRIVER}}, - {"cuStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_DRIVER}}, - {"cuStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_DRIVER}}, - {"cuStreamWaitValue32", {"hipStreamWaitValue32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamWaitValue64", {"hipStreamWaitValue64", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamWriteValue32", {"hipStreamWriteValue32", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamWriteValue64", {"hipStreamWriteValue64", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.19. Graph Management + // cudaGraphAddChildGraphNode + {"cuGraphAddChildGraphNode", {"hipGraphAddChildGraphNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddDependencies + {"cuGraphAddDependencies", {"hipGraphAddDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddEmptyNode + {"cuGraphAddEmptyNode", {"hipGraphAddEmptyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddHostNode + {"cuGraphAddHostNode", {"hipGraphAddHostNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddKernelNode + {"cuGraphAddKernelNode", {"hipGraphAddKernelNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddMemcpyNode + {"cuGraphAddMemcpyNode", {"hipGraphAddMemcpyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphAddMemsetNode + {"cuGraphAddMemsetNode", {"hipGraphAddMemsetNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphChildGraphNodeGetGraph + {"cuGraphChildGraphNodeGetGraph", {"hipGraphChildGraphNodeGetGraph", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphClone + {"cuGraphClone", {"hipGraphClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphCreate + {"cuGraphCreate", {"hipGraphCreate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphDestroy + {"cuGraphDestroy", {"hipGraphDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphDestroyNode + {"cuGraphDestroyNode", {"hipGraphDestroyNode", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphExecDestroy + {"cuGraphExecDestroy", {"hipGraphExecDestroy", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphGetEdges + {"cuGraphGetEdges", {"hipGraphGetEdges", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphGetNodes + {"cuGraphGetNodes", {"hipGraphGetNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphGetRootNodes + {"cuGraphGetRootNodes", {"hipGraphGetRootNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphHostNodeGetParams + {"cuGraphHostNodeGetParams", {"hipGraphHostNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphHostNodeSetParams + {"cuGraphHostNodeSetParams", {"hipGraphHostNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphInstantiate + {"cuGraphInstantiate", {"hipGraphInstantiate", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphKernelNodeGetParams + {"cuGraphKernelNodeGetParams", {"hipGraphKernelNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphKernelNodeSetParams + {"cuGraphKernelNodeSetParams", {"hipGraphKernelNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphLaunch + {"cuGraphLaunch", {"hipGraphLaunch", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemcpyNodeGetParams + {"cuGraphMemcpyNodeGetParams", {"hipGraphMemcpyNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemcpyNodeSetParams + {"cuGraphMemcpyNodeSetParams", {"hipGraphMemcpyNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemsetNodeGetParams + {"cuGraphMemsetNodeGetParams", {"hipGraphMemsetNodeGetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphMemsetNodeSetParams + {"cuGraphMemsetNodeSetParams", {"hipGraphMemsetNodeSetParams", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeFindInClone + {"cuGraphNodeFindInClone", {"hipGraphNodeFindInClone", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeGetDependencies + {"cuGraphNodeGetDependencies", {"hipGraphNodeGetDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeGetDependentNodes + {"cuGraphNodeGetDependentNodes", {"hipGraphNodeGetDependentNodes", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphNodeGetType + {"cuGraphNodeGetType", {"hipGraphNodeGetType", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphRemoveDependencies + {"cuGraphRemoveDependencies", {"hipGraphRemoveDependencies", CONV_GRAPH, API_DRIVER, HIP_UNSUPPORTED}}, - // Memory management - {"cuArray3DCreate", {"hipArray3DCreate", CONV_MEMORY, API_DRIVER}}, - {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayCreate", {"hipArrayCreate", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayDestroy", {"hipArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuArrayGetDescriptor", {"hipArrayGetDescriptor", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcCloseMemHandle", {"hipIpcCloseMemHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcGetEventHandle", {"hipIpcGetEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcGetMemHandle", {"hipIpcGetMemHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcOpenEventHandle", {"hipIpcOpenEventHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuIpcOpenMemHandle", {"hipIpcOpenMemHandle", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAlloc_v2", {"hipMalloc", CONV_MEMORY, API_DRIVER}}, - {"cuMemAllocHost", {"hipMemAllocHost", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAllocManaged", {"hipMemAllocManaged", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemAllocPitch", {"hipMemAllocPitch__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemAllocPitch due to different signatures - {"cuMemcpy", {"hipMemcpy__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy due to different signatures - {"cuMemcpy2D", {"hipMemcpy2D__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy2D due to different signatures - {"cuMemcpy2DAsync", {"hipMemcpy2DAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy2DAsync due to different signatures - {"cuMemcpy2DUnaligned", {"hipMemcpy2DUnaligned", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpy3D", {"hipMemcpy3D__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3D due to different signatures - {"cuMemcpy3DAsync", {"hipMemcpy3DAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3DAsync due to different signatures - {"cuMemcpy3DPeer", {"hipMemcpy3DPeer__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3DPeer due to different signatures - {"cuMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpy3DPeerAsync due to different signatures - {"cuMemcpyAsync", {"hipMemcpyAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpyAsync due to different signatures - {"cuMemcpyAtoA", {"hipMemcpyAtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoD", {"hipMemcpyAtoD", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoH", {"hipMemcpyAtoH", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyAtoHAsync", {"hipMemcpyAtoHAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoA", {"hipMemcpyDtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyDtoD_v2", {"hipMemcpyDtoD", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoDAsync_v2", {"hipMemcpyDtoDAsync", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoH_v2", {"hipMemcpyDtoH", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyDtoHAsync_v2", {"hipMemcpyDtoHAsync", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoA", {"hipMemcpyHtoA", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoAAsync", {"hipMemcpyHtoAAsync", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemcpyHtoD_v2", {"hipMemcpyHtoD", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyHtoDAsync_v2", {"hipMemcpyHtoDAsync", CONV_MEMORY, API_DRIVER}}, - {"cuMemcpyPeerAsync", {"hipMemcpyPeerAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpyPeerAsync due to different signatures - {"cuMemcpyPeer", {"hipMemcpyPeer__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaMemcpyPeer due to different signatures - {"cuMemFree_v2", {"hipFree", CONV_MEMORY, API_DRIVER}}, - {"cuMemFreeHost", {"hipHostFree", CONV_MEMORY, API_DRIVER}}, - {"cuMemGetAddressRange", {"hipMemGetAddressRange", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemGetInfo_v2", {"hipMemGetInfo", CONV_MEMORY, API_DRIVER}}, - {"cuMemHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaHostAlloc) - {"cuMemHostGetDevicePointer", {"hipMemHostGetDevicePointer", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemHostGetFlags", {"hipMemHostGetFlags", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemHostRegister_v2", {"hipHostRegister", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaHostAlloc) - {"cuMemHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaHostUnregister) - {"cuMemsetD16_v2", {"hipMemsetD16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD16Async", {"hipMemsetD16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D16_v2", {"hipMemsetD2D16", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D16Async", {"hipMemsetD2D16Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D32_v2", {"hipMemsetD2D32", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D32Async", {"hipMemsetD2D32Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D8_v2", {"hipMemsetD2D8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD2D8Async", {"hipMemsetD2D8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD32_v2", {"hipMemset", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaMemset) - {"cuMemsetD32Async", {"hipMemsetAsync", CONV_MEMORY, API_DRIVER}}, // API_Runtime ANALOGUE (cudaMemsetAsync) - {"cuMemsetD8_v2", {"hipMemsetD8", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMemsetD8Async", {"hipMemsetD8Async", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMipmappedArrayCreate", {"hipMipmappedArrayCreate", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMipmappedArrayDestroy", {"hipMipmappedArrayDestroy", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuMipmappedArrayGetLevel", {"hipMipmappedArrayGetLevel", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.20. Occupancy + // cudaOccupancyMaxActiveBlocksPerMultiprocessor + {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", CONV_OCCUPANCY, API_DRIVER}}, + // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags + {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaOccupancyMaxPotentialBlockSize + {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", CONV_OCCUPANCY, API_DRIVER}}, + // cudaOccupancyMaxPotentialBlockSizeWithFlags + {"cuOccupancyMaxPotentialBlockSizeWithFlags", {"hipOccupancyMaxPotentialBlockSizeWithFlags", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, - // Unified Addressing - {"cuMemPrefetchAsync", {"hipMemPrefetchAsync__", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE (cudaMemPrefetchAsync has different signature) - {"cuMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // API_Runtime ANALOGUE (cudaMemAdvise) - {"cuMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // API_Runtime ANALOGUE (cudaMemRangeGetAttribute) - {"cuMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, // // API_Runtime ANALOGUE (cudaMemRangeGetAttributes) - {"cuPointerGetAttribute", {"hipPointerGetAttribute", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuPointerGetAttributes", {"hipPointerGetAttributes", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuPointerSetAttribute", {"hipPointerSetAttribute", CONV_MEMORY, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.21. Texture Reference Management + // no analogues + {"cuTexRefGetAddress", {"hipTexRefGetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetAddress_v2", {"hipTexRefGetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetAddressMode", {"hipTexRefGetAddressMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetArray", {"hipTexRefGetArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetBorderColor", {"hipTexRefGetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetFilterMode", {"hipTexRefGetFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetFlags", {"hipTexRefGetFlags", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetFormat", {"hipTexRefGetFormat", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMaxAnisotropy", {"hipTexRefGetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmapFilterMode", {"hipTexRefGetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmapLevelBias", {"hipTexRefGetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmapLevelClamp", {"hipTexRefGetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefGetMipmappedArray", {"hipTexRefGetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetAddress", {"hipTexRefSetAddress", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress_v2", {"hipTexRefSetAddress", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress2D", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress2D_v2", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddress2D_v3", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetAddressMode", {"hipTexRefSetAddressMode", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetArray", {"hipTexRefSetArray", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetBorderColor", {"hipTexRefSetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetFilterMode", {"hipTexRefSetFilterMode", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetFlags", {"hipTexRefSetFlags", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetFormat", {"hipTexRefSetFormat", CONV_TEXTURE, API_DRIVER}}, + {"cuTexRefSetMaxAnisotropy", {"hipTexRefSetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmapFilterMode", {"hipTexRefSetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmapLevelBias", {"hipTexRefSetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmapLevelClamp", {"hipTexRefSetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefSetMipmappedArray", {"hipTexRefSetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // Texture Reference Mngmnt + // 5.22. Texture Reference Management [DEPRECATED] + // no analogues + {"cuTexRefCreate", {"hipTexRefCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuTexRefDestroy", {"hipTexRefDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetAddress", {"hipTexRefGetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetAddressMode", {"hipTexRefGetAddressMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetArray", {"hipTexRefGetArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetBorderColor", {"hipTexRefGetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE - {"cuTexRefGetFilterMode", {"hipTexRefGetFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFlags", {"hipTexRefGetFlags", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetFormat", {"hipTexRefGetFormat", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMaxAnisotropy", {"hipTexRefGetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapFilterMode", {"hipTexRefGetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelBias", {"hipTexRefGetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmapLevelClamp", {"hipTexRefGetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefGetMipmappedArray", {"hipTexRefGetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddress", {"hipTexRefSetAddress", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddress2D", {"hipTexRefSetAddress2D", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetAddressMode", {"hipTexRefSetAddressMode", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetArray", {"hipTexRefSetArray", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetBorderColor", {"hipTexRefSetBorderColor", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE - {"cuTexRefSetFilterMode", {"hipTexRefSetFilterMode", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFlags", {"hipTexRefSetFlags", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetFormat", {"hipTexRefSetFormat", CONV_TEXTURE, API_DRIVER}}, - {"cuTexRefSetMaxAnisotropy", {"hipTexRefSetMaxAnisotropy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapFilterMode", {"hipTexRefSetMipmapFilterMode", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelBias", {"hipTexRefSetMipmapLevelBias", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmapLevelClamp", {"hipTexRefSetMipmapLevelClamp", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefSetMipmappedArray", {"hipTexRefSetMipmappedArray", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.23. Surface Reference Management + // no analogues + {"cuSurfRefGetArray", {"hipSurfRefGetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuSurfRefSetArray", {"hipSurfRefSetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - // Texture Reference Mngmnt [DEPRECATED] - {"cuTexRefCreate", {"hipTexRefCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexRefDestroy", {"hipTexRefDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.24. Texture Object Management + // no analogue + // NOTE: Not equal to cudaCreateTextureObject due to different signatures + {"cuTexObjectCreate", {"hipTexObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDestroyTextureObject + {"cuTexObjectDestroy", {"hipTexObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetTextureObjectResourceDesc due to different signatures + {"cuTexObjectGetResourceDesc", {"hipTexObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGetTextureObjectResourceViewDesc + {"cuTexObjectGetResourceViewDesc", {"hipTexObjectGetResourceViewDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetTextureObjectTextureDesc due to different signatures + {"cuTexObjectGetTextureDesc", {"hipTexObjectGetTextureDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // Surface Reference Mngmnt - {"cuSurfRefGetArray", {"hipSurfRefGetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfRefSetArray", {"hipSurfRefSetArray", CONV_SURFACE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.25. Surface Object Management + // no analogue + // NOTE: Not equal to cudaCreateSurfaceObject due to different signatures + {"cuSurfObjectCreate", {"hipSurfObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaDestroySurfaceObject + {"cuSurfObjectDestroy", {"hipSurfObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cudaGetSurfaceObjectResourceDesc due to different signatures + {"cuSurfObjectGetResourceDesc", {"hipSurfObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - // Texture Object Mngmnt - {"cuTexObjectCreate", {"hipTexObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectDestroy", {"hipTexObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectGetResourceDesc", {"hipTexObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectGetResourceViewDesc", {"hipTexObjectGetResourceViewDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuTexObjectGetTextureDesc", {"hipTexObjectGetTextureDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.26. Peer Context Memory Access + // no analogue + // NOTE: Not equal to cudaDeviceEnablePeerAccess due to different signatures + {"cuCtxEnablePeerAccess", {"hipCtxEnablePeerAccess", CONV_PEER, API_DRIVER}}, + // no analogue + // NOTE: Not equal to cudaDeviceDisablePeerAccess due to different signatures + {"cuCtxDisablePeerAccess", {"hipCtxDisablePeerAccess", CONV_PEER, API_DRIVER}}, + // cudaDeviceCanAccessPeer + {"cuDeviceCanAccessPeer", {"hipDeviceCanAccessPeer", CONV_PEER, API_DRIVER}}, + // cudaDeviceGetP2PAttribute + {"cuDeviceGetP2PAttribute", {"hipDeviceGetP2PAttribute", CONV_PEER, API_DRIVER, HIP_UNSUPPORTED}}, - // Surface Object Mngmnt - {"cuSurfObjectCreate", {"hipSurfObjectCreate", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfObjectDestroy", {"hipSurfObjectDestroy", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuSurfObjectGetResourceDesc", {"hipSurfObjectGetResourceDesc", CONV_TEXTURE, API_DRIVER, HIP_UNSUPPORTED}}, + // 5.27. Graphics Interoperability + // cudaGraphicsMapResources + {"cuGraphicsMapResources", {"hipGraphicsMapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedMipmappedArray + {"cuGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedPointer + {"cuGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedPointer + {"cuGraphicsResourceGetMappedPointer_v2", {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceSetMapFlags + {"cuGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceSetMapFlags + {"cuGraphicsResourceSetMapFlags_v2", {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsSubResourceGetMappedArray + {"cuGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsUnmapResources + {"cuGraphicsUnmapResources", {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsUnregisterResource + {"cuGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, - // Graphics Interoperability - {"cuGraphicsMapResources", {"hipGraphicsMapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsMapResources) - {"cuGraphicsResourceGetMappedMipmappedArray", {"hipGraphicsResourceGetMappedMipmappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedMipmappedArray) - {"cuGraphicsResourceGetMappedPointer", {"hipGraphicsResourceGetMappedPointer", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedPointer) - {"cuGraphicsResourceSetMapFlags", {"hipGraphicsResourceSetMapFlags", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceSetMapFlags) - {"cuGraphicsSubResourceGetMappedArray", {"hipGraphicsSubResourceGetMappedArray", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsSubResourceGetMappedArray) - {"cuGraphicsUnmapResources", {"hipGraphicsUnmapResources", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsUnmapResources) - {"cuGraphicsUnregisterResource", {"hipGraphicsUnregisterResource", CONV_GRAPHICS, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsUnregisterResource) + // 5.28. Profiler Control + // cudaProfilerInitialize + {"cuProfilerInitialize", {"hipProfilerInitialize", CONV_PROFILER, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaProfilerStart + {"cuProfilerStart", {"hipProfilerStart", CONV_PROFILER, API_DRIVER}}, + // cudaProfilerStop + {"cuProfilerStop", {"hipProfilerStop", CONV_PROFILER, API_DRIVER}}, - // Profiler - {"cuProfilerInitialize", {"hipProfilerInitialize", CONV_PROFILER, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaProfilerInitialize) - {"cuProfilerStart", {"hipProfilerStart", CONV_PROFILER, API_DRIVER}}, // API_Runtime ANALOGUE (cudaProfilerStart) - {"cuProfilerStop", {"hipProfilerStop", CONV_PROFILER, API_DRIVER}}, // API_Runtime ANALOGUE (cudaProfilerStop) + // 5.29. OpenGL Interoperability + // cudaGLGetDevices + {"cuGLGetDevices", {"hipGLGetDevices", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsGLRegisterBuffer + {"cuGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsGLRegisterImage + {"cuGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaWGLGetDevice + {"cuWGLGetDevice", {"hipWGLGetDevice", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGLGetDevices", {"hipGLGetDevices", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLGetDevices) - {"cuGraphicsGLRegisterBuffer", {"hipGraphicsGLRegisterBuffer", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsGLRegisterBuffer) - {"cuGraphicsGLRegisterImage", {"hipGraphicsGLRegisterImage", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsGLRegisterImage) - {"cuWGLGetDevice", {"hipWGLGetDevice", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaWGLGetDevice) + // 5.29. OpenGL Interoperability [DEPRECATED] + // no analogue + {"cuGLCtxCreate", {"hipGLCtxCreate", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuGLInit", {"hipGLInit", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // NOTE: Not equal to cudaGLMapBufferObject due to different signatures + {"cuGLMapBufferObject", {"hipGLMapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // NOTE: Not equal to cudaGLMapBufferObjectAsync due to different signatures + {"cuGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLRegisterBufferObject + {"cuGLRegisterBufferObject", {"hipGLRegisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLSetBufferObjectMapFlags + {"cuGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLUnmapBufferObject + {"cuGLUnmapBufferObject", {"hipGLUnmapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLUnmapBufferObjectAsync + {"cuGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGLUnregisterBufferObject + {"cuGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuGLCtxCreate", {"hipGLCtxCreate", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuGLInit", {"hipGLInit", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuGLMapBufferObject", {"hipGLMapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaGLMapBufferObject due to different signatures - {"cuGLMapBufferObjectAsync", {"hipGLMapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // Not equal to cudaGLMapBufferObjectAsync due to different signatures - {"cuGLRegisterBufferObject", {"hipGLRegisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLRegisterBufferObject) - {"cuGLSetBufferObjectMapFlags", {"hipGLSetBufferObjectMapFlags", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLSetBufferObjectMapFlags) - {"cuGLUnmapBufferObject", {"hipGLUnmapBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLUnmapBufferObject) - {"cuGLUnmapBufferObjectAsync", {"hipGLUnmapBufferObjectAsync", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLUnmapBufferObjectAsync) - {"cuGLUnregisterBufferObject", {"hipGLUnregisterBufferObject", CONV_OPENGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGLUnregisterBufferObject) + // 5.30.Direct3D 9 Interoperability + // no analogue + {"cuD3D9CtxCreate", {"hipD3D9CtxCreate", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuD3D9CtxCreateOnDevice", {"hipD3D9CtxCreateOnDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9GetDevice + {"cuD3D9GetDevice", {"hipD3D9GetDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9GetDevices + {"cuD3D9GetDevices", {"hipD3D9GetDevices", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9GetDirect3DDevice + {"cuD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsD3D9RegisterResource + {"cuGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuD3D9CtxCreate", {"hipD3D9CtxCreate", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D9CtxCreateOnDevice", {"hipD3D9CtxCreateOnDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D9GetDevice", {"hipD3D9GetDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9GetDevice) - {"cuD3D9GetDevices", {"hipD3D9GetDevices", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9GetDevices) - {"cuD3D9GetDirect3DDevice", {"hipD3D9GetDirect3DDevice", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9GetDirect3DDevice) - {"cuGraphicsD3D9RegisterResource", {"hipGraphicsD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsD3D9RegisterResource) + // 5.30.Direct3D 9 Interoperability [DEPRECATED] + // cudaD3D9MapResources + {"cuD3D9MapResources", {"hipD3D9MapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9RegisterResource + {"cuD3D9RegisterResource", {"hipD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedArray + {"cuD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedPitch + {"cuD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedPointer + {"cuD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetMappedSize + {"cuD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceGetSurfaceDimensions + {"cuD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9ResourceSetMapFlags + {"cuD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9UnmapResources + {"cuD3D9UnmapResources", {"hipD3D9UnmapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D9UnregisterResource + {"cuD3D9UnregisterResource", {"hipD3D9UnregisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, - {"cuD3D9MapResources", {"hipD3D9MapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9MapResources) - {"cuD3D9RegisterResource", {"hipD3D9RegisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9RegisterResource) - {"cuD3D9ResourceGetMappedArray", {"hipD3D9ResourceGetMappedArray", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedArray) - {"cuD3D9ResourceGetMappedPitch", {"hipD3D9ResourceGetMappedPitch", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedPitch) - {"cuD3D9ResourceGetMappedPointer", {"hipD3D9ResourceGetMappedPointer", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedPointer) - {"cuD3D9ResourceGetMappedSize", {"hipD3D9ResourceGetMappedSize", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetMappedSize) - {"cuD3D9ResourceGetSurfaceDimensions", {"hipD3D9ResourceGetSurfaceDimensions", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceGetSurfaceDimensions) - {"cuD3D9ResourceSetMapFlags", {"hipD3D9ResourceSetMapFlags", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9ResourceSetMapFlags) - {"cuD3D9UnmapResources", {"hipD3D9UnmapResources", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9UnmapResources) - {"cuD3D9UnregisterResource", {"hipD3D9UnregisterResource", CONV_D3D9, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D9UnregisterResource) + // 5.31. Direct3D 10 Interoperability + // cudaD3D10GetDevice + {"cuD3D10GetDevice", {"hipD3D10GetDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10GetDevices + {"cuD3D10GetDevices", {"hipD3D10GetDevices", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsD3D10RegisterResource + {"cuGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 10 Interoperability - {"cuD3D10GetDevice", {"hipD3D10GetDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10GetDevice) - {"cuD3D10GetDevices", {"hipD3D10GetDevices", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10GetDevices) - {"cuGraphicsD3D10RegisterResource", {"hipGraphicsD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsD3D10RegisterResource) + // 5.31. Direct3D 10 Interoperability [DEPRECATED] + // no analogue + {"cuD3D10CtxCreate", {"hipD3D10CtxCreate", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuD3D10CtxCreateOnDevice", {"hipD3D10CtxCreateOnDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10GetDirect3DDevice + {"cuD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10MapResources + {"cuD3D10MapResources", {"hipD3D10MapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10RegisterResource + {"cuD3D10RegisterResource", {"hipD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedArray + {"cuD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedPitch + {"cuD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedPointer + {"cuD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetMappedSize + {"cuD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceGetSurfaceDimensions + {"cuD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10ResourceSetMapFlags + {"cuD310ResourceSetMapFlags", {"hipD3D10ResourceSetMapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10UnmapResources + {"cuD3D10UnmapResources", {"hipD3D10UnmapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D10UnregisterResource + {"cuD3D10UnregisterResource", {"hipD3D10UnregisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 10 Interoperability [DEPRECATED] - {"cuD3D10CtxCreate", {"hipD3D10CtxCreate", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D10CtxCreateOnDevice", {"hipD3D10CtxCreateOnDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D10GetDirect3DDevice", {"hipD3D10GetDirect3DDevice", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10GetDirect3DDevice) - {"cuD3D10MapResources", {"hipD3D10MapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10MapResources) - {"cuD3D10RegisterResource", {"hipD3D10RegisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10RegisterResource) - {"cuD3D10ResourceGetMappedArray", {"hipD3D10ResourceGetMappedArray", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedArray) - {"cuD3D10ResourceGetMappedPitch", {"hipD3D10ResourceGetMappedPitch", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedPitch) - {"cuD3D10ResourceGetMappedPointer", {"hipD3D10ResourceGetMappedPointer", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedPointer) - {"cuD3D10ResourceGetMappedSize", {"hipD3D10ResourceGetMappedSize", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetMappedSize) - {"cuD3D10ResourceGetSurfaceDimensions", {"hipD3D10ResourceGetSurfaceDimensions", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceGetSurfaceDimensions) - {"cuD310ResourceSetMapFlags", {"hipD3D10ResourceSetMapFlags", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10ResourceSetMapFlags) - {"cuD3D10UnmapResources", {"hipD3D10UnmapResources", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10UnmapResources) - {"cuD3D10UnregisterResource", {"hipD3D10UnregisterResource", CONV_D3D10, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D10UnregisterResource) + // 5.32. Direct3D 11 Interoperability + // cudaD3D11GetDevice + {"cuD3D11GetDevice", {"hipD3D11GetDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D11GetDevices + {"cuD3D11GetDevices", {"hipD3D11GetDevices", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsD3D11RegisterResource + {"cuGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 11 Interoperability - {"cuD3D11GetDevice", {"hipD3D11GetDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D11GetDevice) - {"cuD3D11GetDevices", {"hipD3D11GetDevices", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D11GetDevices) - {"cuGraphicsD3D11RegisterResource", {"hipGraphicsD3D11RegisterResource", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsD3D11RegisterResource) + // 5.32. Direct3D 11 Interoperability [DEPRECATED] + // no analogue + {"cuD3D11CtxCreate", {"hipD3D11CtxCreate", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuD3D11CtxCreateOnDevice", {"hipD3D11CtxCreateOnDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaD3D11GetDirect3DDevice + {"cuD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, - // Direct3D 11 Interoperability [DEPRECATED] - {"cuD3D11CtxCreate", {"hipD3D11CtxCreate", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D11CtxCreateOnDevice", {"hipD3D11CtxCreateOnDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuD3D11GetDirect3DDevice", {"hipD3D11GetDirect3DDevice", CONV_D3D11, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaD3D11GetDirect3DDevice) + // 5.33. VDPAU Interoperability + // cudaGraphicsVDPAURegisterOutputSurface + {"cuGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsVDPAURegisterVideoSurface + {"cuGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaVDPAUGetDevice + {"cuVDPAUGetDevice", {"hipVDPAUGetDevice", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, + // no analogue + {"cuVDPAUCtxCreate", {"hipVDPAUCtxCreate", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, - // VDPAU Interoperability - {"cuGraphicsVDPAURegisterOutputSurface", {"hipGraphicsVDPAURegisterOutputSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsVDPAURegisterOutputSurface) - {"cuGraphicsVDPAURegisterVideoSurface", {"hipGraphicsVDPAURegisterVideoSurface", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsVDPAURegisterVideoSurface) - {"cuVDPAUGetDevice", {"hipVDPAUGetDevice", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaVDPAUGetDevice) - {"cuVDPAUCtxCreate", {"hipVDPAUCtxCreate", CONV_VDPAU, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - - // EGL Interoperability - {"cuEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerAcquireFrame) - {"cuEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerConnect) - {"cuEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerConnectWithFlags) - {"cuEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // no API_Runtime ANALOGUE - {"cuEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamConsumerReleaseFrame) - {"cuEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerConnect) - {"cuEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerDisconnect) - {"cuEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerPresentFrame) - {"cuEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaEGLStreamProducerReturnFrame) - {"cuGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsEGLRegisterImage) - {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, // API_Runtime ANALOGUE (cudaGraphicsResourceGetMappedEglFrame) - - -////////////////////////////// cuComplex API ////////////////////////////// - {"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}}, - {"cuDoubleComplex", {"hipDoubleComplex", CONV_TYPE, API_COMPLEX}}, - {"cuComplex", {"hipComplex", CONV_TYPE, API_COMPLEX}}, - - {"cuCrealf", {"hipCrealf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimagf", {"hipCimagf", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuFloatComplex", {"make_hipFloatComplex", CONV_COMPLEX, API_COMPLEX}}, - {"cuConjf", {"hipConjf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCaddf", {"hipCaddf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsubf", {"hipCsubf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmulf", {"hipCmulf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdivf", {"hipCdivf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabsf", {"hipCabsf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCreal", {"hipCreal", CONV_COMPLEX, API_COMPLEX}}, - {"cuCimag", {"hipCimag", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuDoubleComplex", {"make_hipDoubleComplex", CONV_COMPLEX, API_COMPLEX}}, - {"cuConj", {"hipConj", CONV_COMPLEX, API_COMPLEX}}, - {"cuCadd", {"hipCadd", CONV_COMPLEX, API_COMPLEX}}, - {"cuCsub", {"hipCsub", CONV_COMPLEX, API_COMPLEX}}, - {"cuCmul", {"hipCmul", CONV_COMPLEX, API_COMPLEX}}, - {"cuCdiv", {"hipCdiv", CONV_COMPLEX, API_COMPLEX}}, - {"cuCabs", {"hipCabs", CONV_COMPLEX, API_COMPLEX}}, - {"make_cuComplex", {"make_hipComplex", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexFloatToDouble", {"hipComplexFloatToDouble", CONV_COMPLEX, API_COMPLEX}}, - {"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}}, - {"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}}, -}; \ No newline at end of file + // 5.34. EGL Interoperability + // cudaEGLStreamConsumerAcquireFrame + {"cuEGLStreamConsumerAcquireFrame", {"hipEGLStreamConsumerAcquireFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerConnect + {"cuEGLStreamConsumerConnect", {"hipEGLStreamConsumerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerConnectWithFlags + {"cuEGLStreamConsumerConnectWithFlags", {"hipEGLStreamConsumerConnectWithFlags", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerDisconnect + {"cuEGLStreamConsumerDisconnect", {"hipEGLStreamConsumerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamConsumerReleaseFrame + {"cuEGLStreamConsumerReleaseFrame", {"hipEGLStreamConsumerReleaseFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerConnect + {"cuEGLStreamProducerConnect", {"hipEGLStreamProducerConnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerDisconnect + {"cuEGLStreamProducerDisconnect", {"hipEGLStreamProducerDisconnect", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerPresentFrame + {"cuEGLStreamProducerPresentFrame", {"hipEGLStreamProducerPresentFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEGLStreamProducerReturnFrame + {"cuEGLStreamProducerReturnFrame", {"hipEGLStreamProducerReturnFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsEGLRegisterImage + {"cuGraphicsEGLRegisterImage", {"hipGraphicsEGLRegisterImage", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaGraphicsResourceGetMappedEglFrame + {"cuGraphicsResourceGetMappedEglFrame", {"hipGraphicsResourceGetMappedEglFrame", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, + // cudaEventCreateFromEGLSync + {"cuEventCreateFromEGLSync", {"hipEventCreateFromEGLSync", CONV_EGL, API_DRIVER, HIP_UNSUPPORTED}}, +}; diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp index 07b78ac738..5438aab3de 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp @@ -80,8 +80,10 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP{ {"CUDA_TEXTURE_DESC_st", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUDA_TEXTURE_DESC", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}}, - {"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}}, + // no analogue + // NOTE: cudaDeviceProp differs + {"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, + {"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // cudaIpcEventHandle_st {"CUipcEventHandle_st", {"ihipIpcEventHandle_t", CONV_TYPE, API_DRIVER}}, diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp index 12427b2ff4..3031cadf64 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp @@ -1,6 +1,6 @@ #include "CUDA2HIP.h" -// Map of all functions +// Map of all CUDA Runtime API functions const std::map CUDA_RUNTIME_FUNCTION_MAP{ // Error API {"cudaGetLastError", {"hipGetLastError", CONV_ERROR, API_RUNTIME}}, @@ -9,29 +9,49 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaGetErrorString", {"hipGetErrorString", CONV_ERROR, API_RUNTIME}}, // memcpy functions + // no analogue + // NOTE: Not equal to cuMemcpy due to different signatures {"cudaMemcpy", {"hipMemcpy", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyToArray", {"hipMemcpyToArray", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyToSymbol", {"hipMemcpyToSymbol", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", CONV_MEMORY, API_RUNTIME}}, + {"cudaMemcpyAsync", {"hipMemcpyAsync", CONV_MEMORY, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuMemcpy2D due to different signatures {"cudaMemcpy2D", {"hipMemcpy2D", CONV_MEMORY, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuMemcpy2DAsync due to different signatures {"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpy2DToArray", {"hipMemcpy2DToArray", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpy2DFromArray", {"hipMemcpy2DFromArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpy2DFromArrayAsync", {"hipMemcpy2DFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpy2DToArrayAsync", {"hipMemcpy2DToArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpy3D due to different signatures {"cudaMemcpy3D", {"hipMemcpy3D", CONV_MEMORY, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuMemcpy3DAsync due to different signatures {"cudaMemcpy3DAsync", {"hipMemcpy3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpy3DPeer due to different signatures {"cudaMemcpy3DPeer", {"hipMemcpy3DPeer", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpy3DPeerAsync due to different signatures {"cudaMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMemcpyAtoA due to different signatures {"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpyFromArrayAsync", {"hipMemcpyFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMemcpyFromSymbol", {"hipMemcpyFromSymbol", CONV_MEMORY, API_RUNTIME}}, {"cudaMemcpyFromSymbolAsync", {"hipMemcpyFromSymbolAsync", CONV_MEMORY, API_RUNTIME}}, - {"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // - {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // - {"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // + // cuMemAdvise + {"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuMemRangeGetAttribute + {"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuMemRangeGetAttributes + {"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // memset {"cudaMemset", {"hipMemset", CONV_MEMORY, API_RUNTIME}}, @@ -42,13 +62,17 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaMemset3DAsync", {"hipMemset3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // Memory management + // cuMemGetInfo {"cudaMemGetInfo", {"hipMemGetInfo", CONV_MEMORY, API_RUNTIME}}, {"cudaArrayGetInfo", {"hipArrayGetInfo", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures {"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME}}, {"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME}}, - {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // // API_Driver ANALOGUE (cuMemPrefetchAsync) + // TODO: double check cuMemPrefetchAsync + {"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // malloc {"cudaMalloc", {"hipMalloc", CONV_MEMORY, API_RUNTIME}}, @@ -57,15 +81,22 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaMalloc3D", {"hipMalloc3D", CONV_MEMORY, API_RUNTIME}}, {"cudaMalloc3DArray", {"hipMalloc3DArray", CONV_MEMORY, API_RUNTIME}}, {"cudaMallocManaged", {"hipMallocManaged", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, + // no analogue + // NOTE: Not equal to cuMipmappedArrayCreate due to different signatures {"cudaMallocMipmappedArray", {"hipMallocMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, {"cudaMallocPitch", {"hipMallocPitch", CONV_MEMORY, API_RUNTIME}}, + // cuMemFree {"cudaFree", {"hipFree", CONV_MEMORY, API_RUNTIME}}, + // cuMemFreeHost {"cudaFreeHost", {"hipHostFree", CONV_MEMORY, API_RUNTIME}}, {"cudaFreeArray", {"hipFreeArray", CONV_MEMORY, API_RUNTIME}}, + // cuMemHostRegister {"cudaHostRegister", {"hipHostRegister", CONV_MEMORY, API_RUNTIME}}, + // cuMemHostUnregister {"cudaHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_RUNTIME}}, - // hipHostAlloc deprecated - use hipHostMalloc instead + // cuMemHostAlloc + // NOTE: hipHostAlloc deprecated - use hipHostMalloc instead {"cudaHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_RUNTIME}}, // make memory functions @@ -74,35 +105,81 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"make_cudaPos", {"make_hipPos", CONV_MEMORY, API_RUNTIME}}, // Host Register Flags + // cuMemHostGetFlags {"cudaHostGetFlags", {"hipHostGetFlags", CONV_MEMORY, API_RUNTIME}}, // Events - {"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}}, - {"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}}, - {"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}}, - {"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}}, - {"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}}, - {"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}}, - {"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}}, + // no analogue + // NOTE: Not equal to cuEventCreate due to different signatures + {"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}}, + // cuEventCreate + {"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}}, + // cuEventDestroy + {"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}}, + // cuEventRecord + {"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}}, + // cuEventElapsedTime + {"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}}, + // cuEventSynchronize + {"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}}, + // cuEventQuery + {"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}}, + + // 5.6. External Resource Interoperability + // cuDestroyExternalMemory + {"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuDestroyExternalSemaphore + {"cudaDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuExternalMemoryGetMappedBuffer + {"cudaExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuExternalMemoryGetMappedMipmappedArray + {"cudaExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuImportExternalMemory + {"cudaImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuImportExternalSemaphore + {"cudaImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuSignalExternalSemaphoresAsync + {"cudaSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuWaitExternalSemaphoresAsync + {"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}}, // Streams + // no analogue + // NOTE: Not equal to cuStreamCreate due to different signatures {"cudaStreamCreate", {"hipStreamCreate", CONV_STREAM, API_RUNTIME}}, + // cuStreamCreate {"cudaStreamCreateWithFlags", {"hipStreamCreateWithFlags", CONV_STREAM, API_RUNTIME}}, - {"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamCreateWithPriority + {"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME}}, + // cuStreamDestroy {"cudaStreamDestroy", {"hipStreamDestroy", CONV_STREAM, API_RUNTIME}}, + // cuStreamWaitEvent {"cudaStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME}}, + // cuStreamSynchronize {"cudaStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME}}, + // cuStreamGetFlags {"cudaStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME}}, + // cuStreamQuery {"cudaStreamQuery", {"hipStreamQuery", CONV_STREAM, API_RUNTIME}}, + // cuStreamAddCallback {"cudaStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_RUNTIME}}, + // cuStreamAttachMemAsync {"cudaStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, - {"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamBeginCapture + {"cudaStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamEndCapture + {"cudaStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamIsCapturing + {"cudaStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuStreamGetPriority + {"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME}}, // Other synchronization {"cudaDeviceSynchronize", {"hipDeviceSynchronize", CONV_DEVICE, API_RUNTIME}}, {"cudaDeviceReset", {"hipDeviceReset", CONV_DEVICE, API_RUNTIME}}, {"cudaSetDevice", {"hipSetDevice", CONV_DEVICE, API_RUNTIME}}, {"cudaGetDevice", {"hipGetDevice", CONV_DEVICE, API_RUNTIME}}, + // cuDeviceGetCount {"cudaGetDeviceCount", {"hipGetDeviceCount", CONV_DEVICE, API_RUNTIME}}, {"cudaChooseDevice", {"hipChooseDevice", CONV_DEVICE, API_RUNTIME}}, @@ -118,20 +195,25 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_RUNTIME}}, // Pointer Attributes - // struct cudaPointerAttributes - {"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_MEMORY, API_RUNTIME}}, - + // no analogue + // NOTE: Not equal to cuPointerGetAttributes due to different signatures + {"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_ADDRESSING, API_RUNTIME}}, + // cuMemHostGetDevicePointer {"cudaHostGetDevicePointer", {"hipHostGetDevicePointer", CONV_MEMORY, API_RUNTIME}}, // Device {"cudaGetDeviceProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_RUNTIME}}, + // cuDeviceGetPCIBusId {"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_DEVICE, API_RUNTIME}}, + // cuDeviceGetByPCIBusId {"cudaDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_RUNTIME}}, - {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuCtxGetStreamPriorityRange + {"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME}}, {"cudaSetValidDevices", {"hipSetValidDevices", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, // Device Flags - {"cudaGetDeviceFlags", {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}}, + // cuCtxGetFlags + {"cudaGetDeviceFlags", {"hipCtxGetFlags", CONV_DEVICE, API_RUNTIME}}, {"cudaSetDeviceFlags", {"hipSetDeviceFlags", CONV_DEVICE, API_RUNTIME}}, // Cache config @@ -179,7 +261,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ // {"cudaThreadGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", CONV_DEVICE, API_RUNTIME}}, // {"cudaThreadSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", CONV_DEVICE, API_RUNTIME}}, - + // cuCtxGetLimit {"cudaDeviceGetLimit", {"hipDeviceGetLimit", CONV_DEVICE, API_RUNTIME}}, // Profiler diff --git a/projects/hip/hipify-clang/src/HipifyAction.cpp b/projects/hip/hipify-clang/src/HipifyAction.cpp index ce185c39a8..b370df794e 100644 --- a/projects/hip/hipify-clang/src/HipifyAction.cpp +++ b/projects/hip/hipify-clang/src/HipifyAction.cpp @@ -270,14 +270,14 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc if (numArgs > 0) { OS << ", "; // Start of the first argument. - clang::SourceLocation argStart = launchKernel->getArg(0)->getLocStart(); + clang::SourceLocation argStart = llcompat::getBeginLoc(launchKernel->getArg(0)); // End of the last argument. - clang::SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd(); + clang::SourceLocation argEnd = llcompat::getEndLoc(launchKernel->getArg(numArgs - 1)); OS << readSourceText(*SM, {argStart, argEnd}); } OS << ")"; - clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()}); + clang::SourceRange replacementRange = getWriteRange(*SM, {llcompat::getBeginLoc(launchKernel), llcompat::getEndLoc(launchKernel)}); clang::SourceLocation launchStart = replacementRange.getBegin(); clang::SourceLocation launchEnd = replacementRange.getEnd(); size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchStart); @@ -320,8 +320,8 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match } if (!typeName.empty()) { - clang::SourceLocation slStart = sharedVar->getLocStart(); - clang::SourceLocation slEnd = sharedVar->getLocEnd(); + clang::SourceLocation slStart = llcompat::getBeginLoc(sharedVar->getTypeSourceInfo()->getTypeLoc()); + clang::SourceLocation slEnd = llcompat::getEndLoc(sharedVar->getTypeSourceInfo()->getTypeLoc()); clang::SourceManager* SM = Result.SourceManager; size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1; std::string varName = sharedVar->getNameAsString(); diff --git a/projects/hip/hipify-clang/src/HipifyAction.h b/projects/hip/hipify-clang/src/HipifyAction.h index 7b54dddf54..9d30a72592 100644 --- a/projects/hip/hipify-clang/src/HipifyAction.h +++ b/projects/hip/hipify-clang/src/HipifyAction.h @@ -9,6 +9,7 @@ #include "Statistics.h" namespace ct = clang::tooling; +using namespace llvm; /** * A FrontendAction that hipifies CUDA programs. diff --git a/projects/hip/hipify-clang/src/LLVMCompat.cpp b/projects/hip/hipify-clang/src/LLVMCompat.cpp index 4ab62310d6..611bb28cbe 100644 --- a/projects/hip/hipify-clang/src/LLVMCompat.cpp +++ b/projects/hip/hipify-clang/src/LLVMCompat.cpp @@ -8,11 +8,11 @@ void PrintStackTraceOnErrorSignal() { #if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8) llvm::sys::PrintStackTraceOnErrorSignal(); #else - llvm::sys::PrintStackTraceOnErrorSignal(clang::StringRef()); + llvm::sys::PrintStackTraceOnErrorSignal(StringRef()); #endif } -ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file) { +ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file) { #if LLVM_VERSION_MAJOR > 3 // getReplacements() now returns a map from filename to Replacements - so create an entry // for this source file and return a reference to it. @@ -40,4 +40,36 @@ void EnterPreprocessorTokenStream(clang::Preprocessor& _pp, const clang::Token * #endif } +clang::SourceLocation getBeginLoc(const clang::Stmt* stmt) { +#if LLVM_VERSION_MAJOR < 8 + return stmt->getLocStart(); +#else + return stmt->getBeginLoc(); +#endif +} + +clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc) { +#if LLVM_VERSION_MAJOR < 8 + return typeLoc.getLocStart(); +#else + return typeLoc.getBeginLoc(); +#endif +} + +clang::SourceLocation getEndLoc(const clang::Stmt* stmt) { +#if LLVM_VERSION_MAJOR < 8 + return stmt->getLocEnd(); +#else + return stmt->getEndLoc(); +#endif +} + +clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc) { +#if LLVM_VERSION_MAJOR < 8 + return typeLoc.getLocEnd(); +#else + return typeLoc.getEndLoc(); +#endif +} + } // namespace llcompat diff --git a/projects/hip/hipify-clang/src/LLVMCompat.h b/projects/hip/hipify-clang/src/LLVMCompat.h index 9f82e36a1f..a43af857bf 100644 --- a/projects/hip/hipify-clang/src/LLVMCompat.h +++ b/projects/hip/hipify-clang/src/LLVMCompat.h @@ -25,15 +25,23 @@ namespace llcompat { #define LLVM_DEBUG(X) DEBUG(X) #endif +clang::SourceLocation getBeginLoc(const clang::Stmt* stmt); +clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc); + +clang::SourceLocation getEndLoc(const clang::Stmt* stmt); +clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc); + void PrintStackTraceOnErrorSignal(); +using namespace llvm; + /** * Get the replacement map for a given filename in a RefactoringTool. * * Older LLVM versions don't actually support multiple filenames, so everything all gets * smushed together. It is the caller's responsibility to cope with this. */ -ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file); +ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file); /** * Add a Replacement to a Replacements. diff --git a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp index e678f25aa2..5edddad6c5 100644 --- a/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp +++ b/projects/hip/include/hip/hcc_detail/functional_grid_launch.hpp @@ -33,6 +33,7 @@ THE SOFTWARE. #include #include +#include #include #include #include @@ -56,7 +57,9 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - std::vector kernarg, const std::tuple&) { + const std::tuple&, + const std::vector>&, + std::vector kernarg) { return kernarg; } @@ -65,7 +68,9 @@ template < typename... Ts, typename std::enable_if::type* = nullptr> inline std::vector make_kernarg( - std::vector kernarg, const std::tuple& formals) { + const std::tuple& formals, + const std::vector>& size_align, + std::vector kernarg) { using T = typename std::tuple_element>::type; static_assert( @@ -80,24 +85,44 @@ inline std::vector make_kernarg( #endif kernarg.resize(round_up_to_next_multiple_nonnegative( - kernarg.size(), alignof(T)) + sizeof(T)); + kernarg.size(), size_align[n].second) + + size_align[n].first); - new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get(formals)}; + std::memcpy( + kernarg.data() + kernarg.size() - size_align[n].first, + &std::get(formals), + size_align[n].first); - return make_kernarg(std::move(kernarg), formals); + return make_kernarg(formals, size_align, std::move(kernarg)); } template inline std::vector make_kernarg( - void (*)(Formals...), std::tuple actuals) { + void (*kernel)(Formals...), std::tuple actuals) { static_assert(sizeof...(Formals) == sizeof...(Actuals), "The count of formal arguments must match the count of actuals."); + if (sizeof...(Formals) == 0) return {}; + + const auto it = function_names().find( + reinterpret_cast(kernel)); + + if (it == function_names().cend()) { + throw std::runtime_error{"Undefined __global__ function."}; + } + + const auto it1 = kernargs().find(it->second); + + if (it1 == kernargs().end()) { + throw std::runtime_error{ + "Missing metadata for __global__ function: " + it->second}; + } + std::tuple to_formals{std::move(actuals)}; std::vector kernarg; kernarg.reserve(sizeof(to_formals)); - return make_kernarg<0>(std::move(kernarg), to_formals); + return make_kernarg<0>(to_formals, it1->second, std::move(kernarg)); } void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks, diff --git a/projects/hip/include/hip/hcc_detail/hip_memory.h b/projects/hip/include/hip/hcc_detail/hip_memory.h index 2c9ec1b7c3..866b9e879e 100644 --- a/projects/hip/include/hip/hcc_detail/hip_memory.h +++ b/projects/hip/include/hip/hcc_detail/hip_memory.h @@ -41,8 +41,14 @@ THE SOFTWARE. #define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE) +#if __HIP__ && __HIP_DEVICE_COMPILE__ +__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP]; +__attribute__((weak)) __device__ + uint32_t __hip_device_page_flag[__HIP_NUM_PAGES]; +#else extern __device__ char __hip_device_heap[]; extern __device__ uint32_t __hip_device_page_flag[]; +#endif extern "C" inline __device__ void* __hip_malloc(size_t size) { char* heap = (char*)__hip_device_heap; diff --git a/projects/hip/include/hip/hcc_detail/math_functions.h b/projects/hip/include/hip/hcc_detail/math_functions.h index b12e7aca89..557257b2b0 100644 --- a/projects/hip/include/hip/hcc_detail/math_functions.h +++ b/projects/hip/include/hip/hcc_detail/math_functions.h @@ -514,38 +514,41 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); } __DEVICE__ inline float __expf(float x) { return __ocml_exp_f32(x); } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline -float __fadd_rd(float x, float y) { return __ocml_add_rtp_f32(x, y); } +float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); } __DEVICE__ inline float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); } __DEVICE__ inline -float __fadd_ru(float x, float y) { return __ocml_add_rtn_f32(x, y); } +float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); } __DEVICE__ inline float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); } __DEVICE__ inline -float __fdiv_rd(float x, float y) { return x / y; } +float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); } __DEVICE__ inline -float __fdiv_rn(float x, float y) { return x / y; } +float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); } __DEVICE__ inline -float __fdiv_ru(float x, float y) { return x / y; } +float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); } __DEVICE__ inline -float __fdiv_rz(float x, float y) { return x / y; } +float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); } +#endif __DEVICE__ inline float __fdividef(float x, float y) { return x / y; } +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline float __fmaf_rd(float x, float y, float z) { - return __ocml_fma_rtp_f32(x, y, z); + return __ocml_fma_rtn_f32(x, y, z); } __DEVICE__ inline @@ -557,7 +560,7 @@ __DEVICE__ inline float __fmaf_ru(float x, float y, float z) { - return __ocml_fma_rtn_f32(x, y, z); + return __ocml_fma_rtp_f32(x, y, z); } __DEVICE__ inline @@ -567,13 +570,13 @@ float __fmaf_rz(float x, float y, float z) } __DEVICE__ inline -float __fmul_rd(float x, float y) { return __ocml_mul_rtp_f32(x, y); } +float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); } __DEVICE__ inline float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); } __DEVICE__ inline -float __fmul_ru(float x, float y) { return __ocml_mul_rtn_f32(x, y); } +float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); } __DEVICE__ inline float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); } @@ -594,28 +597,29 @@ inline float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); } __DEVICE__ inline -float __fsqrt_rd(float x) { return __ocml_sqrt_f32(x); } +float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); } __DEVICE__ inline -float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); } +float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); } __DEVICE__ inline -float __fsqrt_ru(float x) { return __ocml_sqrt_f32(x); } +float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); } __DEVICE__ inline -float __fsqrt_rz(float x) { return __ocml_sqrt_f32(x); } +float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); } __DEVICE__ inline -float __fsub_rd(float x, float y) { return __ocml_sub_rtp_f32(x, y); } +float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); } __DEVICE__ inline float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); } __DEVICE__ inline -float __fsub_ru(float x, float y) { return __ocml_sub_rtn_f32(x, y); } +float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); } __DEVICE__ inline float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); } +#endif __DEVICE__ inline float __log10f(float x) { return __ocml_log10_f32(x); } @@ -1034,39 +1038,40 @@ double yn(int n, double x) } // BEGIN INTRINSICS +#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ inline -double __dadd_rd(double x, double y) { return __ocml_add_rtp_f64(x, y); } +double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); } __DEVICE__ inline double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); } __DEVICE__ inline -double __dadd_ru(double x, double y) { return __ocml_add_rtn_f64(x, y); } +double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); } __DEVICE__ inline double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); } __DEVICE__ inline -double __ddiv_rd(double x, double y) { return x / y; } +double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); } __DEVICE__ inline -double __ddiv_rn(double x, double y) { return x / y; } +double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); } __DEVICE__ inline -double __ddiv_ru(double x, double y) { return x / y; } +double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); } __DEVICE__ inline -double __ddiv_rz(double x, double y) { return x / y; } +double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); } __DEVICE__ inline -double __dmul_rd(double x, double y) { return __ocml_mul_rtp_f64(x, y); } +double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); } __DEVICE__ inline double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); } __DEVICE__ inline -double __dmul_ru(double x, double y) { return __ocml_mul_rtn_f64(x, y); } +double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); } __DEVICE__ inline double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); } @@ -1084,25 +1089,25 @@ inline double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); } __DEVICE__ inline -double __dsqrt_rd(double x) { return __ocml_sqrt_f64(x); } +double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); } __DEVICE__ inline -double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); } +double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); } __DEVICE__ inline -double __dsqrt_ru(double x) { return __ocml_sqrt_f64(x); } +double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); } __DEVICE__ inline -double __dsqrt_rz(double x) { return __ocml_sqrt_f64(x); } +double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); } __DEVICE__ inline -double __dsub_rd(double x, double y) { return __ocml_sub_rtp_f64(x, y); } +double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); } __DEVICE__ inline double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); } __DEVICE__ inline -double __dsub_ru(double x, double y) { return __ocml_sub_rtn_f64(x, y); } +double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); } __DEVICE__ inline double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); } @@ -1110,7 +1115,7 @@ __DEVICE__ inline double __fma_rd(double x, double y, double z) { - return __ocml_fma_rtp_f64(x, y, z); + return __ocml_fma_rtn_f64(x, y, z); } __DEVICE__ inline @@ -1122,7 +1127,7 @@ __DEVICE__ inline double __fma_ru(double x, double y, double z) { - return __ocml_fma_rtn_f64(x, y, z); + return __ocml_fma_rtp_f64(x, y, z); } __DEVICE__ inline @@ -1130,6 +1135,7 @@ double __fma_rz(double x, double y, double z) { return __ocml_fma_rtz_f64(x, y, z); } +#endif // END INTRINSICS // END DOUBLE diff --git a/projects/hip/include/hip/hcc_detail/math_fwd.h b/projects/hip/include/hip/hcc_detail/math_fwd.h index 404c2f81d5..e5594924ba 100644 --- a/projects/hip/include/hip/hcc_detail/math_fwd.h +++ b/projects/hip/include/hip/hcc_detail/math_fwd.h @@ -288,6 +288,30 @@ __attribute__((const)) float __ocml_mul_rtz_f32(float, float); __device__ __attribute__((const)) +float __ocml_div_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_div_rtz_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rte_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtn_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtp_f32(float, float); +__device__ +__attribute__((const)) +float __ocml_sqrt_rtz_f32(float, float); +__device__ +__attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) @@ -572,6 +596,30 @@ __attribute__((const)) double __ocml_mul_rtz_f64(double, double); __device__ __attribute__((const)) +double __ocml_div_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_div_rtz_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rte_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtn_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtp_f64(double, double); +__device__ +__attribute__((const)) +double __ocml_sqrt_rtz_f64(double, double); +__device__ +__attribute__((const)) double __ocml_fma_rte_f64(double, double, double); __device__ __attribute__((const)) @@ -594,4 +642,4 @@ double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64"); #if defined(__cplusplus) } // extern "C" -#endif \ No newline at end of file +#endif diff --git a/projects/hip/include/hip/hcc_detail/program_state.hpp b/projects/hip/include/hip/hcc_detail/program_state.hpp index bdb87b3509..92bef22172 100644 --- a/projects/hip/include/hip/hcc_detail/program_state.hpp +++ b/projects/hip/include/hip/hcc_detail/program_state.hpp @@ -99,6 +99,8 @@ const std::unordered_map& function_names(bool rebuild = false); std::unordered_map& globals(bool rebuild = false); +std::unordered_map< + std::string, std::vector>>& kernargs(); hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable, hsa_agent_t agent); diff --git a/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp b/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp index ab7a4b35a6..5c3907f0d3 100644 --- a/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp +++ b/projects/hip/samples/0_Intro/bit_extract/bit_extract.cpp @@ -23,10 +23,6 @@ THE SOFTWARE. #include #include #include "hip/hip_runtime.h" -#ifdef __HIP_PLATFORM_HCC__ -#include -#endif - #define CHECK(cmd) \ { \ @@ -44,7 +40,7 @@ __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N) for (size_t i = offset; i < N; i += stride) { #ifdef __HIP_PLATFORM_HCC__ - C_d[i] = hc::__bitextract_u32(A_d[i], 8, 4); + C_d[i] = __bitextract_u32(A_d[i], 8, 4); #else /* defined __HIP_PLATFORM_NVCC__ or other path */ C_d[i] = ((A_d[i] & 0xf00) >> 8); #endif diff --git a/projects/hip/src/hip_clang.cpp b/projects/hip/src/hip_clang.cpp index 15a96d298a..44080884e7 100644 --- a/projects/hip/src/hip_clang.cpp +++ b/projects/hip/src/hip_clang.cpp @@ -22,6 +22,7 @@ THE SOFTWARE. #include #include +#include #include "hip/hip_runtime.h" #include "hip_hcc_internal.h" @@ -86,6 +87,7 @@ __hipRegisterFatBinary(const void* data) std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)], desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)}; + tprintf(DB_FB, "Found bundle for %s\n", target.c_str()); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { hsa_agent_t agent = g_allAgents[deviceId + 1]; @@ -110,10 +112,35 @@ __hipRegisterFatBinary(const void* data) if (module->executable.handle) { modules->at(deviceId) = module; + tprintf(DB_FB, "Loaded code object for %s\n", name); + if (HIP_DUMP_CODE_OBJECT) { + char fname[30]; + static std::atomic index; + sprintf(fname, "__hip_dump_code_object%04d.o", index++); + tprintf(DB_FB, "Dump code object %s\n", fname); + std::ofstream ofs; + ofs.open(fname, std::ios::binary); + ofs << image; + ofs.close(); + } + } else { + fprintf(stderr, "Failed to load code object for %s\n", name); + abort(); } } } + for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { + hsa_agent_t agent = g_allAgents[deviceId + 1]; + + char name[64] = {}; + hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name); + if (!(*modules)[deviceId]) { + fprintf(stderr, "No device code bundle for %s\n", name); + abort(); + } + } + tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules); return modules; } @@ -132,13 +159,20 @@ extern "C" void __hipRegisterFunction( dim3* gridDim, int* wSize) { + HIP_INIT_API(modules, hostFunction, deviceFunction, deviceName); std::vector functions{g_deviceCnt}; + assert(modules && modules->size() >= g_deviceCnt); for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) { hipFunction_t function; - if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) { + if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) && + function != nullptr) { functions[deviceId] = function; } + else { + tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for" + " device %d\n", deviceName, deviceId); + } } g_functions.insert(std::make_pair(hostFunction, std::move(functions))); @@ -180,6 +214,7 @@ hipError_t hipSetupArgument( size_t size, size_t offset) { + HIP_INIT_API(arg, size, offset); auto ctx = ihipGetTlsDefaultCtx(); LockedAccessor_CtxCrit_t crit(ctx->criticalData()); auto& arguments = crit->_execStack.top()._arguments; @@ -194,6 +229,7 @@ hipError_t hipSetupArgument( hipError_t hipLaunchByPtr(const void *hostFunction) { + HIP_INIT_API(hostFunction); ihipExec_t exec; { auto ctx = ihipGetTlsDefaultCtx(); @@ -213,20 +249,28 @@ hipError_t hipLaunchByPtr(const void *hostFunction) deviceId = 0; } + hipError_t e = hipSuccess; decltype(g_functions)::iterator it; - if ((it = g_functions.find(hostFunction)) == g_functions.end()) - return hipErrorUnknown; + if ((it = g_functions.find(hostFunction)) == g_functions.end() || + !it->second[deviceId]) { + e = hipErrorUnknown; + fprintf(stderr, "hipLaunchByPtr cannot find kernel with stub address %p" + " for device %d!\n", hostFunction, deviceId); + abort(); + } else { + size_t size = exec._arguments.size(); + void *extra[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; - size_t size = exec._arguments.size(); - void *extra[] = { - HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0], - HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, - HIP_LAUNCH_PARAM_END - }; + e = hipModuleLaunchKernel(it->second[deviceId], + exec._gridDim.x, exec._gridDim.y, exec._gridDim.z, + exec._blockDim.x, exec._blockDim.y, exec._blockDim.z, + exec._sharedMem, exec._hStream, nullptr, extra); + } - return hipModuleLaunchKernel(it->second[deviceId], - exec._gridDim.x, exec._gridDim.y, exec._gridDim.z, - exec._blockDim.x, exec._blockDim.y, exec._blockDim.z, - exec._sharedMem, exec._hStream, nullptr, extra); + return ihipLogStatus(e); } diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index eff93da847..e152e7ba69 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -97,6 +97,8 @@ int HIP_INIT_ALLOC = -1; int HIP_SYNC_STREAM_WAIT = 0; int HIP_FORCE_NULL_STREAM = 0; +int HIP_DUMP_CODE_OBJECT = 0; + #if (__hcc_workweek__ >= 17300) // Make sure we have required bug fix in HCC @@ -1294,6 +1296,10 @@ void HipReadEnv() { "overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag " "when creating the event."); + READ_ENV_I(release, HIP_DUMP_CODE_OBJECT, 0, + "If set, dump code object as __hip_dump_code_object[nnnn].o in the current directory," + "where nnnn is the index number."); + // Some flags have both compile-time and runtime flags - generate a warning if user enables the // runtime flag but the compile-time flag is disabled. if (HIP_DB && !COMPILE_HIP_DB) { diff --git a/projects/hip/src/hip_hcc_internal.h b/projects/hip/src/hip_hcc_internal.h index d64a4a4cbe..8102f066de 100644 --- a/projects/hip/src/hip_hcc_internal.h +++ b/projects/hip/src/hip_hcc_internal.h @@ -83,11 +83,11 @@ extern int HIP_SYNC_NULL_STREAM; extern int HIP_INIT_ALLOC; extern int HIP_FORCE_NULL_STREAM; +extern int HIP_DUMP_CODE_OBJECT; // TODO - remove when this is standard behavior. extern int HCC_OPT_FLUSH; - // Class to assign a short TID to each new thread, for HIP debugging purposes. class TidInfo { public: diff --git a/projects/hip/src/hip_memory.cpp b/projects/hip/src/hip_memory.cpp index d02bb5acb8..16ab436e0e 100644 --- a/projects/hip/src/hip_memory.cpp +++ b/projects/hip/src/hip_memory.cpp @@ -985,10 +985,9 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); - if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost || + if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDefault || kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) { - stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind); - // acc.memcpy_symbol(dst, (void*)src, count+offset); + stream->locked_copySync((char*)dst+offset, (void*)src, count, kind, false); } else { return ihipLogStatus(hipErrorInvalidValue); } @@ -1018,9 +1017,9 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); - if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost || + if (kind == hipMemcpyDefault || kind == hipMemcpyDeviceToHost || kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) { - stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind); + stream->locked_copySync((void*)dst, (char*)src+offset, count, kind, false); } else { return ihipLogStatus(hipErrorInvalidValue); } @@ -1052,7 +1051,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_ if (stream) { try { - stream->lockedSymbolCopyAsync(acc, dst, (void*)src, count, offset, kind); + hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream); } catch (ihipException& ex) { e = ex._code; } @@ -1088,7 +1087,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co stream = ihipSyncAndResolveStream(stream); if (stream) { try { - stream->lockedSymbolCopyAsync(acc, dst, src, count, offset, kind); + hip_internal::memcpyAsync(dst, (char*)src+offset, count, kind, stream); } catch (ihipException& ex) { e = ex._code; } diff --git a/projects/hip/src/hip_module.cpp b/projects/hip/src/hip_module.cpp index efb091f68a..786d1e8d5c 100644 --- a/projects/hip/src/hip_module.cpp +++ b/projects/hip/src/hip_module.cpp @@ -258,20 +258,29 @@ struct Agent_global { uint32_t byte_cnt; }; -inline void track(const Agent_global& x) { +inline void track(const Agent_global& x, hsa_agent_t agent) { tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(), x.address, x.byte_cnt); - auto device = ihipGetTlsDefaultCtx()->getWriteableDevice(); - + int deviceIndex =0; + for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) { + if(g_allAgents[deviceIndex] == agent) + break; + } + auto device = ihipGetDevice(deviceIndex - 1); hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true, false); hc::am_memtracker_add(x.address, ptr_info); +#if USE_APP_PTR_FOR_CTX + hc::am_memtracker_update(x.address, device->_deviceId, 0u, ihipGetTlsDefaultCtx()); +#else hc::am_memtracker_update(x.address, device->_deviceId, 0u); +#endif + } template > -inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t, +inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent, hsa_executable_symbol_t x, void* out) { assert(out); @@ -281,7 +290,7 @@ inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t, if (t == HSA_SYMBOL_KIND_VARIABLE) { static_cast(out)->push_back(Agent_global{name(x), address(x), size(x)}); - track(static_cast(out)->back()); + track(static_cast(out)->back(),agent); } return HSA_STATUS_SUCCESS; @@ -342,7 +351,7 @@ hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes, hi tie(*dptr, *bytes) = read_global_description(it0->second.cbegin(), it0->second.cend(), name); - return dptr ? hipSuccess : hipErrorNotFound; + return *dptr ? hipSuccess : hipErrorNotFound; } hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const char* name) { @@ -367,7 +376,7 @@ hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, c tie(*dptr, *bytes) = read_global_description(it->second.cbegin(), it->second.cend(), name); - return dptr ? hipSuccess : hipErrorNotFound; + return *dptr ? hipSuccess : hipErrorNotFound; } hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname) { diff --git a/projects/hip/src/program_state.cpp b/projects/hip/src/program_state.cpp index 88cdeeb404..7e42a44245 100644 --- a/projects/hip/src/program_state.cpp +++ b/projects/hip/src/program_state.cpp @@ -312,8 +312,8 @@ const unordered_map>& kernels(bool rebui void load_code_object_and_freeze_executable( const string& file, hsa_agent_t agent, - hsa_executable_t - executable) { // TODO: the following sequence is inefficient, should be refactored + hsa_executable_t executable) { + // TODO: the following sequence is inefficient, should be refactored // into a single load of the file and subsequent ELFIO // processing. static const auto cor_deleter = [](hsa_code_object_reader_t* p) { @@ -340,6 +340,90 @@ void load_code_object_and_freeze_executable( code_readers.push_back(move(tmp)); } } + +size_t parse_args( + const string& metadata, + size_t f, + size_t l, + vector>& size_align) { + if (f == l) return f; + if (!size_align.empty()) return l; + + do { + static constexpr size_t size_sz{5}; + f = metadata.find("Size:", f) + size_sz; + + if (l <= f) return f; + + auto size = strtoul(&metadata[f], nullptr, 10); + + static constexpr size_t align_sz{6}; + f = metadata.find("Align:", f) + align_sz; + + char* l{}; + auto align = strtoul(&metadata[f], &l, 10); + + f += (l - &metadata[f]) + 1; + + size_align.emplace_back(size, align); + } while (true); +} + +void read_kernarg_metadata( + elfio& reader, + unordered_map>>& kernargs) +{ // TODO: this is inefficient. + auto it = find_section_if( + reader, [](const section* x) { return x->get_type() == SHT_NOTE; }); + + if (!it) return; + + const note_section_accessor acc{reader, it}; + for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) { + ELFIO::Elf_Word type{}; + string name{}; + void* desc{}; + Elf_Word desc_size{}; + + acc.get_note(i, type, name, desc, desc_size); + + if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA. + + string tmp{ + static_cast(desc), static_cast(desc) + desc_size}; + + auto dx = tmp.find("Kernels:"); + + if (dx == string::npos) continue; + + static constexpr decltype(tmp.size()) kernels_sz{8}; + dx += kernels_sz; + + do { + dx = tmp.find("Name:", dx); + + if (dx == string::npos) break; + + static constexpr decltype(tmp.size()) name_sz{5}; + dx = tmp.find_first_not_of(" '", dx + name_sz); + + auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx); + dx += fn.size(); + + auto dx1 = tmp.find("CodeProps", dx); + dx = tmp.find("Args:", dx); + + if (dx1 < dx) { + dx = dx1; + continue; + } + if (dx == string::npos) break; + + static constexpr decltype(tmp.size()) args_sz{5}; + dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]); + } while (true); + } +} } // namespace namespace hip_impl { @@ -501,6 +585,25 @@ unordered_map& globals(bool rebuild) { return r; } +unordered_map>>& kernargs() { + static unordered_map>> r; + static once_flag f; + + call_once(f, []() { + for (auto&& blob : code_object_blobs()) { + stringstream tmp{std::string{ + blob.second.front().cbegin(), blob.second.front().cend()}}; + + elfio reader; + if (!reader.load(tmp)) continue; + + read_kernarg_metadata(reader, r); + } + }); + + return r; +} + hsa_executable_t load_executable(const string& file, hsa_executable_t executable, hsa_agent_t agent) { elfio reader; diff --git a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp index 17cd82c9ab..939bdae743 100644 --- a/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipDoublePrecisionIntrinsics.cpp @@ -34,6 +34,7 @@ THE SOFTWARE. #pragma clang diagnostic ignored "-Wunused-variable" __device__ void double_precision_intrinsics() { +#if defined OCML_BASIC_ROUNDED_OPERATIONS __dadd_rd(0.0, 1.0); __dadd_rn(0.0, 1.0); __dadd_ru(0.0, 1.0); @@ -62,6 +63,7 @@ __device__ void double_precision_intrinsics() { __fma_rn(1.0, 2.0, 3.0); __fma_ru(1.0, 2.0, 3.0); __fma_rz(1.0, 2.0, 3.0); +#endif } __global__ void compileDoublePrecisionIntrinsics(int ignored) { diff --git a/projects/hip/tests/src/deviceLib/hipFloatMath.cpp b/projects/hip/tests/src/deviceLib/hipFloatMath.cpp index ee83309f28..c6a07e26a9 100644 --- a/projects/hip/tests/src/deviceLib/hipFloatMath.cpp +++ b/projects/hip/tests/src/deviceLib/hipFloatMath.cpp @@ -38,11 +38,13 @@ __global__ void floatMath(float* In, float* Out) { Out[tid] = __cosf(In[tid]); Out[tid] = __exp10f(Out[tid]); Out[tid] = __expf(Out[tid]); +#if defined OCML_BASIC_ROUNDED_OPERATIONS Out[tid] = __frsqrt_rn(Out[tid]); - //Out[tid] = __fsqrt_rd(Out[tid]); - //Out[tid] = __fsqrt_rn(Out[tid]); - //Out[tid] = __fsqrt_ru(Out[tid]); - //Out[tid] = __fsqrt_rz(Out[tid]); + Out[tid] = __fsqrt_rd(Out[tid]); + Out[tid] = __fsqrt_rn(Out[tid]); + Out[tid] = __fsqrt_ru(Out[tid]); + Out[tid] = __fsqrt_rz(Out[tid]); +#endif Out[tid] = __log10f(Out[tid]); Out[tid] = __log2f(Out[tid]); Out[tid] = __logf(Out[tid]); diff --git a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp index f3d2a36931..b216b3cb54 100644 --- a/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp +++ b/projects/hip/tests/src/deviceLib/hipSinglePrecisionIntrinsics.cpp @@ -39,6 +39,7 @@ __device__ void single_precision_intrinsics() { __cosf(0.0f); __exp10f(0.0f); __expf(0.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fadd_rd(0.0f, 1.0f); __fadd_rn(0.0f, 1.0f); __fadd_ru(0.0f, 1.0f); @@ -47,7 +48,9 @@ __device__ void single_precision_intrinsics() { __fdiv_rn(4.0f, 2.0f); __fdiv_ru(4.0f, 2.0f); __fdiv_rz(4.0f, 2.0f); +#endif __fdividef(4.0f, 2.0f); +#if defined OCML_BASIC_ROUNDED_OPERATIONS __fmaf_rd(1.0f, 2.0f, 3.0f); __fmaf_rn(1.0f, 2.0f, 3.0f); __fmaf_ru(1.0f, 2.0f, 3.0f); @@ -69,6 +72,7 @@ __device__ void single_precision_intrinsics() { __fsub_rn(2.0f, 1.0f); __fsub_ru(2.0f, 1.0f); __fsub_rz(2.0f, 1.0f); +#endif __log10f(1.0f); __log2f(1.0f); __logf(1.0f);