From bf883d95e96cbdf4ce792a6b09f65e9c406ce9e1 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 12 Aug 2019 19:20:13 +0300 Subject: [PATCH] [HIPIFY] Cooperative groups support [ROCm/hip commit: aa285d978efc6a42623f4f8834503a029fd0928c] --- ...A_Driver_API_functions_supported_by_HIP.md | 10 +- ..._Runtime_API_functions_supported_by_HIP.md | 111 +++++++++--------- .../src/CUDA2HIP_Driver_API_functions.cpp | 2 +- .../src/CUDA2HIP_Driver_API_types.cpp | 4 +- .../src/CUDA2HIP_Runtime_API_functions.cpp | 6 +- .../src/CUDA2HIP_Runtime_API_types.cpp | 6 +- 6 files changed, 70 insertions(+), 69 deletions(-) diff --git a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 7aa24465f1..7b66a484f2 100644 --- a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -166,8 +166,8 @@ | 92 |*`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS`* | | 9.0 | | 93 |*`CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS`* | | 9.0 | | 94 |*`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`* | | 9.0 | -| 95 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH`* | | 9.0 | -| 96 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH`* | | 9.0 | +| 95 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH`* |*`hipDeviceAttributeCooperativeLaunch`* | 9.0 | +| 96 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH`* |*`hipDeviceAttributeCooperativeMultiDeviceLaunch`* | 9.0 | | 97 |*`CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN`* | | 9.0 | | 98 |*`CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES`* | | 9.2 | | 99 |*`CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED`* | | 9.2 | @@ -1044,9 +1044,9 @@ | **CUDA** | **HIP** |**CUDA version\***| |-----------------------------------------------------------|---------------------------------------------------------|------------------| -| `cuOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` | -| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | -| `cuOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | +| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` | +| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | +| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` | | `cuOccupancyMaxPotentialBlockSizeWithFlags` | | ## **21. Texture Reference Management** diff --git a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 1bca61f33b..f35c451414 100644 --- a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -99,27 +99,27 @@ ## **7. Execution Control** -| **CUDA** | **HIP** |**CUDA version\***| -|-----------------------------------------------------------|-------------------------------|:----------------:| -| `cudaFuncGetAttributes` | | -| `cudaFuncSetAttribute` | | 9.0 | -| `cudaFuncSetCacheConfig` | `hipFuncSetCacheConfig` | -| `cudaFuncSetSharedMemConfig` | | -| `cudaGetParameterBuffer` | | -| `cudaGetParameterBufferV2` | | -| `cudaLaunchKernel` | `hipLaunchKernel` | -| `cudaSetDoubleForDevice` | | -| `cudaSetDoubleForHost` | | -| `cudaLaunchCooperativeKernel` | | 9.0 | -| `cudaLaunchCooperativeKernelMultiDevice` | | 9.0 | -| `cudaLaunchHostFunc` | | 10.0 | +| **CUDA** | **HIP** |**CUDA version\***| +|-----------------------------------------------------------|---------------------------------------|:----------------:| +| `cudaFuncGetAttributes` | | +| `cudaFuncSetAttribute` | | 9.0 | +| `cudaFuncSetCacheConfig` |`hipFuncSetCacheConfig` | +| `cudaFuncSetSharedMemConfig` | | +| `cudaGetParameterBuffer` | | +| `cudaGetParameterBufferV2` | | +| `cudaLaunchKernel` |`hipLaunchKernel` | +| `cudaSetDoubleForDevice` | | +| `cudaSetDoubleForHost` | | +| `cudaLaunchCooperativeKernel` |`hipLaunchCooperativeKernel` | 9.0 | +| `cudaLaunchCooperativeKernelMultiDevice` |`hipLaunchCooperativeKernelMultiDevice`| 9.0 | +| `cudaLaunchHostFunc` | | 10.0 | ## **8. Occupancy** -| **CUDA** | **HIP** |**CUDA version\***| -|-----------------------------------------------------------|-----------------------------------------------|:----------------:| -| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`| -| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | +| **CUDA** | **HIP** |**CUDA version\***| +|-----------------------------------------------------------|-------------------------------------------------------|:----------------:| +| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| ## **9. Execution Control [DEPRECATED since 7.0]** @@ -413,41 +413,42 @@ | `cudaGraphRemoveDependencies` | | 10.0 | ## **30. C++ API Routines** -*(7.0 contains, 7.5 doesn’t)* +*(7.0 contains, 7.5 doesn't)* -| **CUDA** | **HIP** |**CUDA version\***| -|-----------------------------------------------------------|------------------------------------------------|:----------------:| -| `cudaBindSurfaceToArray` | | -| `cudaBindTexture` | `hipBindTexture` | -| `cudaBindTexture2D` | | -| `cudaBindTextureToArray` | | -| `cudaBindTextureToMipmappedArray` | | -| `cudaCreateChannelDesc` | `hipCreateChannelDesc` | -| `cudaEventCreate` | | -| `cudaFuncGetAttributes` | | -| `cudaFuncSetAttribute` | | -| `cudaFuncSetCacheConfig` | | -| `cudaGetSymbolAddress` | `hipGetSymbolAddress` | -| `cudaGetSymbolSize` | `hipGetSymbolSize` | -| `cudaGetTextureAlignmentOffset` | | -| `cudaLaunch` | | -| `cudaLaunchCooperativeKernel` | | -| `cudaLaunchKernel` | | -| `cudaMallocHost` | | -| `cudaMallocManaged` | | -| `cudaMemcpyFromSymbol` | | -| `cudaMemcpyFromSymbolAsync` | | -| `cudaMemcpyToSymbol` | | -| `cudaMemcpyToSymbolAsync` | | -| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` | -| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | | -| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` | -| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | | -| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | | -| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | | -| `cudaSetupArgument` | | -| `cudaStreamAttachMemAsync` | | -| `cudaUnbindTexture` | `hipUnbindTexture` | +| **CUDA** | **HIP** |**CUDA version\***| +|-----------------------------------------------------------|-------------------------------------------------------|:----------------:| +| `cudaBindSurfaceToArray` | | +| `cudaBindTexture` |`hipBindTexture` | +| `cudaBindTexture2D` | | +| `cudaBindTextureToArray` | | +| `cudaBindTextureToMipmappedArray` | | +| `cudaCreateChannelDesc` |`hipCreateChannelDesc` | +| `cudaEventCreate` | | +| `cudaFuncGetAttributes` | | +| `cudaFuncSetAttribute` | | +| `cudaFuncSetCacheConfig` | | +| `cudaGetSymbolAddress` |`hipGetSymbolAddress` | +| `cudaGetSymbolSize` |`hipGetSymbolSize` | +| `cudaGetTextureAlignmentOffset` | | +| `cudaLaunch` | | +| `cudaLaunchCooperativeKernel` |`hipLaunchCooperativeKernel` | +| `cudaLaunchCooperativeKernelMultiDevice` |`hipLaunchCooperativeKernelMultiDevice` | +| `cudaLaunchKernel` | | +| `cudaMallocHost` | | +| `cudaMallocManaged` | | +| `cudaMemcpyFromSymbol` | | +| `cudaMemcpyFromSymbolAsync` | | +| `cudaMemcpyToSymbol` | | +| `cudaMemcpyToSymbolAsync` | | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` | +| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`| +| `cudaOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` | +| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | | +| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | | +| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | | +| `cudaSetupArgument` | | +| `cudaStreamAttachMemAsync` | | +| `cudaUnbindTexture` |`hipUnbindTexture` | ## **32. Profiler Control** @@ -593,8 +594,8 @@ | 92 |*`cudaDevAttrReserved92`* | 9.0 | | | 93 |*`cudaDevAttrReserved93`* | 9.0 | | | 94 |*`cudaDevAttrReserved94`* | 9.0 | | -| 95 |*`cudaDevAttrCooperativeLaunch`* | 9.0 | | -| 96 |*`cudaDevAttrCooperativeMultiDeviceLaunch`* | 9.0 | | +| 95 |*`cudaDevAttrCooperativeLaunch`* | 9.0 |*`hipDeviceAttributeCooperativeLaunch`* | +| 96 |*`cudaDevAttrCooperativeMultiDeviceLaunch`* | 9.0 |*`hipDeviceAttributeCooperativeMultiDeviceLaunch`* | | 97 |*`cudaDevAttrMaxSharedMemoryPerBlockOptin`* | 9.0 | | | 98 |*`cudaDevAttrCanFlushRemoteWrites`* | 9.2 | | | 99 |*`cudaDevAttrHostRegisterSupported`* | 9.2 | | @@ -1063,7 +1064,7 @@ | struct |`cudaExternalSemaphoreSignalParams` | 10.0 | | | struct |`cudaExternalSemaphoreWaitParams` | 10.0 | | | struct |`cudaHostNodeParams` | 10.0 | | -| struct |`cudaLaunchParams` | 9.0 | | +| struct |`cudaLaunchParams` | 9.0 |`hipLaunchParams` | | struct |`cudaMemsetParams` | 10.0 | | | struct |`CUeglStreamConnection_st` | 9.1 | | | typedef |`cudaEglStreamConnection` | 9.1 | | diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index 8be20774ea..0896f530e5 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -522,7 +522,7 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ // cudaOccupancyMaxActiveBlocksPerMultiprocessor {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSize {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSizeWithFlags diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp index cc4502a866..717e86315d 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp @@ -486,9 +486,9 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP{ // no analogue: cudaDevAttrReserved94 {"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 94 // cudaDevAttrCooperativeLaunch - {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 95 + {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 95 // cudaDevAttrCooperativeMultiDeviceLaunch - {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 96 + {"CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 96 // cudaDevAttrMaxSharedMemoryPerBlockOptin {"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 97 // cudaDevAttrCanFlushRemoteWrites diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp index 96454d7ec3..bc31009285 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_functions.cpp @@ -194,10 +194,10 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ {"cudaGetParameterBufferV2", {"hipGetParameterBufferV2", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, // no analogue // NOTE: Not equal to cuLaunchCooperativeKernel due to different signatures - {"cudaLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, + {"cudaLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", "", CONV_EXECUTION, API_RUNTIME}}, // no analogue // NOTE: Not equal to cuLaunchCooperativeKernelMultiDevice due to different signatures - {"cudaLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, + {"cudaLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", "", CONV_EXECUTION, API_RUNTIME}}, // cuLaunchHostFunc {"cudaLaunchHostFunc", {"hipLaunchHostFunc", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}}, // no analogue @@ -212,7 +212,7 @@ const std::map CUDA_RUNTIME_FUNCTION_MAP{ // {"cudaOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_RUNTIME}}, // cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}}, + {"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_RUNTIME}}, // cuOccupancyMaxPotentialBlockSize {"cudaOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_RUNTIME}}, // cuOccupancyMaxPotentialBlockSizeWithFlags diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp index e8956b6956..7e153a223f 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp @@ -84,7 +84,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // no analogue // CUDA_LAUNCH_PARAMS struct differs - {"cudaLaunchParams", {"hipLaunchParams", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, + {"cudaLaunchParams", {"hipLaunchParams", "", CONV_TYPE, API_RUNTIME}}, // no analogue // NOTE: HIP struct is bigger and contains cudaMemcpy3DParms only in the beginning @@ -402,9 +402,9 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { // CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR {"cudaDevAttrReserved94", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 94 // CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH - {"cudaDevAttrCooperativeLaunch", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 95 + {"cudaDevAttrCooperativeLaunch", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 95 // CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH - {"cudaDevAttrCooperativeMultiDeviceLaunch", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 96 + {"cudaDevAttrCooperativeMultiDeviceLaunch", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 96 // CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN {"cudaDevAttrMaxSharedMemoryPerBlockOptin", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 97 // CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES