[HIPIFY] Cooperative groups support

[ROCm/hip commit: aa285d978e]
This commit is contained in:
Evgeny Mankov
2019-08-12 19:20:13 +03:00
parent f4d869c8eb
commit bf883d95e9
6 changed files with 70 additions and 69 deletions
@@ -166,8 +166,8 @@
| 92 |*`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS`* | | 9.0 |
| 93 |*`CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS`* | | 9.0 |
| 94 |*`CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR`* | | 9.0 |
| 95 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH`* | | 9.0 |
| 96 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH`* | | 9.0 |
| 95 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH`* |*`hipDeviceAttributeCooperativeLaunch`* | 9.0 |
| 96 |*`CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH`* |*`hipDeviceAttributeCooperativeMultiDeviceLaunch`* | 9.0 |
| 97 |*`CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN`* | | 9.0 |
| 98 |*`CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES`* | | 9.2 |
| 99 |*`CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED`* | | 9.2 |
@@ -1044,9 +1044,9 @@
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|---------------------------------------------------------|------------------|
| `cuOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
| `cuOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` |
| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |
| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` |
| `cuOccupancyMaxPotentialBlockSizeWithFlags` | |
## **21. Texture Reference Management**
@@ -99,27 +99,27 @@
## **7. Execution Control**
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|-------------------------------|:----------------:|
| `cudaFuncGetAttributes` | |
| `cudaFuncSetAttribute` | | 9.0 |
| `cudaFuncSetCacheConfig` | `hipFuncSetCacheConfig` |
| `cudaFuncSetSharedMemConfig` | |
| `cudaGetParameterBuffer` | |
| `cudaGetParameterBufferV2` | |
| `cudaLaunchKernel` | `hipLaunchKernel` |
| `cudaSetDoubleForDevice` | |
| `cudaSetDoubleForHost` | |
| `cudaLaunchCooperativeKernel` | | 9.0 |
| `cudaLaunchCooperativeKernelMultiDevice` | | 9.0 |
| `cudaLaunchHostFunc` | | 10.0 |
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|---------------------------------------|:----------------:|
| `cudaFuncGetAttributes` | |
| `cudaFuncSetAttribute` | | 9.0 |
| `cudaFuncSetCacheConfig` |`hipFuncSetCacheConfig` |
| `cudaFuncSetSharedMemConfig` | |
| `cudaGetParameterBuffer` | |
| `cudaGetParameterBufferV2` | |
| `cudaLaunchKernel` |`hipLaunchKernel` |
| `cudaSetDoubleForDevice` | |
| `cudaSetDoubleForHost` | |
| `cudaLaunchCooperativeKernel` |`hipLaunchCooperativeKernel` | 9.0 |
| `cudaLaunchCooperativeKernelMultiDevice` |`hipLaunchCooperativeKernelMultiDevice`| 9.0 |
| `cudaLaunchHostFunc` | | 10.0 |
## **8. Occupancy**
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|-----------------------------------------------|:----------------:|
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor`|
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|-------------------------------------------------------|:----------------:|
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|
## **9. Execution Control [DEPRECATED since 7.0]**
@@ -413,41 +413,42 @@
| `cudaGraphRemoveDependencies` | | 10.0 |
## **30. C++ API Routines**
*(7.0 contains, 7.5 doesnt)*
*(7.0 contains, 7.5 doesn't)*
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|------------------------------------------------|:----------------:|
| `cudaBindSurfaceToArray` | |
| `cudaBindTexture` | `hipBindTexture` |
| `cudaBindTexture2D` | |
| `cudaBindTextureToArray` | |
| `cudaBindTextureToMipmappedArray` | |
| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
| `cudaEventCreate` | |
| `cudaFuncGetAttributes` | |
| `cudaFuncSetAttribute` | |
| `cudaFuncSetCacheConfig` | |
| `cudaGetSymbolAddress` | `hipGetSymbolAddress` |
| `cudaGetSymbolSize` | `hipGetSymbolSize` |
| `cudaGetTextureAlignmentOffset` | |
| `cudaLaunch` | |
| `cudaLaunchCooperativeKernel` | |
| `cudaLaunchKernel` | |
| `cudaMallocHost` | |
| `cudaMallocManaged` | |
| `cudaMemcpyFromSymbol` | |
| `cudaMemcpyFromSymbolAsync` | |
| `cudaMemcpyToSymbol` | |
| `cudaMemcpyToSymbolAsync` | |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` | `hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSize` | `hipOccupancyMaxPotentialBlockSize` |
| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | |
| `cudaSetupArgument` | |
| `cudaStreamAttachMemAsync` | |
| `cudaUnbindTexture` | `hipUnbindTexture` |
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|-------------------------------------------------------|:----------------:|
| `cudaBindSurfaceToArray` | |
| `cudaBindTexture` |`hipBindTexture` |
| `cudaBindTexture2D` | |
| `cudaBindTextureToArray` | |
| `cudaBindTextureToMipmappedArray` | |
| `cudaCreateChannelDesc` |`hipCreateChannelDesc` |
| `cudaEventCreate` | |
| `cudaFuncGetAttributes` | |
| `cudaFuncSetAttribute` | |
| `cudaFuncSetCacheConfig` | |
| `cudaGetSymbolAddress` |`hipGetSymbolAddress` |
| `cudaGetSymbolSize` |`hipGetSymbolSize` |
| `cudaGetTextureAlignmentOffset` | |
| `cudaLaunch` | |
| `cudaLaunchCooperativeKernel` |`hipLaunchCooperativeKernel` |
| `cudaLaunchCooperativeKernelMultiDevice` |`hipLaunchCooperativeKernelMultiDevice` |
| `cudaLaunchKernel` | |
| `cudaMallocHost` | |
| `cudaMallocManaged` | |
| `cudaMemcpyFromSymbol` | |
| `cudaMemcpyFromSymbolAsync` | |
| `cudaMemcpyToSymbol` | |
| `cudaMemcpyToSymbolAsync` | |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`|
| `cudaOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` |
| `cudaOccupancyMaxPotentialBlockSizeWithFlags` | |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMem` | |
| `cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags` | |
| `cudaSetupArgument` | |
| `cudaStreamAttachMemAsync` | |
| `cudaUnbindTexture` |`hipUnbindTexture` |
## **32. Profiler Control**
@@ -593,8 +594,8 @@
| 92 |*`cudaDevAttrReserved92`* | 9.0 | |
| 93 |*`cudaDevAttrReserved93`* | 9.0 | |
| 94 |*`cudaDevAttrReserved94`* | 9.0 | |
| 95 |*`cudaDevAttrCooperativeLaunch`* | 9.0 | |
| 96 |*`cudaDevAttrCooperativeMultiDeviceLaunch`* | 9.0 | |
| 95 |*`cudaDevAttrCooperativeLaunch`* | 9.0 |*`hipDeviceAttributeCooperativeLaunch`* |
| 96 |*`cudaDevAttrCooperativeMultiDeviceLaunch`* | 9.0 |*`hipDeviceAttributeCooperativeMultiDeviceLaunch`* |
| 97 |*`cudaDevAttrMaxSharedMemoryPerBlockOptin`* | 9.0 | |
| 98 |*`cudaDevAttrCanFlushRemoteWrites`* | 9.2 | |
| 99 |*`cudaDevAttrHostRegisterSupported`* | 9.2 | |
@@ -1063,7 +1064,7 @@
| struct |`cudaExternalSemaphoreSignalParams` | 10.0 | |
| struct |`cudaExternalSemaphoreWaitParams` | 10.0 | |
| struct |`cudaHostNodeParams` | 10.0 | |
| struct |`cudaLaunchParams` | 9.0 | |
| struct |`cudaLaunchParams` | 9.0 |`hipLaunchParams` |
| struct |`cudaMemsetParams` | 10.0 | |
| struct |`CUeglStreamConnection_st` | 9.1 | |
| typedef |`cudaEglStreamConnection` | 9.1 | |
@@ -522,7 +522,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
// cudaOccupancyMaxActiveBlocksPerMultiprocessor
{"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}},
// cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
{"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER, HIP_UNSUPPORTED}},
{"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}},
// cudaOccupancyMaxPotentialBlockSize
{"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}},
// cudaOccupancyMaxPotentialBlockSizeWithFlags
@@ -486,9 +486,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP{
// no analogue: cudaDevAttrReserved94
{"CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 94
// cudaDevAttrCooperativeLaunch
{"CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 95
{"CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 95
// cudaDevAttrCooperativeMultiDeviceLaunch
{"CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 96
{"CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 96
// cudaDevAttrMaxSharedMemoryPerBlockOptin
{"CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 97
// cudaDevAttrCanFlushRemoteWrites
@@ -194,10 +194,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
{"cudaGetParameterBufferV2", {"hipGetParameterBufferV2", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuLaunchCooperativeKernel due to different signatures
{"cudaLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaLaunchCooperativeKernel", {"hipLaunchCooperativeKernel", "", CONV_EXECUTION, API_RUNTIME}},
// no analogue
// NOTE: Not equal to cuLaunchCooperativeKernelMultiDevice due to different signatures
{"cudaLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaLaunchCooperativeKernelMultiDevice", {"hipLaunchCooperativeKernelMultiDevice", "", CONV_EXECUTION, API_RUNTIME}},
// cuLaunchHostFunc
{"cudaLaunchHostFunc", {"hipLaunchHostFunc", "", CONV_EXECUTION, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
@@ -212,7 +212,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
// cuOccupancyMaxActiveBlocksPerMultiprocessor
{"cudaOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_RUNTIME}},
// cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
{"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_RUNTIME}},
// cuOccupancyMaxPotentialBlockSize
{"cudaOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_RUNTIME}},
// cuOccupancyMaxPotentialBlockSizeWithFlags
@@ -84,7 +84,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
// no analogue
// CUDA_LAUNCH_PARAMS struct differs
{"cudaLaunchParams", {"hipLaunchParams", "", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaLaunchParams", {"hipLaunchParams", "", CONV_TYPE, API_RUNTIME}},
// no analogue
// NOTE: HIP struct is bigger and contains cudaMemcpy3DParms only in the beginning
@@ -402,9 +402,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
// CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR
{"cudaDevAttrReserved94", {"hipDeviceAttributeCanUseStreamWaitValueNor", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 94
// CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH
{"cudaDevAttrCooperativeLaunch", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 95
{"cudaDevAttrCooperativeLaunch", {"hipDeviceAttributeCooperativeLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 95
// CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH
{"cudaDevAttrCooperativeMultiDeviceLaunch", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 96
{"cudaDevAttrCooperativeMultiDeviceLaunch", {"hipDeviceAttributeCooperativeMultiDeviceLaunch", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 96
// CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
{"cudaDevAttrMaxSharedMemoryPerBlockOptin", {"hipDeviceAttributeMaxSharedMemoryPerBlockOptin", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 97
// CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES