From 78a36b4fb81d9eb2fda5861d05ccf02a134e400a Mon Sep 17 00:00:00 2001 From: Aryan Salmanpour Date: Thu, 30 Jan 2020 15:05:53 -0500 Subject: [PATCH] [HIP][HIPIFY] Add some missing flags for cooperative launch and occupancy APIs [ROCm/hip commit: 6e867eacb6a8d970993b27c99853e47b3841da68] --- projects/hip/bin/hipify-perl | 5 +++++ .../CUDA_Driver_API_functions_supported_by_HIP.md | 5 +++-- .../CUDA_Runtime_API_functions_supported_by_HIP.md | 4 ++-- .../hipify-clang/src/CUDA2HIP_Driver_API_types.cpp | 2 ++ .../hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp | 2 +- .../hip/include/hip/hcc_detail/hip_runtime_api.h | 11 ++++++++--- projects/hip/include/hip/hip_runtime_api.h | 4 ++++ .../hip/include/hip/nvcc_detail/hip_runtime_api.h | 12 ++++++++++++ projects/hip/src/hip_hcc.cpp | 2 ++ 9 files changed, 39 insertions(+), 8 deletions(-) diff --git a/projects/hip/bin/hipify-perl b/projects/hip/bin/hipify-perl index 4fe9b80349..d18e227c3d 100755 --- a/projects/hip/bin/hipify-perl +++ b/projects/hip/bin/hipify-perl @@ -1186,6 +1186,7 @@ sub simpleSubstitutions { $ft{'numeric_literal'} += s/\bCUDA_ERROR_INVALID_PTX\b/hipErrorInvalidKernelFile/g; $ft{'numeric_literal'} += s/\bCUDA_ERROR_INVALID_SOURCE\b/hipErrorInvalidSource/g; $ft{'numeric_literal'} += s/\bCUDA_ERROR_INVALID_VALUE\b/hipErrorInvalidValue/g; + $ft{'numeric_literal'} += s/\bCUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE\b/hipErrorCooperativeLaunchTooLarge/g; $ft{'numeric_literal'} += s/\bCUDA_ERROR_LAUNCH_FAILED\b/hipErrorLaunchFailure/g; $ft{'numeric_literal'} += s/\bCUDA_ERROR_LAUNCH_OUT_OF_RESOURCES\b/hipErrorLaunchOutOfResources/g; $ft{'numeric_literal'} += s/\bCUDA_ERROR_LAUNCH_TIMEOUT\b/hipErrorLaunchTimeOut/g; @@ -1659,6 +1660,7 @@ sub simpleSubstitutions { $ft{'numeric_literal'} += s/\bcudaErrorInvalidSource\b/hipErrorInvalidSource/g; $ft{'numeric_literal'} += s/\bcudaErrorInvalidSymbol\b/hipErrorInvalidSymbol/g; $ft{'numeric_literal'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g; + $ft{'numeric_literal'} += s/\bcudaErrorCooperativeLaunchTooLarge\b/hipErrorCooperativeLaunchTooLarge/g; $ft{'numeric_literal'} += s/\bcudaErrorLaunchFailure\b/hipErrorLaunchFailure/g; $ft{'numeric_literal'} += s/\bcudaErrorLaunchOutOfResources\b/hipErrorLaunchOutOfResources/g; $ft{'numeric_literal'} += s/\bcudaErrorLaunchTimeout\b/hipErrorLaunchTimeOut/g; @@ -1801,6 +1803,9 @@ sub simpleSubstitutions { $ft{'define'} += s/\bcudaTextureType3D\b/hipTextureType3D/g; $ft{'define'} += s/\bcudaTextureTypeCubemap\b/hipTextureTypeCubemap/g; $ft{'define'} += s/\bcudaTextureTypeCubemapLayered\b/hipTextureTypeCubemapLayered/g; + $ft{'define'} += s/\bcudaOccupancyDefault\b/hipOccupancyDefault/g; + $ft{'define'} += s/\bcudaCooperativeLaunchMultiDeviceNoPreSync\b/hipCooperativeLaunchMultiDeviceNoPreSync/g; + $ft{'define'} += s/\bcudaCooperativeLaunchMultiDeviceNoPostSync\b/hipCooperativeLaunchMultiDeviceNoPostSync/g; } # CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro diff --git a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index ff14cf9bc1..5be6a4da8d 100644 --- a/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -447,6 +447,7 @@ | 717 |*`CUDA_ERROR_INVALID_ADDRESS_SPACE`* | | | 718 |*`CUDA_ERROR_INVALID_PC`* | | | 719 |*`CUDA_ERROR_LAUNCH_FAILED`* |*`hipErrorLaunchFailure`* | +| 720 |*`CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`* |*`hipErrorCooperativeLaunchTooLarge`* | | 800 |*`CUDA_ERROR_NOT_PERMITTED`* | | | 801 |*`CUDA_ERROR_NOT_SUPPORTED`* |*`hipErrorNotSupported`* | | 802 |*`CUDA_ERROR_SYSTEM_NOT_READY`* | | 10.0 | @@ -1063,8 +1064,8 @@ | `cuFuncSetSharedMemConfig` | | | `cuLaunchKernel` | `hipModuleLaunchKernel` | | `cuLaunchHostFunc` | | 10.0 | -| `cuLaunchCooperativeKernel` | | 9.0 | -| `cuLaunchCooperativeKernelMultiDevice` | | 9.0 | +| `cuLaunchCooperativeKernel` | `hipLaunchCooperativeKernel` | 9.0 | +| `cuLaunchCooperativeKernelMultiDevice` | `hipLaunchCooperativeKernelMultiDevice` | 9.0 | ## **19. Execution Control [DEPRECATED]** diff --git a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md index 201d2aad0f..54e0c89e06 100644 --- a/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md +++ b/projects/hip/docs/markdown/CUDA_Runtime_API_functions_supported_by_HIP.md @@ -800,7 +800,7 @@ | 717 |*`cudaErrorInvalidAddressSpace`* | | | | 718 |*`cudaErrorInvalidPc`* | | | | 719 |*`cudaErrorLaunchFailure`* | |*`hipErrorLaunchFailure`* | -| 720 |*`cudaErrorCooperativeLaunchTooLarge`* | 9.0 | | +| 720 |*`cudaErrorCooperativeLaunchTooLarge`* | 9.0 |*`hipErrorCooperativeLaunchTooLarge`* | | 800 |*`cudaErrorNotPermitted`* | | | | 801 |*`cudaErrorNotSupported`* | |*`hipErrorNotSupported`* | | 802 |*`cudaErrorSystemNotReady`* | 10.0 | | @@ -1045,7 +1045,7 @@ | define |`cudaMemAttachGlobal` | |`hipMemAttachGlobal` | | define |`cudaMemAttachHost` | |`hipMemAttachHost` | | define |`cudaMemAttachSingle` | | | -| define |`cudaOccupancyDefault` | | | +| define |`cudaOccupancyDefault` | |`hipOccupancyDefault` | | define |`cudaOccupancyDisableCachingOverride` | | | | define |`cudaPeerAccessDefault` | | | | define |`cudaStreamDefault` | |`hipStreamDefault` | diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp index 1c3c2634f7..a4d419384d 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Driver_API_types.cpp @@ -1284,6 +1284,8 @@ const std::map CUDA_DRIVER_TYPE_NAME_MAP{ {"CUDA_ERROR_INVALID_PC", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 718 // cudaErrorLaunchFailure {"CUDA_ERROR_LAUNCH_FAILED", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 719 + // cudaErrorCooperativeLaunchTooLarge + {"CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 720 // cudaErrorNotPermitted {"CUDA_ERROR_NOT_PERMITTED", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 800 // cudaErrorNotSupported diff --git a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp index 5993f5d770..ade8b105f1 100644 --- a/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp +++ b/projects/hip/hipify-clang/src/CUDA2HIP_Runtime_API_types.cpp @@ -787,7 +787,7 @@ const std::map CUDA_RUNTIME_TYPE_NAME_MAP { {"cudaErrorInvalidPc", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 718 // CUDA_ERROR_LAUNCH_FAILED {"cudaErrorLaunchFailure", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 719 - // no analogue + // CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE {"cudaErrorCooperativeLaunchTooLarge", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 720 // CUDA_ERROR_NOT_PERMITTED {"cudaErrorNotPermitted", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 800 diff --git a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h index 659a6c3c3a..8ee44be6ac 100644 --- a/projects/hip/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hip/hcc_detail/hip_runtime_api.h @@ -212,6 +212,11 @@ enum hipLimit_t { #define hipArrayCubemap 0x04 #define hipArrayTextureGather 0x08 +#define hipOccupancyDefault 0x00 + +#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01 +#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02 + /* * @brief hipJitOption * @enum @@ -2903,7 +2908,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th * default stream is used with associated synchronization rules. * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge */ hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX, void** kernelParams, unsigned int sharedMemBytes, @@ -2917,7 +2922,7 @@ hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim * @param [in] numDevices Size of the launchParamsList array. * @param [in] flags Flags to control launch behavior. * - * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue + * @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge */ hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices, unsigned int flags); @@ -2960,7 +2965,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( * @param [in] flags Extra flags for occupancy calculation (currently ignored) */ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( - int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); + int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); #if __HIP_VDI__ && !defined(__HCC__) /** diff --git a/projects/hip/include/hip/hip_runtime_api.h b/projects/hip/include/hip/hip_runtime_api.h index 64b2a85d8a..025688e98c 100644 --- a/projects/hip/include/hip/hip_runtime_api.h +++ b/projects/hip/include/hip/hip_runtime_api.h @@ -255,6 +255,10 @@ typedef enum __HIP_NODISCARD hipError_t { 713, ///< Produced when trying to unlock a non-page-locked memory. hipErrorLaunchFailure = 719, ///< An exception occurred on the device while executing a kernel. + hipErrorCooperativeLaunchTooLarge = + 720, ///< This error indicates that the number of blocks launched per grid for a kernel + ///< that was launched via cooperative launch APIs exceeds the maximum number of + ///< allowed blocks for the current device hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented hipErrorUnknown = 999, //< Unknown error. // HSA Runtime Error Codes start here. diff --git a/projects/hip/include/hip/nvcc_detail/hip_runtime_api.h b/projects/hip/include/hip/nvcc_detail/hip_runtime_api.h index 05a162478a..1ee84b5057 100644 --- a/projects/hip/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/hip/include/hip/nvcc_detail/hip_runtime_api.h @@ -140,6 +140,12 @@ typedef enum cudaChannelFormatKind hipChannelFormatKind; #define hipLimitMallocHeapSize cudaLimitMallocHeapSize #define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess +#define hipOccupancyDefault cudaOccupancyDefault + +#define hipCooperativeLaunchMultiDeviceNoPreSync cudaCooperativeLaunchMultiDeviceNoPreSync +#define hipCooperativeLaunchMultiDeviceNoPostSync cudaCooperativeLaunchMultiDeviceNoPostSync + + // enum CUjit_option redefines #define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS #define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK @@ -267,6 +273,8 @@ inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) { return hipErrorNotInitialized; case cudaErrorLaunchFailure: return hipErrorLaunchFailure; + case cudaErrorCooperativeLaunchTooLarge: + return hipErrorCooperativeLaunchTooLarge; case cudaErrorPriorLaunchFailure: return hipErrorPriorLaunchFailure; case cudaErrorLaunchOutOfResources: @@ -445,6 +453,8 @@ inline static hipError_t hipCUResultTohipError(CUresult cuError) { return hipErrorHostMemoryNotRegistered; case CUDA_ERROR_LAUNCH_FAILED: return hipErrorLaunchFailure; + case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: + return hipErrorCooperativeLaunchTooLarge; case CUDA_ERROR_NOT_SUPPORTED: return hipErrorNotSupported; case CUDA_ERROR_UNKNOWN: @@ -601,6 +611,8 @@ inline static cudaError_t hipErrorToCudaError(hipError_t hError) { return cudaErrorSetOnActiveProcess; case hipErrorLaunchFailure: return cudaErrorLaunchFailure; + case hipErrorCooperativeLaunchTooLarge: + return cudaErrorCooperativeLaunchTooLarge; case hipErrorNotSupported: return cudaErrorNotSupported; // HSA: does not exist in CUDA diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 175d301ee1..85c08943bf 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -1796,6 +1796,8 @@ const char* ihipErrorString(hipError_t hip_error) { return "hipErrorMissingConfiguration"; case hipErrorLaunchFailure: return "hipErrorLaunchFailure"; + case hipErrorCooperativeLaunchTooLarge: + return "hipErrorCooperativeLaunchTooLarge"; case hipErrorPriorLaunchFailure: return "hipErrorPriorLaunchFailure"; case hipErrorLaunchTimeOut: