[HIP][HIPIFY] Add some missing flags for cooperative launch and occupancy APIs
[ROCm/hip commit: 6e867eacb6]
This commit is contained in:
@@ -1186,6 +1186,7 @@ sub simpleSubstitutions {
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_INVALID_PTX\b/hipErrorInvalidKernelFile/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_INVALID_SOURCE\b/hipErrorInvalidSource/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_INVALID_VALUE\b/hipErrorInvalidValue/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE\b/hipErrorCooperativeLaunchTooLarge/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_LAUNCH_FAILED\b/hipErrorLaunchFailure/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_LAUNCH_OUT_OF_RESOURCES\b/hipErrorLaunchOutOfResources/g;
|
||||
$ft{'numeric_literal'} += s/\bCUDA_ERROR_LAUNCH_TIMEOUT\b/hipErrorLaunchTimeOut/g;
|
||||
@@ -1659,6 +1660,7 @@ sub simpleSubstitutions {
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorInvalidSource\b/hipErrorInvalidSource/g;
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorInvalidSymbol\b/hipErrorInvalidSymbol/g;
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorInvalidValue\b/hipErrorInvalidValue/g;
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorCooperativeLaunchTooLarge\b/hipErrorCooperativeLaunchTooLarge/g;
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorLaunchFailure\b/hipErrorLaunchFailure/g;
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorLaunchOutOfResources\b/hipErrorLaunchOutOfResources/g;
|
||||
$ft{'numeric_literal'} += s/\bcudaErrorLaunchTimeout\b/hipErrorLaunchTimeOut/g;
|
||||
@@ -1801,6 +1803,9 @@ sub simpleSubstitutions {
|
||||
$ft{'define'} += s/\bcudaTextureType3D\b/hipTextureType3D/g;
|
||||
$ft{'define'} += s/\bcudaTextureTypeCubemap\b/hipTextureTypeCubemap/g;
|
||||
$ft{'define'} += s/\bcudaTextureTypeCubemapLayered\b/hipTextureTypeCubemapLayered/g;
|
||||
$ft{'define'} += s/\bcudaOccupancyDefault\b/hipOccupancyDefault/g;
|
||||
$ft{'define'} += s/\bcudaCooperativeLaunchMultiDeviceNoPreSync\b/hipCooperativeLaunchMultiDeviceNoPreSync/g;
|
||||
$ft{'define'} += s/\bcudaCooperativeLaunchMultiDeviceNoPostSync\b/hipCooperativeLaunchMultiDeviceNoPostSync/g;
|
||||
}
|
||||
|
||||
# CUDA extern __shared__ syntax replace with HIP_DYNAMIC_SHARED() macro
|
||||
|
||||
@@ -447,6 +447,7 @@
|
||||
| 717 |*`CUDA_ERROR_INVALID_ADDRESS_SPACE`* | |
|
||||
| 718 |*`CUDA_ERROR_INVALID_PC`* | |
|
||||
| 719 |*`CUDA_ERROR_LAUNCH_FAILED`* |*`hipErrorLaunchFailure`* |
|
||||
| 720 |*`CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE`* |*`hipErrorCooperativeLaunchTooLarge`* |
|
||||
| 800 |*`CUDA_ERROR_NOT_PERMITTED`* | |
|
||||
| 801 |*`CUDA_ERROR_NOT_SUPPORTED`* |*`hipErrorNotSupported`* |
|
||||
| 802 |*`CUDA_ERROR_SYSTEM_NOT_READY`* | | 10.0 |
|
||||
@@ -1063,8 +1064,8 @@
|
||||
| `cuFuncSetSharedMemConfig` | |
|
||||
| `cuLaunchKernel` | `hipModuleLaunchKernel` |
|
||||
| `cuLaunchHostFunc` | | 10.0 |
|
||||
| `cuLaunchCooperativeKernel` | | 9.0 |
|
||||
| `cuLaunchCooperativeKernelMultiDevice` | | 9.0 |
|
||||
| `cuLaunchCooperativeKernel` | `hipLaunchCooperativeKernel` | 9.0 |
|
||||
| `cuLaunchCooperativeKernelMultiDevice` | `hipLaunchCooperativeKernelMultiDevice` | 9.0 |
|
||||
|
||||
## **19. Execution Control [DEPRECATED]**
|
||||
|
||||
|
||||
@@ -800,7 +800,7 @@
|
||||
| 717 |*`cudaErrorInvalidAddressSpace`* | | |
|
||||
| 718 |*`cudaErrorInvalidPc`* | | |
|
||||
| 719 |*`cudaErrorLaunchFailure`* | |*`hipErrorLaunchFailure`* |
|
||||
| 720 |*`cudaErrorCooperativeLaunchTooLarge`* | 9.0 | |
|
||||
| 720 |*`cudaErrorCooperativeLaunchTooLarge`* | 9.0 |*`hipErrorCooperativeLaunchTooLarge`* |
|
||||
| 800 |*`cudaErrorNotPermitted`* | | |
|
||||
| 801 |*`cudaErrorNotSupported`* | |*`hipErrorNotSupported`* |
|
||||
| 802 |*`cudaErrorSystemNotReady`* | 10.0 | |
|
||||
@@ -1045,7 +1045,7 @@
|
||||
| define |`cudaMemAttachGlobal` | |`hipMemAttachGlobal` |
|
||||
| define |`cudaMemAttachHost` | |`hipMemAttachHost` |
|
||||
| define |`cudaMemAttachSingle` | | |
|
||||
| define |`cudaOccupancyDefault` | | |
|
||||
| define |`cudaOccupancyDefault` | |`hipOccupancyDefault` |
|
||||
| define |`cudaOccupancyDisableCachingOverride` | | |
|
||||
| define |`cudaPeerAccessDefault` | | |
|
||||
| define |`cudaStreamDefault` | |`hipStreamDefault` |
|
||||
|
||||
@@ -1284,6 +1284,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP{
|
||||
{"CUDA_ERROR_INVALID_PC", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 718
|
||||
// cudaErrorLaunchFailure
|
||||
{"CUDA_ERROR_LAUNCH_FAILED", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_DRIVER}}, // 719
|
||||
// cudaErrorCooperativeLaunchTooLarge
|
||||
{"CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 720
|
||||
// cudaErrorNotPermitted
|
||||
{"CUDA_ERROR_NOT_PERMITTED", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_DRIVER, HIP_UNSUPPORTED}}, // 800
|
||||
// cudaErrorNotSupported
|
||||
|
||||
@@ -787,7 +787,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP {
|
||||
{"cudaErrorInvalidPc", {"hipErrorInvalidPc", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 718
|
||||
// CUDA_ERROR_LAUNCH_FAILED
|
||||
{"cudaErrorLaunchFailure", {"hipErrorLaunchFailure", "", CONV_NUMERIC_LITERAL, API_RUNTIME}}, // 719
|
||||
// no analogue
|
||||
// CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE
|
||||
{"cudaErrorCooperativeLaunchTooLarge", {"hipErrorCooperativeLaunchTooLarge", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 720
|
||||
// CUDA_ERROR_NOT_PERMITTED
|
||||
{"cudaErrorNotPermitted", {"hipErrorNotPermitted", "", CONV_NUMERIC_LITERAL, API_RUNTIME, HIP_UNSUPPORTED}}, // 800
|
||||
|
||||
@@ -212,6 +212,11 @@ enum hipLimit_t {
|
||||
#define hipArrayCubemap 0x04
|
||||
#define hipArrayTextureGather 0x08
|
||||
|
||||
#define hipOccupancyDefault 0x00
|
||||
|
||||
#define hipCooperativeLaunchMultiDeviceNoPreSync 0x01
|
||||
#define hipCooperativeLaunchMultiDeviceNoPostSync 0x02
|
||||
|
||||
/*
|
||||
* @brief hipJitOption
|
||||
* @enum
|
||||
@@ -2903,7 +2908,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigne
|
||||
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case the
|
||||
* default stream is used with associated synchronization rules.
|
||||
*
|
||||
* @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
|
||||
* @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge
|
||||
*/
|
||||
hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDimX,
|
||||
void** kernelParams, unsigned int sharedMemBytes,
|
||||
@@ -2917,7 +2922,7 @@ hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim, dim3 blockDim
|
||||
* @param [in] numDevices Size of the launchParamsList array.
|
||||
* @param [in] flags Flags to control launch behavior.
|
||||
*
|
||||
* @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue
|
||||
* @returns hipSuccess, hipInvalidDevice, hipErrorNotInitialized, hipErrorInvalidValue, hipErrorCooperativeLaunchTooLarge
|
||||
*/
|
||||
hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
|
||||
int numDevices, unsigned int flags);
|
||||
@@ -2960,7 +2965,7 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
* @param [in] flags Extra flags for occupancy calculation (currently ignored)
|
||||
*/
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault));
|
||||
|
||||
#if __HIP_VDI__ && !defined(__HCC__)
|
||||
/**
|
||||
|
||||
@@ -255,6 +255,10 @@ typedef enum __HIP_NODISCARD hipError_t {
|
||||
713, ///< Produced when trying to unlock a non-page-locked memory.
|
||||
hipErrorLaunchFailure =
|
||||
719, ///< An exception occurred on the device while executing a kernel.
|
||||
hipErrorCooperativeLaunchTooLarge =
|
||||
720, ///< This error indicates that the number of blocks launched per grid for a kernel
|
||||
///< that was launched via cooperative launch APIs exceeds the maximum number of
|
||||
///< allowed blocks for the current device
|
||||
hipErrorNotSupported = 801, ///< Produced when the hip API is not supported/implemented
|
||||
hipErrorUnknown = 999, //< Unknown error.
|
||||
// HSA Runtime Error Codes start here.
|
||||
|
||||
@@ -140,6 +140,12 @@ typedef enum cudaChannelFormatKind hipChannelFormatKind;
|
||||
#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
|
||||
#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
|
||||
|
||||
#define hipOccupancyDefault cudaOccupancyDefault
|
||||
|
||||
#define hipCooperativeLaunchMultiDeviceNoPreSync cudaCooperativeLaunchMultiDeviceNoPreSync
|
||||
#define hipCooperativeLaunchMultiDeviceNoPostSync cudaCooperativeLaunchMultiDeviceNoPostSync
|
||||
|
||||
|
||||
// enum CUjit_option redefines
|
||||
#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
|
||||
#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
|
||||
@@ -267,6 +273,8 @@ inline static hipError_t hipCUDAErrorTohipError(cudaError_t cuError) {
|
||||
return hipErrorNotInitialized;
|
||||
case cudaErrorLaunchFailure:
|
||||
return hipErrorLaunchFailure;
|
||||
case cudaErrorCooperativeLaunchTooLarge:
|
||||
return hipErrorCooperativeLaunchTooLarge;
|
||||
case cudaErrorPriorLaunchFailure:
|
||||
return hipErrorPriorLaunchFailure;
|
||||
case cudaErrorLaunchOutOfResources:
|
||||
@@ -445,6 +453,8 @@ inline static hipError_t hipCUResultTohipError(CUresult cuError) {
|
||||
return hipErrorHostMemoryNotRegistered;
|
||||
case CUDA_ERROR_LAUNCH_FAILED:
|
||||
return hipErrorLaunchFailure;
|
||||
case CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE:
|
||||
return hipErrorCooperativeLaunchTooLarge;
|
||||
case CUDA_ERROR_NOT_SUPPORTED:
|
||||
return hipErrorNotSupported;
|
||||
case CUDA_ERROR_UNKNOWN:
|
||||
@@ -601,6 +611,8 @@ inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
|
||||
return cudaErrorSetOnActiveProcess;
|
||||
case hipErrorLaunchFailure:
|
||||
return cudaErrorLaunchFailure;
|
||||
case hipErrorCooperativeLaunchTooLarge:
|
||||
return cudaErrorCooperativeLaunchTooLarge;
|
||||
case hipErrorNotSupported:
|
||||
return cudaErrorNotSupported;
|
||||
// HSA: does not exist in CUDA
|
||||
|
||||
@@ -1796,6 +1796,8 @@ const char* ihipErrorString(hipError_t hip_error) {
|
||||
return "hipErrorMissingConfiguration";
|
||||
case hipErrorLaunchFailure:
|
||||
return "hipErrorLaunchFailure";
|
||||
case hipErrorCooperativeLaunchTooLarge:
|
||||
return "hipErrorCooperativeLaunchTooLarge";
|
||||
case hipErrorPriorLaunchFailure:
|
||||
return "hipErrorPriorLaunchFailure";
|
||||
case hipErrorLaunchTimeOut:
|
||||
|
||||
Reference in New Issue
Block a user