diff --git a/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 8956533ed0..e77997b0b6 100644 --- a/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -1126,10 +1126,10 @@ | **CUDA** | **HIP** |**CUDA version\***| |-----------------------------------------------------------|---------------------------------------------------------|------------------| -| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` | -| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | -| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` | -| `cuOccupancyMaxPotentialBlockSizeWithFlags` | | +| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipDrvOccupancyMaxActiveBlocksPerMultiprocessor` | +| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` | +| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` | +| `cuOccupancyMaxPotentialBlockSizeWithFlags` | | ## **22. Texture Reference Management [DEPRECATED]** diff --git a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp index 5d700631c4..ab07a10e93 100644 --- a/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp +++ b/hipify-clang/src/CUDA2HIP_Driver_API_functions.cpp @@ -545,9 +545,9 @@ const std::map CUDA_DRIVER_FUNCTION_MAP{ // 5.21. Occupancy // cudaOccupancyMaxActiveBlocksPerMultiprocessor - {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags - {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}}, + {"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSize {"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}}, // cudaOccupancyMaxPotentialBlockSizeWithFlags diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 76209ef6a7..2c6726c161 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -2959,7 +2959,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( * @brief Returns occupancy for a device function. * * @param [out] numBlocks Returned occupancy - * @param [in] func Kernel function for which occupancy is calulated + * @param [in] func Kernel function (hipFunction) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + */ +hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk); + +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function for which occupancy is calulated * @param [in] blockSize Block size the kernel is intended to be launched with * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block * @param [in] flags Extra flags for occupancy calculation (currently ignored) @@ -2967,6 +2978,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault)); +/** + * @brief Returns occupancy for a device function. + * + * @param [out] numBlocks Returned occupancy + * @param [in] f Kernel function(hipFunction_t) for which occupancy is calulated + * @param [in] blockSize Block size the kernel is intended to be launched with + * @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block + * @param [in] flags Extra flags for occupancy calculation (currently ignored) + */ +hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags); + #if __HIP_VDI__ && !defined(__HCC__) /** * @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 44f0f108a6..a88abba9cb 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -1471,6 +1471,15 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor( tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } +hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk) +{ + HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk); + + return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( + tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk)); +} + hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags) @@ -1481,6 +1490,15 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( tls, numBlocks, f, blockSize, dynSharedMemPerBlk)); } +hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, + unsigned int flags) +{ + HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags); + return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor( + tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk)); +} + hipError_t hipLaunchKernel( const void* func_addr, dim3 numBlocks, dim3 dimBlocks, void** args, size_t sharedMemBytes, hipStream_t stream)