Add hipDrvOccupancyMaxActiveBlocksPerMultiprocessor[WithFlags] (#1854)
Equivalent to cuOccupancyMaxActiveBlocksPerMultiprocessor[WithFlags].
This commit is contained in:
@@ -1126,10 +1126,10 @@
|
||||
|
||||
| **CUDA** | **HIP** |**CUDA version\***|
|
||||
|-----------------------------------------------------------|---------------------------------------------------------|------------------|
|
||||
| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` |
|
||||
| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |
|
||||
| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` |
|
||||
| `cuOccupancyMaxPotentialBlockSizeWithFlags` | |
|
||||
| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipDrvOccupancyMaxActiveBlocksPerMultiprocessor` |
|
||||
| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |
|
||||
| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` |
|
||||
| `cuOccupancyMaxPotentialBlockSizeWithFlags` | |
|
||||
|
||||
## **22. Texture Reference Management [DEPRECATED]**
|
||||
|
||||
|
||||
@@ -545,9 +545,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
|
||||
|
||||
// 5.21. Occupancy
|
||||
// cudaOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
{"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}},
|
||||
{"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}},
|
||||
// cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
|
||||
{"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}},
|
||||
{"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}},
|
||||
// cudaOccupancyMaxPotentialBlockSize
|
||||
{"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}},
|
||||
// cudaOccupancyMaxPotentialBlockSizeWithFlags
|
||||
|
||||
@@ -2959,7 +2959,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] func Kernel function for which occupancy is calculated
|
||||
* @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
*/
|
||||
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] f Kernel function for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
* @param [in] flags Extra flags for occupancy calculation (currently ignored)
|
||||
@@ -2967,6 +2978,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault));
|
||||
|
||||
/**
|
||||
* @brief Returns occupancy for a device function.
|
||||
*
|
||||
* @param [out] numBlocks Returned occupancy
|
||||
* @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
|
||||
* @param [in] blockSize Block size the kernel is intended to be launched with
|
||||
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
|
||||
* @param [in] flags Extra flags for occupancy calculation (currently ignored)
|
||||
*/
|
||||
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
|
||||
|
||||
#if __HIP_VDI__ && !defined(__HCC__)
|
||||
/**
|
||||
* @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched
|
||||
|
||||
@@ -1471,6 +1471,15 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
// Variant of the occupancy query that takes `int*` / `int` arguments.
// Registers the call via HIP_INIT_API, then forwards to
// ihipOccupancyMaxActiveBlocksPerMultiprocessor and returns its status
// through ihipLogStatus.
// NOTE(review): `numBlocks` is reinterpreted as `uint32_t*` for the helper
// call; this presumably relies on `int` and `uint32_t` having the same size
// on supported targets -- confirm.
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
|
||||
{
|
||||
HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk);
|
||||
|
||||
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk,
|
||||
unsigned int flags)
|
||||
@@ -1481,6 +1490,15 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
// "WithFlags" variant of the `int*` / `int` occupancy query.
// `flags` is passed to HIP_INIT_API only; it is not forwarded to
// ihipOccupancyMaxActiveBlocksPerMultiprocessor, so the helper call below
// receives the same arguments as the variant without flags.
// NOTE(review): `numBlocks` is reinterpreted as `uint32_t*` for the helper
// call; this presumably relies on `int` and `uint32_t` having the same size
// on supported targets -- confirm.
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk,
|
||||
unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags);
|
||||
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
tls, (uint32_t*) numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
hipError_t hipLaunchKernel(
|
||||
const void* func_addr, dim3 numBlocks, dim3 dimBlocks, void** args,
|
||||
size_t sharedMemBytes, hipStream_t stream)
|
||||
|
||||
Reference in New Issue
Block a user