Add hipDrvOccupancyMaxActiveBlocksPerMultiprocessor[WithFlags] (#1854)

Equivalent to cuOccupancyMaxActiveBlocksPerMultiprocessor[WithFlags].
Этот коммит содержится в:
Rahul Garg
2020-02-28 03:16:55 -08:00
коммит произвёл GitHub
родитель af90312867
Коммит edc97f3073
4 изменённых файлов: 48 добавлений и 7 удалений
+4 -4
Просмотреть файл
@@ -1126,10 +1126,10 @@
| **CUDA** | **HIP** |**CUDA version\***|
|-----------------------------------------------------------|---------------------------------------------------------|------------------|
| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |
| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` |
| `cuOccupancyMaxPotentialBlockSizeWithFlags` | |
| `cuOccupancyMaxActiveBlocksPerMultiprocessor` |`hipDrvOccupancyMaxActiveBlocksPerMultiprocessor` |
| `cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |`hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` |
| `cuOccupancyMaxPotentialBlockSize` |`hipOccupancyMaxPotentialBlockSize` |
| `cuOccupancyMaxPotentialBlockSizeWithFlags` | |
## **22. Texture Reference Management [DEPRECATED]**
+2 -2
Просмотреть файл
@@ -545,9 +545,9 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP{
// 5.21. Occupancy
// cudaOccupancyMaxActiveBlocksPerMultiprocessor
{"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}},
{"cuOccupancyMaxActiveBlocksPerMultiprocessor", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessor", "", CONV_OCCUPANCY, API_DRIVER}},
// cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags
{"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}},
{"cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", {"hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "", CONV_OCCUPANCY, API_DRIVER}},
// cudaOccupancyMaxPotentialBlockSize
{"cuOccupancyMaxPotentialBlockSize", {"hipOccupancyMaxPotentialBlockSize", "", CONV_OCCUPANCY, API_DRIVER}},
// cudaOccupancyMaxPotentialBlockSizeWithFlags
+24 -1
Просмотреть файл
@@ -2959,7 +2959,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
* @brief Returns occupancy for a device function.
*
* @param [out] numBlocks Returned occupancy
* @param [in] func Kernel function for which occupancy is calculated
* @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
* @param [in] blockSize Block size the kernel is intended to be launched with
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
*/
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor(
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk);
/**
* @brief Returns occupancy for a device function.
*
* @param [out] numBlocks Returned occupancy
* @param [in] f Kernel function for which occupancy is calculated
* @param [in] blockSize Block size the kernel is intended to be launched with
* @param [in] dynSharedMemPerBlk dynamic shared memory usage (in bytes) intended for each block
* @param [in] flags Extra flags for occupancy calculation (currently ignored)
@@ -2967,6 +2978,18 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk, unsigned int flags __dparm(hipOccupancyDefault));
/**
* @brief Returns occupancy for a device function (driver-API variant taking plain int arguments).
*
* @param [out] numBlocks Returned occupancy
* @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
* @param [in] blockSize Block size the kernel is intended to be launched with
* @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block
* @param [in] flags Extra flags for occupancy calculation (currently ignored)
*/
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags);
#if __HIP_VDI__ && !defined(__HCC__)
/**
* @brief Launches kernels on multiple devices and guarantees all specified kernels are dispatched
+18
Просмотреть файл
@@ -1471,6 +1471,15 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
}
/**
 * Driver-API entry point for the occupancy query.
 *
 * Accepts plain `int` arguments and forwards them to
 * ihipOccupancyMaxActiveBlocksPerMultiprocessor, logging the resulting status.
 *
 * @param [out] numBlocks Returned occupancy
 * @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
 * @param [in] blockSize Block size the kernel is intended to be launched with
 * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block
 */
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessor(
    int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
{
    HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk);

    // The shared helper is handed a uint32_t*, so the caller's int* is reinterpreted in place.
    uint32_t* const occupancyOut = reinterpret_cast<uint32_t*>(numBlocks);
    return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
        tls, occupancyOut, f, blockSize, dynSharedMemPerBlk));
}
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk,
unsigned int flags)
@@ -1481,6 +1490,15 @@ hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
}
/**
 * Driver-API entry point for the occupancy query, flag-taking variant.
 *
 * Accepts plain `int` arguments and forwards them to
 * ihipOccupancyMaxActiveBlocksPerMultiprocessor, logging the resulting status.
 *
 * @param [out] numBlocks Returned occupancy
 * @param [in] f Kernel function (hipFunction_t) for which occupancy is calculated
 * @param [in] blockSize Block size the kernel is intended to be launched with
 * @param [in] dynSharedMemPerBlk Dynamic shared memory usage (in bytes) intended for each block
 * @param [in] flags Extra flags for occupancy calculation; traced but not forwarded to the helper
 */
hipError_t hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
    int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk,
    unsigned int flags)
{
    HIP_INIT_API(hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags);

    // The shared helper is handed a uint32_t*, so the caller's int* is reinterpreted in place.
    uint32_t* const occupancyOut = reinterpret_cast<uint32_t*>(numBlocks);
    return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
        tls, occupancyOut, f, blockSize, dynSharedMemPerBlk));
}
hipError_t hipLaunchKernel(
const void* func_addr, dim3 numBlocks, dim3 dimBlocks, void** args,
size_t sharedMemBytes, hipStream_t stream)