Match Occupancy APIs syntax with CUDA (#1625)
* Match Occupancy APIs syntax with CUDA and fix tests using these APIs
Этот коммит содержится в:
@@ -1368,7 +1368,7 @@ hipError_t hipOccupancyMaxPotentialBlockSize(uint32_t* gridSize, uint32_t* block
|
||||
}
|
||||
|
||||
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
|
||||
TlsData *tls, int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk)
|
||||
{
|
||||
using namespace hip_impl;
|
||||
|
||||
@@ -1408,35 +1408,41 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
: std::min(maxWavesPerSimd, availableSGPRs / usedSGPRS));
|
||||
|
||||
// Calculate blocks occupancy per CU based on SGPR usage
|
||||
*numBlocks = std::min(*numBlocks, (uint32_t) (sgprs_alu_occupancy / numWavefronts));
|
||||
*numBlocks = std::min(*numBlocks, (int) (sgprs_alu_occupancy / numWavefronts));
|
||||
|
||||
size_t total_used_lds = usedLDS + dynSharedMemPerBlk;
|
||||
if (total_used_lds != 0) {
|
||||
// Calculate LDS occupancy per CU. lds_per_cu / (static_lds + dynamic_lds)
|
||||
size_t lds_occupancy = prop.maxSharedMemoryPerMultiProcessor / total_used_lds;
|
||||
*numBlocks = std::min(*numBlocks, (uint32_t) lds_occupancy);
|
||||
*numBlocks = std::min(*numBlocks, (int) lds_occupancy);
|
||||
}
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk)
|
||||
{
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, numBlocks, f, blockSize, dynSharedMemPerBlk);
|
||||
|
||||
auto F = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)(f),
|
||||
hip_impl::target_agent(0));
|
||||
|
||||
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
tls, numBlocks, F, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
||||
uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk,
|
||||
int* numBlocks, const void* f, int blockSize, size_t dynSharedMemPerBlk,
|
||||
unsigned int flags)
|
||||
{
|
||||
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, numBlocks, f, blockSize, dynSharedMemPerBlk, flags);
|
||||
|
||||
auto F = hip_impl::get_program_state().kernel_descriptor((std::uintptr_t)(f),
|
||||
hip_impl::target_agent(0));
|
||||
|
||||
return ihipLogStatus(ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
tls, numBlocks, f, blockSize, dynSharedMemPerBlk));
|
||||
tls, numBlocks, F, blockSize, dynSharedMemPerBlk));
|
||||
}
|
||||
|
||||
hipError_t hipLaunchKernel(
|
||||
|
||||
Ссылка в новой задаче
Block a user