Fix errors in occupancy calculation function (#1926)
Fix two errors in hipOccupancyMaxActiveBlocksPerMultiprocessor: 1) Fix a possible segfault when the user passes a null pointer for numBlocks; the function now returns hipErrorInvalidValue instead. 2) Handle the case where the requested block size is larger than the maximum number of threads the target device supports in a single block; the function now sets *numBlocks to 0 and returns hipSuccess.
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
7aa9611689
Коммит
bf04d7380a
@@ -426,7 +426,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk)
|
||||
{
|
||||
using namespace hip_impl;
|
||||
@@ -435,10 +435,18 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
||||
if (ctx == nullptr) {
|
||||
return hipErrorInvalidDevice;
|
||||
}
|
||||
if (numBlocks == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
hipDeviceProp_t prop{};
|
||||
ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId);
|
||||
|
||||
if (blockSize > prop.maxThreadsPerBlock) {
|
||||
*numBlocks = 0;
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024;
|
||||
|
||||
size_t usedVGPRS = 0;
|
||||
|
||||
Ссылка в новой задаче
Block a user