From bf04d7380a2a817f5b4d2d8b01867c2dc2d9dd8d Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Tue, 17 Mar 2020 03:30:38 -0500 Subject: [PATCH] Fix errors in occupancy calculation function (#1926) Fix two errors in hipOccupancyMaxActiveBlocksPerMultiprocessor. 1) Fix a possible segfault if the user passed in a null pointer for the numBlocks value. 2) Handle the situation when the user is asking for a block size that is larger than what the target device can hold within a single block. --- src/hip_module.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 65c218c92d..6ec260b58a 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -426,7 +426,7 @@ void getGprsLdsUsage(hipFunction_t f, size_t* usedVGPRS, size_t* usedSGPRS, size } } -hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( +static hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( TlsData *tls, uint32_t* numBlocks, hipFunction_t f, uint32_t blockSize, size_t dynSharedMemPerBlk) { using namespace hip_impl; @@ -435,10 +435,18 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor( if (ctx == nullptr) { return hipErrorInvalidDevice; } + if (numBlocks == nullptr) { + return hipErrorInvalidValue; + } hipDeviceProp_t prop{}; ihipGetDeviceProperties(&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + if (blockSize > prop.maxThreadsPerBlock) { + *numBlocks = 0; + return hipSuccess; + } + prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024; size_t usedVGPRS = 0;