From 6ccfa38fa35b7b4c9cdbde2d5179da720295dec9 Mon Sep 17 00:00:00 2001
From: foreman
Date: Thu, 1 Aug 2019 16:40:24 -0400
Subject: [PATCH] P4 to Git Change 1977240 by wchau@wc_hip_vdi on 2019/08/01
16:35:47
SWDEV-180872 - Runtime support changes for Cooperative Group Features
- Take SGPR usage into account when determining the block size
Affected files ...
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#35 edit
[ROCm/hip commit: 99d54cb7bd613ce09f22e9a088bd93efc651b7a9]
---
projects/hip/api/hip/hip_platform.cpp | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/projects/hip/api/hip/hip_platform.cpp b/projects/hip/api/hip/hip_platform.cpp
index d284570322..1b8a7b5c96 100644
--- a/projects/hip/api/hip/hip_platform.cpp
+++ b/projects/hip/api/hip/hip_platform.cpp
@@ -492,9 +492,19 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
// Find threads accupancy per CU => simd_per_cu * GPR usage
constexpr size_t MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
- size_t alu_accupancy = device->info().simdPerCU_ *
- std::min(MaxWavesPerSimd, (wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4)));
+ size_t VgprWaves = wrkGrpInfo->availableVGPRs_ / amd::alignUp(wrkGrpInfo->usedVGPRs_, 4);
+ size_t GprWaves;
+ if (wrkGrpInfo->usedSGPRs_ > 0) {
+ const size_t maxSGPRs = (device->info().gfxipVersion_ < 800) ? 512 : 800;
+ size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
+ GprWaves = std::min(VgprWaves, SgprWaves);
+ }
+ else {
+ GprWaves = VgprWaves;
+ }
+
+ size_t alu_accupancy = device->info().simdPerCU_ * std::min(MaxWavesPerSimd, GprWaves);
alu_accupancy *= wrkGrpInfo->wavefrontSize_;
// Calculate blocks occupancy per CU
*numBlocks = alu_accupancy / amd::alignUp(blockSize, wrkGrpInfo->wavefrontSize_);