From 62fee66ff24caecd2b59fa229bf30884b98b17b7 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Sun, 31 Jan 2021 01:16:48 -0600 Subject: [PATCH] SWDEV-270773 - Fix workgroup calculation logic for GWS initialization The existing workgroup calculation logic for GWS initialization is incorrect. It tries to add together workgroups across dimensions, leading to a major under-count in 2D and 3D kernels. An (x,y,z) kernel uses x * y * z blocks, not x + y + z. In addition, the previous logic was incorrect for the case of launching a single-threaded kernel. It calculated 0 workgroups, which led to GWS being initialized to -1. Change-Id: I1bb20a0d5b6e0cc10ac55901c28d8f93aac61c09 [ROCm/clr commit: 54d1d69c0a51865653a19e3db86071fa432fc476] --- projects/clr/rocclr/device/pal/palvirtual.cpp | 6 +++--- projects/clr/rocclr/device/rocm/rocvirtual.cpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/clr/rocclr/device/pal/palvirtual.cpp b/projects/clr/rocclr/device/pal/palvirtual.cpp index 4a85bb10fe..8538f5b96a 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.cpp +++ b/projects/clr/rocclr/device/pal/palvirtual.cpp @@ -2331,10 +2331,10 @@ void VirtualGPU::PostDeviceEnqueue(const amd::Kernel& kernel, const HSAILKernel& // ================================================================================================ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) { if (vcmd.cooperativeGroups()) { - uint32_t workgroups = 0; + uint32_t workgroups = 1; for (uint i = 0; i < vcmd.sizes().dimensions(); i++) { - if ((vcmd.sizes().local()[i] != 0) && (vcmd.sizes().global()[i] != 1)) { - workgroups += (vcmd.sizes().global()[i] / vcmd.sizes().local()[i]); + if (vcmd.sizes().local()[i] != 0) { + workgroups *= (vcmd.sizes().global()[i] / vcmd.sizes().local()[i]); } } diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 31950d3a70..f6d484e86b 100644 --- 
a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -2595,10 +2595,10 @@ void VirtualGPU::submitKernel(amd::NDRangeKernelCommand& vcmd) { if (vcmd.cooperativeGroups()) { // Initialize GWS if it's cooperative groups launch - uint32_t workgroups = 0; + uint32_t workgroups = 1; for (uint i = 0; i < vcmd.sizes().dimensions(); i++) { - if ((vcmd.sizes().local()[i] != 0) && (vcmd.sizes().global()[i] != 1)) { - workgroups += (vcmd.sizes().global()[i] / vcmd.sizes().local()[i]); + if (vcmd.sizes().local()[i] != 0) { + workgroups *= (vcmd.sizes().global()[i] / vcmd.sizes().local()[i]); } }