From 1fb66c3e1e6bca124c73f605a92cabfe6544cc06 Mon Sep 17 00:00:00 2001
From: Jatin Chaudhary
Date: Wed, 16 Oct 2024 01:34:23 +0100
Subject: [PATCH] SWDEV-491248 - Fix build_mask

thread_rank() gives thread index in a block. Limit the range to the
current warp size.

Change-Id: Ib5c9831236096485cf99ba7ab0b911a3b10de31c

[ROCm/clr commit: bd7d40a4d80c88a3d55affab6f5b6474c4f4618b]
---
 .../include/hip/amd_detail/amd_hip_cooperative_groups.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h
index 3a66c3c00c..37eda85a26 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h
@@ -825,7 +825,9 @@ template class thread_block_tile_base : public tile_base> (64 - numThreads);
-    return mask << ((internal::workgroup::thread_rank() / numThreads) * numThreads);
+    // thread_rank() gives thread id from 0..thread launch size.
+    return mask << (((internal::workgroup::thread_rank() % warpSize) / numThreads) *
+                    numThreads);
   }
 #endif