SWDEV-491248 - Fix build_mask

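thread_rank() returns the thread's index within the whole block, which can
exceed the warp size. Reduce it modulo warpSize before computing the mask
shift, so the shift stays within the current warp.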

Change-Id: Ib5c9831236096485cf99ba7ab0b911a3b10de31c


[ROCm/clr commit: bd7d40a4d8]
Цей коміт міститься в:
Jatin Chaudhary
2024-10-16 01:34:23 +01:00
зафіксовано Jatin Jaikishan Chaudhary
джерело 1d7b7cde76
коміт 1fb66c3e1e
+3 -1
Переглянути файл
@@ -825,7 +825,9 @@ template <unsigned int size> class thread_block_tile_base : public tile_base<siz
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
// Builds the 64-bit lane mask for the tile that the calling thread belongs to.
//
// The result has numThreads consecutive bits set, starting at the bit position of the
// tile's first lane within the warp.
__CG_QUALIFIER__ unsigned long long build_mask() const {
  // Low numThreads bits set.
  // NOTE(review): assumes 0 < numThreads <= 64 so the shift amount stays below 64 - confirm.
  unsigned long long mask = ~0ull >> (64 - numThreads);
  // thread_rank() is the thread's index within the whole block, so reduce it modulo
  // warpSize first; otherwise the shift would not be confined to the warp's lanes.
  const auto lane = internal::workgroup::thread_rank() % warpSize;
  // Round the lane down to the first lane of its tile.
  const auto tile_first_lane = (lane / numThreads) * numThreads;
  return mask << tile_first_lane;
}
#endif