From ebcecf1f4ff2d7e783b7a25c7d9e5dc227913e7a Mon Sep 17 00:00:00 2001 From: Sameer Sahasrabuddhe Date: Wed, 24 Jan 2024 13:16:50 +0530 Subject: [PATCH] SWDEV-295298: hide warp sync builtins with a macro in ROCm 6.1 Change-Id: Ie2efd233c0bcf8ad0e06223ee854fe4bd1060443 [ROCm/clr commit: d80168eb877eba197450526b7894bb9c32c00452] --- .../include/hip/amd_detail/amd_warp_functions.h | 4 +++- .../include/hip/amd_detail/amd_warp_sync_functions.h | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h index 64c2740132..98f8896cd9 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -103,14 +103,16 @@ unsigned long long int __ballot64(int predicate) { return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE); } +// See amd_warp_sync_functions.h for an explanation of this preprocessor flag. +#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS // Since threads in a wave do not make independent progress, __activemask() // always returns the exact active mask, i.e, all active threads in the wave. - __device__ inline unsigned long long __activemask() { return __ballot(true); } +#endif // HIP_ENABLE_WARP_SYNC_BUILTINS __device__ static inline unsigned int __lane_id() { return __builtin_amdgcn_mbcnt_hi( diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h index 5ce2581a8d..b8c67a8972 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h @@ -22,6 +22,15 @@ THE SOFTWARE. #pragma once +// Warp sync builtins (with explicit mask argument) introduced in ROCm 6.1 as a +// preview to allow end-users to adapt to the new interface involving 64-bit +// masks. These are disabled by default, and can be enabled by setting the macro +// below. The builtins will be enabled unconditionally in ROCm 6.2. +// +// This arrangement also applies to the __activemask() builtin defined in +// amd_warp_functions.h. +#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS + #if !defined(__HIPCC_RTC__) #include "amd_warp_functions.h" #include "hip_assert.h" @@ -259,3 +268,5 @@ T __shfl_xor_sync(MaskT mask, T var, int laneMask, #undef __hip_do_sync #undef __hip_check_mask + +#endif // HIP_ENABLE_WARP_SYNC_BUILTINS