SWDEV-295298: hide warp sync builtins with a macro in ROCm 6.1

Change-Id: Ie2efd233c0bcf8ad0e06223ee854fe4bd1060443


[ROCm/clr commit: d80168eb87]
Bu işleme şunda yer alıyor:
Sameer Sahasrabuddhe
2024-01-24 13:16:50 +05:30
işlemeyi yapan: Maneesh Gupta
ebeveyn bd399106ae
işleme ebcecf1f4f
2 değiştirilmiş dosya ile 14 ekleme ve 1 silme
+3 -1
Dosyayı Görüntüle
@@ -103,14 +103,16 @@ unsigned long long int __ballot64(int predicate) {
return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
}
// See amd_warp_sync_functions.h for an explanation of this preprocessor flag.
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
// Since threads in a wave do not make independent progress, __activemask()
// always returns the exact active mask, i.e, all active threads in the wave.
__device__
inline
unsigned long long __activemask() {
return __ballot(true);
}
#endif // HIP_ENABLE_WARP_SYNC_BUILTINS
__device__ static inline unsigned int __lane_id() {
return __builtin_amdgcn_mbcnt_hi(
+11
Dosyayı Görüntüle
@@ -22,6 +22,15 @@ THE SOFTWARE.
#pragma once
// Warp sync builtins (with explicit mask argument) introduced in ROCm 6.1 as a
// preview to allow end-users to adapt to the new interface involving 64-bit
// masks. These are disabled by default, and can be enabled by setting the macro
// below. The builtins will be enabled unconditionally in ROCm 6.2.
//
// This arrangement also applies to the __activemask() builtin defined in
// amd_warp_functions.h.
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
#if !defined(__HIPCC_RTC__)
#include "amd_warp_functions.h"
#include "hip_assert.h"
@@ -259,3 +268,5 @@ T __shfl_xor_sync(MaskT mask, T var, int laneMask,
#undef __hip_do_sync
#undef __hip_check_mask
#endif // HIP_ENABLE_WARP_SYNC_BUILTINS