SWDEV-295298: hide warp sync builtins with a macro in ROCm 6.1
Change-Id: Ie2efd233c0bcf8ad0e06223ee854fe4bd1060443
[ROCm/clr commit: d80168eb87]
Bu işleme şunda yer alıyor:
işlemeyi yapan:
Maneesh Gupta
ebeveyn
bd399106ae
işleme
ebcecf1f4f
@@ -103,14 +103,16 @@ unsigned long long int __ballot64(int predicate) {
|
||||
return __builtin_amdgcn_uicmp(predicate, 0, ICMP_NE);
|
||||
}
|
||||
|
||||
// See amd_warp_sync_functions.h for an explanation of this preprocessor flag.
|
||||
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
|
||||
// Since threads in a wave do not make independent progress, __activemask()
|
||||
// always returns the exact active mask, i.e, all active threads in the wave.
|
||||
|
||||
__device__
|
||||
inline
|
||||
unsigned long long __activemask() {
|
||||
return __ballot(true);
|
||||
}
|
||||
#endif // HIP_ENABLE_WARP_SYNC_BUILTINS
|
||||
|
||||
__device__ static inline unsigned int __lane_id() {
|
||||
return __builtin_amdgcn_mbcnt_hi(
|
||||
|
||||
@@ -22,6 +22,15 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
// Warp sync builtins (with explicit mask argument) were introduced in ROCm 6.1 as a
|
||||
// preview to allow end-users to adapt to the new interface involving 64-bit
|
||||
// masks. These are disabled by default, and can be enabled by setting the macro
|
||||
// below. The builtins will be enabled unconditionally in ROCm 6.2.
|
||||
//
|
||||
// This arrangement also applies to the __activemask() builtin defined in
|
||||
// amd_warp_functions.h.
|
||||
#ifdef HIP_ENABLE_WARP_SYNC_BUILTINS
|
||||
|
||||
#if !defined(__HIPCC_RTC__)
|
||||
#include "amd_warp_functions.h"
|
||||
#include "hip_assert.h"
|
||||
@@ -259,3 +268,5 @@ T __shfl_xor_sync(MaskT mask, T var, int laneMask,
|
||||
|
||||
#undef __hip_do_sync
|
||||
#undef __hip_check_mask
|
||||
|
||||
#endif // HIP_ENABLE_WARP_SYNC_BUILTINS
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle