SWDEV-573004 - fix shfl_sync for compiler init value (#2533)

- add attribute for maybe undef

Signed-off-by: sdashmiz <shadi.dashmiz@amd.com>
This commit is contained in:
Shadi Dashmiz
2026-01-30 15:39:42 -05:00
committed by GitHub
parent 2ff4a999c3
commit e1844f6a59
@@ -37,6 +37,13 @@ THE SOFTWARE.
#include <algorithm>
#endif
// MAYBE_UNDEF: parameter annotation used by the shuffle wrappers in this
// header. It expands to __attribute__((maybe_undef)) when the compiler reports
// support for that attribute and to nothing otherwise.
//
// Any definition the including translation unit already has is saved here and
// removed, so the #define below can never clash with it; the saved definition
// is expected to be restored by the matching pop_macro for this name.
#pragma push_macro("MAYBE_UNDEF")
#undef MAYBE_UNDEF
// The __has_attribute probe is nested on purpose: a preprocessor that lacks
// __has_attribute must still parse both operands of `defined(...) && ...`,
// and would reject `__has_attribute(maybe_undef)` as malformed.
#if defined(__has_attribute)
#if __has_attribute(maybe_undef)
#define MAYBE_UNDEF __attribute__((maybe_undef))
#endif
#endif
// Fallback: compilers without the attribute (or without __has_attribute).
#ifndef MAYBE_UNDEF
#define MAYBE_UNDEF
#endif
extern "C" __device__ __attribute__((const)) int __ockl_wfred_add_i32(int);
extern "C" __device__ __attribute__((const)) unsigned int __ockl_wfred_add_u32(unsigned int);
extern "C" __device__ __attribute__((const)) int __ockl_wfred_min_i32(int);
@@ -273,7 +280,7 @@ __device__ inline unsigned long long __match_all_sync(MaskT mask, T value, int*
// various variants of shfl
template <typename MaskT, typename T>
__device__ inline T __shfl_sync(MaskT mask, T var, int srcLane, int width = warpSize) {
__device__ inline T __shfl_sync(MaskT mask, MAYBE_UNDEF T var, int srcLane, int width = warpSize) {
static_assert(__hip_internal::is_integral<MaskT>::value && sizeof(MaskT) == 8,
"The mask must be a 64-bit integer. "
"Implicitly promoting a smaller integer is almost always an error.");
@@ -283,7 +290,7 @@ __device__ inline T __shfl_sync(MaskT mask, T var, int srcLane, int width = warp
}
template <typename MaskT, typename T>
__device__ inline T __shfl_up_sync(MaskT mask, T var, unsigned int delta, int width = warpSize) {
__device__ inline T __shfl_up_sync(MaskT mask, MAYBE_UNDEF T var, unsigned int delta, int width = warpSize) {
static_assert(__hip_internal::is_integral<MaskT>::value && sizeof(MaskT) == 8,
"The mask must be a 64-bit integer. "
"Implicitly promoting a smaller integer is almost always an error.");
@@ -293,7 +300,7 @@ __device__ inline T __shfl_up_sync(MaskT mask, T var, unsigned int delta, int wi
}
template <typename MaskT, typename T>
__device__ inline T __shfl_down_sync(MaskT mask, T var, unsigned int delta, int width = warpSize) {
__device__ inline T __shfl_down_sync(MaskT mask, MAYBE_UNDEF T var, unsigned int delta, int width = warpSize) {
static_assert(__hip_internal::is_integral<MaskT>::value && sizeof(MaskT) == 8,
"The mask must be a 64-bit integer. "
"Implicitly promoting a smaller integer is almost always an error.");
@@ -303,7 +310,7 @@ __device__ inline T __shfl_down_sync(MaskT mask, T var, unsigned int delta, int
}
template <typename MaskT, typename T>
__device__ inline T __shfl_xor_sync(MaskT mask, T var, int laneMask, int width = warpSize) {
__device__ inline T __shfl_xor_sync(MaskT mask, MAYBE_UNDEF T var, int laneMask, int width = warpSize) {
static_assert(__hip_internal::is_integral<MaskT>::value && sizeof(MaskT) == 8,
"The mask must be a 64-bit integer. "
"Implicitly promoting a smaller integer is almost always an error.");
@@ -679,4 +686,7 @@ __device__ inline unsigned long long __reduce_xor_sync(MaskT mask, unsigned long
#undef __hip_adjust_mask_for_wave32
#endif // HIP_ENABLE_EXTRA_WARP_SYNC_TYPES
#pragma pop_macro("MAYBE_UNDEF")
#endif // HIP_DISABLE_WARP_SYNC_BUILTINS