SWDEV-536048 - Add explicit cast to int when warpSize is used with == operator (#473)

[ROCm/clr commit: a01134a1ab]
Dieser Commit ist enthalten in:
Assiouras, Ioannis
2025-06-06 07:52:31 +01:00
committet von GitHub
Ursprung 4d2e05e142
Commit af6016fe89
4 geänderte Dateien mit 8 neuen und 8 gelöschten Zeilen
@@ -375,7 +375,7 @@ class coalesced_group : public thread_group {
if (coalesced_info.tiled_info.is_tiled) {
unsigned int base_offset = (thread_rank() & (~(tile_size - 1)));
unsigned int masklength = min(static_cast<unsigned int>(size()) - base_offset, tile_size);
lane_mask full_mask = (warpSize == 32) ? static_cast<lane_mask>((1u << 32) - 1)
lane_mask full_mask = (static_cast<int>(warpSize) == 32) ? static_cast<lane_mask>((1u << 32) - 1)
: static_cast<lane_mask>(-1ull);
lane_mask member_mask = full_mask >> (warpSize - masklength);
@@ -469,7 +469,7 @@ class coalesced_group : public thread_group {
srcRank = srcRank % static_cast<int>(size());
int lane = (size() == warpSize) ? srcRank
: (warpSize == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
: (static_cast<int>(warpSize) == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
: __fns32(coalesced_info.member_mask, 0, (srcRank + 1));
return __shfl(var, lane, warpSize);
@@ -501,7 +501,7 @@ class coalesced_group : public thread_group {
}
int lane;
if (warpSize == 64) {
if (static_cast<int>(warpSize) == 64) {
lane = __fns64(coalesced_info.member_mask, __lane_id(), lane_delta + 1);
}
else {
@@ -541,10 +541,10 @@ class coalesced_group : public thread_group {
}
int lane;
if (warpSize == 64) {
if (static_cast<int>(warpSize) == 64) {
lane = __fns64(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
}
else if (warpSize == 32) {
else if (static_cast<int>(warpSize) == 32) {
lane = __fns32(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
}
@@ -128,7 +128,7 @@ unsigned long long __activemask() {
#endif // HIP_DISABLE_WARP_SYNC_BUILTINS
// Returns an unsigned value built from the AMDGCN mbcnt builtins, called with
// an all-ones mask (-1):
//   - when warpSize compares equal to 32, only the "lo" builtin is used;
//   - otherwise the "lo" result is passed as the second argument to the "hi" builtin.
// NOTE(review): presumably this is the calling thread's lane index within its
// wavefront (mbcnt_lo/mbcnt_hi are understood to count mask bits belonging to
// lanes below the current one in the low/high 32 lanes) - confirm against the
// AMDGPU builtin documentation.
//
// In this commit view the two consecutive `warpSize == 32` checks are the
// pre-change and post-change forms of the same test; the second one adds the
// explicit cast to int named in the commit title.
__device__ static inline unsigned int __lane_id() {
if (warpSize == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0);
if (static_cast<int>(warpSize) == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0);
return __builtin_amdgcn_mbcnt_hi(
-1, __builtin_amdgcn_mbcnt_lo(-1, 0));
}
@@ -101,7 +101,7 @@ T __hip_readfirstlane(T val) {
// When compiling for wave32 mode, ignore the upper half of the 64-bit mask.
//
// MASK is modified in place (`MASK &= 0xFFFFFFFF`), so the argument must be a
// modifiable lvalue; it is expanded unparenthesized, so pass a plain variable
// name rather than an expression. The do { ... } while (0) wrapper makes the
// expansion behave as a single statement that takes a trailing semicolon.
//
// In this commit view the two consecutive `warpSize == 32` checks are the
// pre-change and post-change forms of the same test; the second one adds the
// explicit cast to int named in the commit title.
#define __hip_adjust_mask_for_wave32(MASK) \
do { \
if (warpSize == 32) MASK &= 0xFFFFFFFF; \
if (static_cast<int>(warpSize) == 32) MASK &= 0xFFFFFFFF; \
} while (0)
// We use a macro to expand each builtin into a waterfall that implements the
@@ -240,7 +240,7 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "
// have i-th bit of x set and come before the current thread.
__CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) {
unsigned int counter=0;
if (warpSize == 32) {
if (static_cast<int>(warpSize) == 32) {
counter = __builtin_amdgcn_mbcnt_lo(static_cast<unsigned int>(x), add);
} else {
unsigned int lo = static_cast<unsigned int>(x & 0xFFFFFFFF);