SWDEV-536048 - Add explicit cast to int when warpSize is used with == operator (#473)

[ROCm/clr commit: a01134a1ab]
2025-06-06 07:52:31 +01:00
Commit af6016fe89
@@ -375,7 +375,7 @@ class coalesced_group : public thread_group {
    if (coalesced_info.tiled_info.is_tiled) {
      unsigned int base_offset = (thread_rank() & (~(tile_size - 1)));
      unsigned int masklength = min(static_cast<unsigned int>(size()) - base_offset, tile_size);
-      lane_mask full_mask = (warpSize == 32) ? static_cast<lane_mask>((1u << 32) - 1)
+      lane_mask full_mask = (static_cast<int>(warpSize) == 32) ? static_cast<lane_mask>(0xFFFFFFFFu)  // low 32 lanes; (1u << 32) - 1 would shift by the full type width (UB)
                                             : static_cast<lane_mask>(-1ull);
      lane_mask member_mask = full_mask >> (warpSize - masklength);

@@ -469,7 +469,7 @@ class coalesced_group : public thread_group {
    srcRank = srcRank % static_cast<int>(size());

    int lane = (size() == warpSize) ? srcRank
-             : (warpSize == 64)     ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
+             : (static_cast<int>(warpSize) == 64)     ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
                                    : __fns32(coalesced_info.member_mask, 0, (srcRank + 1));

    return __shfl(var, lane, warpSize);
@@ -501,7 +501,7 @@ class coalesced_group : public thread_group {
    }

    int lane;
-    if (warpSize == 64) {
+    if (static_cast<int>(warpSize) == 64) {
      lane = __fns64(coalesced_info.member_mask, __lane_id(), lane_delta + 1);
    }
    else {
@@ -541,10 +541,10 @@ class coalesced_group : public thread_group {
    }

    int lane;
-    if (warpSize == 64) {
+    if (static_cast<int>(warpSize) == 64) {
      lane = __fns64(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
    }
-    else if (warpSize == 32) {
+    else if (static_cast<int>(warpSize) == 32) {
      lane = __fns32(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
    }

@@ -128,7 +128,7 @@ unsigned long long __activemask() {
 #endif // HIP_DISABLE_WARP_SYNC_BUILTINS

 __device__ static inline unsigned int __lane_id() {
-    if (warpSize == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0);
+    if (static_cast<int>(warpSize) == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0);
    return  __builtin_amdgcn_mbcnt_hi(
        -1, __builtin_amdgcn_mbcnt_lo(-1, 0));
 }
@@ -101,7 +101,7 @@ T __hip_readfirstlane(T val) {
 // When compiling for wave32 mode, ignore the upper half of the 64-bit mask.
 #define __hip_adjust_mask_for_wave32(MASK)            \
  do {                                          \
-    if (warpSize == 32) MASK &= 0xFFFFFFFF;     \
+    if (static_cast<int>(warpSize) == 32) MASK &= 0xFFFFFFFF;     \
  } while (0)

 // We use a macro to expand each builtin into a waterfall that implements the
@@ -240,7 +240,7 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "
 // have i-th bit of x set and come before the current thread.
 __CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) {
  unsigned int counter=0;
-  if (warpSize == 32) {
+  if (static_cast<int>(warpSize) == 32) {
    counter = __builtin_amdgcn_mbcnt_lo(static_cast<unsigned int>(x), add);
  } else {
    unsigned int lo = static_cast<unsigned int>(x & 0xFFFFFFFF);