diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h index d0badb963c..382c3acd0f 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h @@ -375,7 +375,7 @@ class coalesced_group : public thread_group { if (coalesced_info.tiled_info.is_tiled) { unsigned int base_offset = (thread_rank() & (~(tile_size - 1))); unsigned int masklength = min(static_cast(size()) - base_offset, tile_size); - lane_mask full_mask = (warpSize == 32) ? static_cast((1u << 32) - 1) + lane_mask full_mask = (static_cast(warpSize) == 32) ? static_cast((1u << 32) - 1) : static_cast(-1ull); lane_mask member_mask = full_mask >> (warpSize - masklength); @@ -469,7 +469,7 @@ class coalesced_group : public thread_group { srcRank = srcRank % static_cast(size()); int lane = (size() == warpSize) ? srcRank - : (warpSize == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1)) + : (static_cast(warpSize) == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1)) : __fns32(coalesced_info.member_mask, 0, (srcRank + 1)); return __shfl(var, lane, warpSize); @@ -501,7 +501,7 @@ class coalesced_group : public thread_group { } int lane; - if (warpSize == 64) { + if (static_cast(warpSize) == 64) { lane = __fns64(coalesced_info.member_mask, __lane_id(), lane_delta + 1); } else { @@ -541,10 +541,10 @@ class coalesced_group : public thread_group { } int lane; - if (warpSize == 64) { + if (static_cast(warpSize) == 64) { lane = __fns64(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1)); } - else if (warpSize == 32) { + else if (static_cast(warpSize) == 32) { lane = __fns32(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1)); } diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h index ff87828ad4..ba3949b874 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -128,7 +128,7 @@ unsigned long long __activemask() { #endif // HIP_DISABLE_WARP_SYNC_BUILTINS __device__ static inline unsigned int __lane_id() { - if (warpSize == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0); + if (static_cast(warpSize) == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0); return __builtin_amdgcn_mbcnt_hi( -1, __builtin_amdgcn_mbcnt_lo(-1, 0)); } diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h index 9792e9feac..a524e2f6c5 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_sync_functions.h @@ -101,7 +101,7 @@ T __hip_readfirstlane(T val) { // When compiling for wave32 mode, ignore the upper half of the 64-bit mask. #define __hip_adjust_mask_for_wave32(MASK) \ do { \ - if (warpSize == 32) MASK &= 0xFFFFFFFF; \ + if (static_cast(warpSize) == 32) MASK &= 0xFFFFFFFF; \ } while (0) // We use a macro to expand each builtin into a waterfall that implements the diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h b/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h index 61a60ce66a..27cdc84bdb 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h @@ -240,7 +240,7 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, " // have i-th bit of x set and come before the current thread. __CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) { unsigned int counter=0; - if (warpSize == 32) { + if (static_cast(warpSize) == 32) { counter = __builtin_amdgcn_mbcnt_lo(static_cast(x), add); } else { unsigned int lo = static_cast(x & 0xFFFFFFFF);