diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h index 4a154f62e1..d0badb963c 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_hip_cooperative_groups.h @@ -375,7 +375,9 @@ class coalesced_group : public thread_group { if (coalesced_info.tiled_info.is_tiled) { unsigned int base_offset = (thread_rank() & (~(tile_size - 1))); unsigned int masklength = min(static_cast(size()) - base_offset, tile_size); - lane_mask member_mask = static_cast(-1) >> (warpSize - masklength); + lane_mask full_mask = (warpSize == 32) ? static_cast((1u << 32) - 1) + : static_cast(-1ull); + lane_mask member_mask = full_mask >> (warpSize - masklength); member_mask <<= (__lane_id() & ~(tile_size - 1)); coalesced_group coalesced_tile = coalesced_group(member_mask); diff --git a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h index 0de0939eb5..ff87828ad4 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h +++ b/projects/clr/hipamd/include/hip/amd_detail/amd_warp_functions.h @@ -83,18 +83,13 @@ __device__ static inline int __hip_move_dpp_N(int src) { bound_ctrl); } -#if defined(__SPIRV__) - inline __device__ const struct final { - __device__ - __attribute__((always_inline, const)) - operator int() const noexcept { - return __builtin_amdgcn_wavefrontsize(); - } - } warpSize{}; -#else - __device__ - static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE; -#endif +inline __device__ const struct final { + __device__ + __attribute__((always_inline, const)) + operator int() const noexcept { + return __builtin_amdgcn_wavefrontsize(); + } +} warpSize{}; // warp vote function __all __any __ballot __device__ diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h b/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h index f5d22b782d..61a60ce66a 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_cooperative_groups_helper.h @@ -52,16 +52,7 @@ THE SOFTWARE. #define _CG_STATIC_CONST_DECL_ static constexpr #endif -#if defined(__SPIRV__) && !defined(__AMDGCN_WAVEFRONT_SIZE) -#error "TEMPORARY LIMITATION: when targeting AMDGCN SPIR-V" - "__AMDGCN_WAVEFRONT_SIZE is not defined, and must be defined by the user" -#endif -#if __AMDGCN_WAVEFRONT_SIZE == 32 -using lane_mask = unsigned int; -#else using lane_mask = unsigned long long int; -#endif - namespace cooperative_groups { /* Global scope */ @@ -250,10 +241,12 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, " __CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) { unsigned int counter=0; if (warpSize == 32) { - counter = __builtin_amdgcn_mbcnt_lo(x, add); + counter = __builtin_amdgcn_mbcnt_lo(static_cast(x), add); } else { - counter = __builtin_amdgcn_mbcnt_lo(static_cast(x), add); - counter = __builtin_amdgcn_mbcnt_hi(static_cast(x >> 32), counter); + unsigned int lo = static_cast(x & 0xFFFFFFFF); + unsigned int hi = static_cast((x >> 32) & 0xFFFFFFFF); + counter = __builtin_amdgcn_mbcnt_lo(lo, add); + counter = __builtin_amdgcn_mbcnt_hi(hi, counter); } return counter;