SWDEV-530921 - Remove the usage of __AMDGCN_WAVEFRONT_SIZE as compile time constant (#330)

* SWDEV-530921 - Remove the usage of __AMDGCN_WAVEFRONT_SIZE as compile time constant

* wavefrontsize builtin to be used only in device compilation

* SWDEV-530921 - Remove the usage of __AMDGCN_WAVEFRONT_SIZE as compile time constant

[ROCm/clr commit: 54503f0d67]
Tá an tiomantas seo le fáil i:
Dittakavi, Satyanvesh
2025-05-29 12:52:10 +05:30
tiomanta ag GitHub
tuismitheoir 306a7bb5a1
tiomantas dd3eaa86ff
D'athraigh 3 comhad le 15 breiseanna agus 25 scriosta
@@ -375,7 +375,9 @@ class coalesced_group : public thread_group {
if (coalesced_info.tiled_info.is_tiled) {
unsigned int base_offset = (thread_rank() & (~(tile_size - 1)));
unsigned int masklength = min(static_cast<unsigned int>(size()) - base_offset, tile_size);
lane_mask member_mask = static_cast<lane_mask>(-1) >> (warpSize - masklength);
lane_mask full_mask = (warpSize == 32) ? static_cast<lane_mask>((1u << 32) - 1)
: static_cast<lane_mask>(-1ull);
lane_mask member_mask = full_mask >> (warpSize - masklength);
member_mask <<= (__lane_id() & ~(tile_size - 1));
coalesced_group coalesced_tile = coalesced_group(member_mask);
@@ -83,18 +83,13 @@ __device__ static inline int __hip_move_dpp_N(int src) {
bound_ctrl);
}
#if defined(__SPIRV__)
inline __device__ const struct final {
__device__
__attribute__((always_inline, const))
operator int() const noexcept {
return __builtin_amdgcn_wavefrontsize();
}
} warpSize{};
#else
__device__
static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
#endif
inline __device__ const struct final {
__device__
__attribute__((always_inline, const))
operator int() const noexcept {
return __builtin_amdgcn_wavefrontsize();
}
} warpSize{};
// warp vote function __all __any __ballot
__device__
@@ -52,16 +52,7 @@ THE SOFTWARE.
#define _CG_STATIC_CONST_DECL_ static constexpr
#endif
#if defined(__SPIRV__) && !defined(__AMDGCN_WAVEFRONT_SIZE)
#error "TEMPORARY LIMITATION: when targeting AMDGCN SPIR-V"
"__AMDGCN_WAVEFRONT_SIZE is not defined, and must be defined by the user"
#endif
#if __AMDGCN_WAVEFRONT_SIZE == 32
using lane_mask = unsigned int;
#else
using lane_mask = unsigned long long int;
#endif
namespace cooperative_groups {
/* Global scope */
@@ -250,10 +241,12 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "
__CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) {
unsigned int counter=0;
if (warpSize == 32) {
counter = __builtin_amdgcn_mbcnt_lo(x, add);
counter = __builtin_amdgcn_mbcnt_lo(static_cast<unsigned int>(x), add);
} else {
counter = __builtin_amdgcn_mbcnt_lo(static_cast<lane_mask>(x), add);
counter = __builtin_amdgcn_mbcnt_hi(static_cast<lane_mask>(x >> 32), counter);
unsigned int lo = static_cast<unsigned int>(x & 0xFFFFFFFF);
unsigned int hi = static_cast<unsigned int>((x >> 32) & 0xFFFFFFFF);
counter = __builtin_amdgcn_mbcnt_lo(lo, add);
counter = __builtin_amdgcn_mbcnt_hi(hi, counter);
}
return counter;