SWDEV-530921 - Remove the usage of __AMDGCN_WAVEFRONT_SIZE as compile time constant (#330)
* SWDEV-530921 - Remove the usage of __AMDGCN_WAVEFRONT_SIZE as compile time constant
* wavefrontsize builtin to be used only in device compilation
* SWDEV-530921 - Remove the usage of __AMDGCN_WAVEFRONT_SIZE as compile time constant
[ROCm/clr commit: 54503f0d67]
Tá an tiomantas seo le fáil i:
tiomanta ag
GitHub
tuismitheoir
306a7bb5a1
tiomantas
dd3eaa86ff
@@ -375,7 +375,9 @@ class coalesced_group : public thread_group {
|
||||
if (coalesced_info.tiled_info.is_tiled) {
|
||||
unsigned int base_offset = (thread_rank() & (~(tile_size - 1)));
|
||||
unsigned int masklength = min(static_cast<unsigned int>(size()) - base_offset, tile_size);
|
||||
lane_mask member_mask = static_cast<lane_mask>(-1) >> (warpSize - masklength);
|
||||
lane_mask full_mask = (warpSize == 32) ? static_cast<lane_mask>((1u << 32) - 1)
|
||||
: static_cast<lane_mask>(-1ull);
|
||||
lane_mask member_mask = full_mask >> (warpSize - masklength);
|
||||
|
||||
member_mask <<= (__lane_id() & ~(tile_size - 1));
|
||||
coalesced_group coalesced_tile = coalesced_group(member_mask);
|
||||
|
||||
@@ -83,18 +83,13 @@ __device__ static inline int __hip_move_dpp_N(int src) {
|
||||
bound_ctrl);
|
||||
}
|
||||
|
||||
#if defined(__SPIRV__)
|
||||
inline __device__ const struct final {
|
||||
__device__
|
||||
__attribute__((always_inline, const))
|
||||
operator int() const noexcept {
|
||||
return __builtin_amdgcn_wavefrontsize();
|
||||
}
|
||||
} warpSize{};
|
||||
#else
|
||||
__device__
|
||||
static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
|
||||
#endif
|
||||
inline __device__ const struct final {
|
||||
__device__
|
||||
__attribute__((always_inline, const))
|
||||
operator int() const noexcept {
|
||||
return __builtin_amdgcn_wavefrontsize();
|
||||
}
|
||||
} warpSize{};
|
||||
|
||||
// warp vote function __all __any __ballot
|
||||
__device__
|
||||
|
||||
+5
-12
@@ -52,16 +52,7 @@ THE SOFTWARE.
|
||||
#define _CG_STATIC_CONST_DECL_ static constexpr
|
||||
#endif
|
||||
|
||||
#if defined(__SPIRV__) && !defined(__AMDGCN_WAVEFRONT_SIZE)
|
||||
#error "TEMPORARY LIMITATION: when targeting AMDGCN SPIR-V"
|
||||
"__AMDGCN_WAVEFRONT_SIZE is not defined, and must be defined by the user"
|
||||
#endif
|
||||
#if __AMDGCN_WAVEFRONT_SIZE == 32
|
||||
using lane_mask = unsigned int;
|
||||
#else
|
||||
using lane_mask = unsigned long long int;
|
||||
#endif
|
||||
|
||||
namespace cooperative_groups {
|
||||
|
||||
/* Global scope */
|
||||
@@ -250,10 +241,12 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "
|
||||
__CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) {
|
||||
unsigned int counter=0;
|
||||
if (warpSize == 32) {
|
||||
counter = __builtin_amdgcn_mbcnt_lo(x, add);
|
||||
counter = __builtin_amdgcn_mbcnt_lo(static_cast<unsigned int>(x), add);
|
||||
} else {
|
||||
counter = __builtin_amdgcn_mbcnt_lo(static_cast<lane_mask>(x), add);
|
||||
counter = __builtin_amdgcn_mbcnt_hi(static_cast<lane_mask>(x >> 32), counter);
|
||||
unsigned int lo = static_cast<unsigned int>(x & 0xFFFFFFFF);
|
||||
unsigned int hi = static_cast<unsigned int>((x >> 32) & 0xFFFFFFFF);
|
||||
counter = __builtin_amdgcn_mbcnt_lo(lo, add);
|
||||
counter = __builtin_amdgcn_mbcnt_hi(hi, counter);
|
||||
}
|
||||
|
||||
return counter;
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir