SWDEV-536048 - Add explicit cast to int when warpSize is used with == operator (#473)
[ROCm/clr commit: a01134a1ab]
Dieser Commit ist enthalten in:
committet von
GitHub
Ursprung
4d2e05e142
Commit
af6016fe89
@@ -375,7 +375,7 @@ class coalesced_group : public thread_group {
|
||||
if (coalesced_info.tiled_info.is_tiled) {
|
||||
unsigned int base_offset = (thread_rank() & (~(tile_size - 1)));
|
||||
unsigned int masklength = min(static_cast<unsigned int>(size()) - base_offset, tile_size);
|
||||
lane_mask full_mask = (warpSize == 32) ? static_cast<lane_mask>((1u << 32) - 1)
|
||||
lane_mask full_mask = (static_cast<int>(warpSize) == 32) ? static_cast<lane_mask>((1u << 32) - 1)
|
||||
: static_cast<lane_mask>(-1ull);
|
||||
lane_mask member_mask = full_mask >> (warpSize - masklength);
|
||||
|
||||
@@ -469,7 +469,7 @@ class coalesced_group : public thread_group {
|
||||
srcRank = srcRank % static_cast<int>(size());
|
||||
|
||||
int lane = (size() == warpSize) ? srcRank
|
||||
: (warpSize == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
|
||||
: (static_cast<int>(warpSize) == 64) ? __fns64(coalesced_info.member_mask, 0, (srcRank + 1))
|
||||
: __fns32(coalesced_info.member_mask, 0, (srcRank + 1));
|
||||
|
||||
return __shfl(var, lane, warpSize);
|
||||
@@ -501,7 +501,7 @@ class coalesced_group : public thread_group {
|
||||
}
|
||||
|
||||
int lane;
|
||||
if (warpSize == 64) {
|
||||
if (static_cast<int>(warpSize) == 64) {
|
||||
lane = __fns64(coalesced_info.member_mask, __lane_id(), lane_delta + 1);
|
||||
}
|
||||
else {
|
||||
@@ -541,10 +541,10 @@ class coalesced_group : public thread_group {
|
||||
}
|
||||
|
||||
int lane;
|
||||
if (warpSize == 64) {
|
||||
if (static_cast<int>(warpSize) == 64) {
|
||||
lane = __fns64(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
|
||||
}
|
||||
else if (warpSize == 32) {
|
||||
else if (static_cast<int>(warpSize) == 32) {
|
||||
lane = __fns32(coalesced_info.member_mask, __lane_id(), -(lane_delta + 1));
|
||||
}
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ unsigned long long __activemask() {
|
||||
#endif // HIP_DISABLE_WARP_SYNC_BUILTINS
|
||||
|
||||
__device__ static inline unsigned int __lane_id() {
|
||||
if (warpSize == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0);
|
||||
if (static_cast<int>(warpSize) == 32) return __builtin_amdgcn_mbcnt_lo(-1, 0);
|
||||
return __builtin_amdgcn_mbcnt_hi(
|
||||
-1, __builtin_amdgcn_mbcnt_lo(-1, 0));
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ T __hip_readfirstlane(T val) {
|
||||
// When compiling for wave32 mode, ignore the upper half of the 64-bit mask.
|
||||
#define __hip_adjust_mask_for_wave32(MASK) \
|
||||
do { \
|
||||
if (warpSize == 32) MASK &= 0xFFFFFFFF; \
|
||||
if (static_cast<int>(warpSize) == 32) MASK &= 0xFFFFFFFF; \
|
||||
} while (0)
|
||||
|
||||
// We use a macro to expand each builtin into a waterfall that implements the
|
||||
|
||||
@@ -240,7 +240,7 @@ __CG_STATIC_QUALIFIER__ void sync() { __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "
|
||||
// have i-th bit of x set and come before the current thread.
|
||||
__CG_STATIC_QUALIFIER__ unsigned int masked_bit_count(lane_mask x, unsigned int add = 0) {
|
||||
unsigned int counter=0;
|
||||
if (warpSize == 32) {
|
||||
if (static_cast<int>(warpSize) == 32) {
|
||||
counter = __builtin_amdgcn_mbcnt_lo(static_cast<unsigned int>(x), add);
|
||||
} else {
|
||||
unsigned int lo = static_cast<unsigned int>(x & 0xFFFFFFFF);
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren