[DEVICE] Use threadfence on gfx950 for LL protocol (#1686)

Signed-off-by: nileshnegi <Nilesh.Negi@amd.com>

[ROCm/rccl commit: b926203c05]
Bu işleme şunda yer alıyor:
Nilesh M Negi
2025-06-09 01:26:07 -05:00
işlemeyi yapan: GitHub
ebeveyn 7abc3160e7
işleme b797b62f6b
4 değiştirilmiş dosya ile 19 ekleme ve 10 silme
+4 -7
Dosyayı Görüntüle
@@ -15,13 +15,7 @@
#define NCCL_SPINS_BEFORE_CHECK_ABORT 1000000
#if defined(__gfx942__) || defined(__gfx950__)
#define __THREAD_FENCE __threadfence_block()
#else
#define __THREAD_FENCE __threadfence()
#endif
#define barrier_by_group() do { \
#define barrier_by_group_common(__THREAD_FENCE) do { \
if (nthreads == NCCL_MAX_NTHREADS) { \
__THREAD_FENCE; __builtin_amdgcn_s_barrier(); \
} else { \
@@ -53,6 +47,9 @@
} \
} while (0)
#define barrier_by_group() barrier_by_group_common(__threadfence())
#define barrier_by_group_block() barrier_by_group_common(__threadfence_block())
/* Protocol classes: ProtoSimple, ProtoLL, ProtoLL128
* We use these as template args to the Primtiives class instead of integral
* enums (e.g. NCCL_PROTO_LL) because for SIMPLE we need to carry a few extra
+5 -1
Dosyayı Görüntüle
@@ -71,7 +71,11 @@ private:
inline __device__ void barrier() {
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
if (nthreads != WARP_SIZE)
barrier_by_group();
#if defined(__gfx942__)
barrier_by_group_block();
#else
barrier_by_group();
#endif
#else
if (nthreads == WARP_SIZE) {
__syncwarp();
+5 -1
Dosyayı Görüntüle
@@ -76,7 +76,11 @@ private:
inline __device__ void barrier() {
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
if (nthreads != WARP_SIZE)
barrier_by_group();
#if defined(__gfx942__) || defined(__gfx950__)
barrier_by_group_block();
#else
barrier_by_group();
#endif
#else
barrier_sync(15-group, nthreads);
#endif
+5 -1
Dosyayı Görüntüle
@@ -79,7 +79,11 @@ private:
if (nthreads == WARP_SIZE)
__syncwarp();
else
barrier_by_group();
#if defined(__gfx942__) || defined(__gfx950__)
barrier_by_group_block();
#else
barrier_by_group();
#endif
}
inline __device__ void subBarrier() {
barrier();