gfx12 initial enablement (#1219)
Tento commit je obsažen v:
@@ -48,7 +48,9 @@ set(DEFAULT_GPUS
 gfx1030
 gfx1100
 gfx1101
-gfx1102)
+gfx1102
+gfx1200
+gfx1201)
 
 # Load CMake modules
 #==================================================================================================
@@ -25,8 +25,13 @@
 
 #define __syncwarp()
 
+#ifdef __GFX12__
+#define __synclds() \
+asm volatile("s_waitcnt lgkmcnt(0) \n s_barrier_signal -1 \n s_barrier_wait -1");
+#else
 #define __synclds() \
 asm volatile("s_waitcnt lgkmcnt(0) \n s_barrier");
+#endif
 
 #ifdef __GFX9__
 #define STORE(DST, SRC) \
@@ -36,7 +41,7 @@
 { __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
 #endif
 
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
+#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
 #define __trace_hwreg()
 #else
 #define __trace_hwreg() \
@@ -50,7 +50,11 @@ extern __shared__ struct mscclShmemData mscclShmem;
 inline __device__ static void barrier(int nthreads) {
 #if defined(__HIP_PLATFORM_AMD__) || defined(__HCC__) || defined(__HIPCC__)
 assert(nthreads == NCCL_MAX_NTHREADS);
-__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)\ns_barrier");
+#ifdef __GFX12__
+__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)\ns_barrier_signal -1\ns_barrier_wait -1");
+#else
+__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)\ns_barrier");
+#endif
 #else
 asm volatile ("bar.sync %1, %0;" :: "r"(nthreads), "r"(15));
 #endif
Odkázat v novém úkolu
Zablokovat Uživatele