gfx12 initial enablement (#1219)

Tento commit je obsažen v:
akolliasAMD
2024-07-10 13:32:09 -06:00
odevzdal GitHub
rodič 7e78641dc1
revize 63e4d76e23
3 změnil soubory, kde provedl 14 přidání a 3 odebrání
+3 -1
Zobrazit soubor
@@ -48,7 +48,9 @@ set(DEFAULT_GPUS
gfx1030
gfx1100
gfx1101
gfx1102)
gfx1102
gfx1200
gfx1201)
# Load CMake modules
#==================================================================================================
+6 -1
Zobrazit soubor
@@ -25,8 +25,13 @@
#define __syncwarp()
// __synclds(): inline-asm wait-then-barrier sequence, selected at compile time.
// Both variants first issue `s_waitcnt lgkmcnt(0)` and then a barrier; only the
// barrier instruction(s) differ between the two branches.
// NOTE(review): the name suggests this orders LDS accesses across the workgroup;
// confirm the exact semantics against the AMD ISA guide for each target.
// The expansion already ends in ';', so writing `__synclds();` at a call site
// produces an additional empty statement.
#ifdef __GFX12__
// __GFX12__ defined: the barrier is spelled as `s_barrier_signal -1` followed by
// `s_barrier_wait -1` instead of a single `s_barrier`.
#define __synclds() \
asm volatile("s_waitcnt lgkmcnt(0) \n s_barrier_signal -1 \n s_barrier_wait -1");
#else
// All other targets: a single `s_barrier` instruction after the wait.
#define __synclds() \
asm volatile("s_waitcnt lgkmcnt(0) \n s_barrier");
#endif
#ifdef __GFX9__
#define STORE(DST, SRC) \
@@ -36,7 +41,7 @@
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
#endif
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
#define __trace_hwreg()
#else
#define __trace_hwreg() \
+5 -1
Zobrazit soubor
@@ -50,7 +50,11 @@ extern __shared__ struct mscclShmemData mscclShmem;
inline __device__ static void barrier(int nthreads) {
#if defined(__HIP_PLATFORM_AMD__) || defined(__HCC__) || defined(__HIPCC__)
assert(nthreads == NCCL_MAX_NTHREADS);
__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)\ns_barrier");
#ifdef __GFX12__
__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)\ns_barrier_signal -1\ns_barrier_wait -1");
#else
__asm__ __volatile__("s_waitcnt vmcnt(0) lgkmcnt(0)\ns_barrier");
#endif
#else
asm volatile ("bar.sync %1, %0;" :: "r"(nthreads), "r"(15));
#endif