From 49c811ecf9e1e71877fb1ae3fed73e54efd6d998 Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Thu, 22 Sep 2022 08:35:42 -0700 Subject: [PATCH] Rework threadfence and flag setting (#627) --- src/collectives/device/prims_simple.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/collectives/device/prims_simple.h b/src/collectives/device/prims_simple.h index 6ea594ce02..aafb0ec9b5 100644 --- a/src/collectives/device/prims_simple.h +++ b/src/collectives/device/prims_simple.h @@ -153,7 +153,11 @@ private: if (flags & (Recv*RolePostRecv | Send*RolePostSend)) { step += StepPerSlice; +#if defined(__gfx90a__) atomicExch_system((unsigned long long *)connStepPtr, step); +#else + __atomic_store_n(connStepPtr, step, __ATOMIC_SEQ_CST); +#endif } } @@ -319,10 +323,10 @@ private: } barrier(); // This barrier has a counterpart in following loop -#if defined(__gfx1030__) - if (Send && (flags & RolePostSend) && index == 0) __threadfence_system(); +#if defined(__gfx90a__) + if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && index == 0) __threadfence_system(); #else - if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && index == 0) __threadfence_system(); + if (Send && (flags & RolePostSend) && index == 0) __threadfence_system(); #endif __syncwarp(); postPeer(); @@ -343,10 +347,10 @@ private: waitPeer(0, 0, 0, 0); } barrier(); // Has couterpart in preceding worker-only loop. -#if defined(__gfx1030__) - if (Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system(); -#else +#if defined(__gfx90a__) if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system(); +#else + if (Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system(); #endif __syncwarp(); postPeer();