Rework threadfence and flag setting (#627)

Tento commit je obsažen v:
Wenkai Du
2022-09-22 08:35:42 -07:00
odevzdal GitHub
rodič d9216af48b
revize 49c811ecf9
+10 -6
Zobrazit soubor
@@ -153,7 +153,11 @@ private:
if (flags & (Recv*RolePostRecv | Send*RolePostSend)) {
step += StepPerSlice;
#if defined(__gfx90a__)
atomicExch_system((unsigned long long *)connStepPtr, step);
#else
__atomic_store_n(connStepPtr, step, __ATOMIC_SEQ_CST);
#endif
}
}
@@ -319,10 +323,10 @@ private:
}
barrier(); // This barrier has a counterpart in following loop
#if defined(__gfx1030__)
if (Send && (flags & RolePostSend) && index == 0) __threadfence_system();
#if defined(__gfx90a__)
if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && index == 0) __threadfence_system();
#else
if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && index == 0) __threadfence_system();
if (Send && (flags & RolePostSend) && index == 0) __threadfence_system();
#endif
__syncwarp();
postPeer<Recv, Send>();
@@ -343,10 +347,10 @@ private:
waitPeer<DirectRecv, DirectSend, Recv, Send, Src, Dst>(0, 0, 0, 0);
}
barrier(); // Has counterpart in preceding worker-only loop.
#if defined(__gfx1030__)
if (Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system();
#else
#if defined(__gfx90a__)
if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system();
#else
if (Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system();
#endif
__syncwarp();
postPeer<Recv, Send>();