Rework threadfence and flag setting (#627)
Tento commit je obsažen v:
@@ -153,7 +153,11 @@ private:
|
||||
|
||||
if (flags & (Recv*RolePostRecv | Send*RolePostSend)) {
|
||||
step += StepPerSlice;
|
||||
#if defined(__gfx90a__)
|
||||
atomicExch_system((unsigned long long *)connStepPtr, step);
|
||||
#else
|
||||
__atomic_store_n(connStepPtr, step, __ATOMIC_SEQ_CST);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -319,10 +323,10 @@ private:
|
||||
|
||||
}
|
||||
barrier(); // This barrier has a counterpart in following loop
|
||||
#if defined(__gfx1030__)
|
||||
if (Send && (flags & RolePostSend) && index == 0) __threadfence_system();
|
||||
#if defined(__gfx90a__)
|
||||
if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && index == 0) __threadfence_system();
|
||||
#else
|
||||
if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && index == 0) __threadfence_system();
|
||||
if (Send && (flags & RolePostSend) && index == 0) __threadfence_system();
|
||||
#endif
|
||||
__syncwarp();
|
||||
postPeer<Recv, Send>();
|
||||
@@ -343,10 +347,10 @@ private:
|
||||
waitPeer<DirectRecv, DirectSend, Recv, Send, Src, Dst>(0, 0, 0, 0);
|
||||
}
|
||||
barrier(); // Has counterpart in preceding worker-only loop.
|
||||
#if defined(__gfx1030__)
|
||||
if (Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system();
|
||||
#else
|
||||
#if defined(__gfx90a__)
|
||||
if ((MaxSend == 0 || MaxRecv == 0) && Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system();
|
||||
#else
|
||||
if (Send && (flags & RolePostSend) && sliceSize > 0 && index == 0) __threadfence_system();
|
||||
#endif
|
||||
__syncwarp();
|
||||
postPeer<Recv, Send>();
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele