Switching to using atomicAdd_system within kernel for collective trace (#780)

Este commit está contenido en:
gilbertlee-amd
2023-06-20 17:49:52 -06:00
cometido por GitHub
padre 8802de7761
commit 52a28ff2fc
+4 -4
Ver fichero
@@ -281,7 +281,7 @@ class ncclFunction {
#endif
#ifdef ENABLE_COLLTRACE
#define traceColl(launch_type) { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->timeStamp = wall_clock64(); \
collTrace->bid = blockIdx.x; \
@@ -316,21 +316,21 @@ class ncclFunction {
traceColl(firstLaunch?ncclCollTraceKernelLaunchType:ncclCollTraceCollLaunchType); \
}
/*
 * traceKernelEnd(): append a "kernel end" record to the collective trace buffer.
 *
 * Reserves one slot by atomically incrementing the counter that
 * ncclShmem.comm.collTraceTail points to, wraps the returned (pre-increment)
 * value modulo COLLTRACE_NUM_ITEMS, and fills that entry of
 * ncclShmem.comm.collTrace with the current wall_clock64() timestamp, the
 * calling block's blockIdx.x, and ncclCollTraceKernelEndType.
 *
 * The slot is reserved with exactly one atomicAdd_system call; the earlier
 * __atomic_fetch_add form must not be kept alongside it, since that would
 * redeclare `pos` in the same scope and advance the tail twice per record.
 *
 * NOTE(review): the system-scope atomic suggests the tail counter is shared
 * beyond this device (e.g. read by the host) - confirm against the allocator
 * of collTraceTail.
 */
#define traceKernelEnd() { \
  /* Reserve a trace slot; index wraps at COLLTRACE_NUM_ITEMS. */ \
  uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
  struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
  collTrace->timeStamp = wall_clock64(); \
  collTrace->bid = blockIdx.x; \
  collTrace->type = ncclCollTraceKernelEndType; \
}
/*
 * traceAbort(): append an "abort" record to the collective trace buffer.
 *
 * Reserves one slot by atomically incrementing the counter that
 * ncclShmem.comm.collTraceTail points to, wraps the returned (pre-increment)
 * value modulo COLLTRACE_NUM_ITEMS, and fills that entry of
 * ncclShmem.comm.collTrace with the current wall_clock64() timestamp, the
 * calling block's blockIdx.x, and ncclCollTraceAbortType.
 *
 * The slot is reserved with exactly one atomicAdd_system call; the earlier
 * __atomic_fetch_add form must not be kept alongside it, since that would
 * redeclare `pos` in the same scope and advance the tail twice per record.
 *
 * NOTE(review): the system-scope atomic suggests the tail counter is shared
 * beyond this device (e.g. read by the host) - confirm against the allocator
 * of collTraceTail.
 */
#define traceAbort() { \
  /* Reserve a trace slot; index wraps at COLLTRACE_NUM_ITEMS. */ \
  uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
  struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
  collTrace->timeStamp = wall_clock64(); \
  collTrace->bid = blockIdx.x; \
  collTrace->type = ncclCollTraceAbortType; \
}
#define traceData(data2, data4, data8_0, data8_1) { \
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
collTrace->bid = blockIdx.x; \
collTrace->timeStamp = wall_clock64(); \