Switching to using atomicAdd_system within kernel for collective trace (#780)
Este commit está contenido en:
@@ -281,7 +281,7 @@ class ncclFunction {
|
||||
#endif
|
||||
#ifdef ENABLE_COLLTRACE
|
||||
#define traceColl(launch_type) { \
|
||||
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
|
||||
uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
|
||||
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
|
||||
collTrace->timeStamp = wall_clock64(); \
|
||||
collTrace->bid = blockIdx.x; \
|
||||
@@ -316,21 +316,21 @@ class ncclFunction {
|
||||
traceColl(firstLaunch?ncclCollTraceKernelLaunchType:ncclCollTraceCollLaunchType); \
|
||||
}
|
||||
/*
 * traceKernelEnd(): append a "kernel end" record to the collective trace.
 *
 * Steps performed by the expansion:
 *   1. Atomically increment ncclShmem.comm.collTraceTail with the
 *      system-scope atomicAdd_system and take the returned (pre-increment)
 *      value modulo COLLTRACE_NUM_ITEMS as the slot index, so the index
 *      wraps around once COLLTRACE_NUM_ITEMS entries have been claimed.
 *   2. Store wall_clock64() as the timestamp, blockIdx.x as the block id
 *      and ncclCollTraceKernelEndType as the record type into that slot.
 *
 * The body is a braced compound statement, so `pos` and `collTrace` stay
 * local to each expansion. There must be exactly one declaration of `pos`:
 * the host-style __atomic_fetch_add builtin is not used here.
 *
 * NOTE(review): presumably the system-scope atomic is required because the
 * tail counter is also observed outside this device (e.g. by the host) --
 * confirm against where collTraceTail is allocated.
 * NOTE(review): the record fields are written after the slot is claimed and
 * no fence is issued here; confirm readers tolerate partially written
 * entries.
 */
#define traceKernelEnd() { \
    uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
    struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
    collTrace->timeStamp = wall_clock64(); \
    collTrace->bid = blockIdx.x; \
    collTrace->type = ncclCollTraceKernelEndType; \
  }
|
||||
/*
 * traceAbort(): append an "abort" record to the collective trace.
 *
 * Steps performed by the expansion:
 *   1. Atomically increment ncclShmem.comm.collTraceTail with the
 *      system-scope atomicAdd_system and take the returned (pre-increment)
 *      value modulo COLLTRACE_NUM_ITEMS as the slot index, so the index
 *      wraps around once COLLTRACE_NUM_ITEMS entries have been claimed.
 *   2. Store wall_clock64() as the timestamp, blockIdx.x as the block id
 *      and ncclCollTraceAbortType as the record type into that slot.
 *
 * The body is a braced compound statement, so `pos` and `collTrace` stay
 * local to each expansion. There must be exactly one declaration of `pos`:
 * the host-style __atomic_fetch_add builtin is not used here.
 *
 * NOTE(review): presumably the system-scope atomic is required because the
 * tail counter is also observed outside this device (e.g. by the host) --
 * confirm against where collTraceTail is allocated.
 */
#define traceAbort() { \
    uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
    struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
    collTrace->timeStamp = wall_clock64(); \
    collTrace->bid = blockIdx.x; \
    collTrace->type = ncclCollTraceAbortType; \
  }
|
||||
#define traceData(data2, data4, data8_0, data8_1) { \
|
||||
uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \
|
||||
uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \
|
||||
struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \
|
||||
collTrace->bid = blockIdx.x; \
|
||||
collTrace->timeStamp = wall_clock64(); \
|
||||
|
||||
Referencia en una nueva incidencia
Block a user