diff --git a/src/collectives/device/common.h b/src/collectives/device/common.h index 6e563b811c..aadffbf273 100644 --- a/src/collectives/device/common.h +++ b/src/collectives/device/common.h @@ -281,7 +281,7 @@ class ncclFunction { #endif #ifdef ENABLE_COLLTRACE #define traceColl(launch_type) { \ - uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \ + uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \ struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \ collTrace->timeStamp = wall_clock64(); \ collTrace->bid = blockIdx.x; \ @@ -316,21 +316,21 @@ class ncclFunction { traceColl(firstLaunch?ncclCollTraceKernelLaunchType:ncclCollTraceCollLaunchType); \ } #define traceKernelEnd() { \ - uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \ + uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \ struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \ collTrace->timeStamp = wall_clock64(); \ collTrace->bid = blockIdx.x; \ collTrace->type = ncclCollTraceKernelEndType; \ } #define traceAbort() { \ - uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \ + uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \ struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \ collTrace->timeStamp = wall_clock64(); \ collTrace->bid = blockIdx.x; \ collTrace->type = ncclCollTraceAbortType; \ } #define traceData(data2, data4, data8_0, data8_1) { \ - uint32_t pos = __atomic_fetch_add(ncclShmem.comm.collTraceTail, 1, __ATOMIC_SEQ_CST)%COLLTRACE_NUM_ITEMS; \ + uint32_t pos = atomicAdd_system((uint32_t*)ncclShmem.comm.collTraceTail, (uint32_t)1)%COLLTRACE_NUM_ITEMS; \ struct ncclCollTrace* collTrace = ncclShmem.comm.collTrace+pos; \ collTrace->bid = blockIdx.x; \ collTrace->timeStamp = wall_clock64(); \