Removing duplicate definitions of INC_COLL_TRACE and traceData macros (#1500)

They are nearly identical, except the common.h definition sets `collTrace->channelId`.
Этот коммит содержится в:
corey-derochie-amd
2025-01-22 16:50:27 -07:00
коммит произвёл GitHub
родитель 5afe900efd
Коммит f77308a2fe
-21
Просмотреть файл
@@ -26,27 +26,6 @@ extern __shared__ struct mscclShmemData mscclShmem;
// Divides a flag value by (MSCCL_MAX_ITER * MSCCL_MAX_NUM_STEPS); judging by
// the macro name, the quotient is the work index encoded in the flag
// (NOTE(review): confirm against the code that builds the flag).
//
// The entire expansion and each constant are parenthesized so the macro
// behaves as a single operand inside a larger expression; without the outer
// parentheses `x % GET_WORKINDEX_FROM_FLAG(f)` would parse as
// `(x % f) / (...)`.
#define GET_WORKINDEX_FROM_FLAG(__FLAG__) \
((__FLAG__) / ((MSCCL_MAX_ITER) * (MSCCL_MAX_NUM_STEPS)))
// Collective-trace helpers. They only emit code when ENABLE_COLLTRACE is
// defined; otherwise traceData() expands to nothing.
// NOTE(review): per the accompanying commit description, a near-identical
// definition lives in common.h and additionally sets collTrace->channelId.
#ifdef ENABLE_COLLTRACE
// INC_COLL_TRACE: claims a trace slot and stamps it.
//   - Atomically adds 1 to ncclShmem.collTraceTail->tail and reduces the value
//     returned by atomicAdd modulo COLLTRACE_NUM_ITEMS to get the slot index.
//   - Declares the locals `pos` and `collTrace` in the expanding scope, so it
//     can be expanded at most once per scope.
//   - Records wall_clock64() and blockIdx.x in the selected entry.
#define INC_COLL_TRACE \
uint32_t pos = atomicAdd(&ncclShmem.collTraceTail->tail, 1)%COLLTRACE_NUM_ITEMS; \
struct ncclCollTrace* collTrace = ncclShmem.collTrace+pos; \
collTrace->timeStamp = wall_clock64(); \
collTrace->bid = blockIdx.x;
// TODO: switch to atomicInc after llvm crash is fixed
// uint32_t pos = atomicInc(&ncclShmem.collTraceTail->tail, COLLTRACE_NUM_ITEMS)
// traceData: claims a slot via INC_COLL_TRACE and fills its payload fields.
//   data2   -> collTrace->funcIndex
//   data4   -> collTrace->data_0
//   data8_0 -> collTrace->opCount
//   data8_1 -> collTrace->data_1
// The entry's type is set to ncclCollTraceDataType. The body is wrapped in
// braces so the locals introduced by INC_COLL_TRACE stay scoped to one
// expansion.
#define traceData(data2, data4, data8_0, data8_1) { \
INC_COLL_TRACE \
collTrace->funcIndex = data2; \
collTrace->data_0 = data4; \
collTrace->opCount = data8_0; \
collTrace->data_1 = data8_1; \
collTrace->type = ncclCollTraceDataType; \
}
#else
// Tracing disabled: traceData() expands to nothing, so its arguments are
// never evaluated.
#define traceData(data2, data4, data8_0, data8_1)
#endif
inline __device__ static void barrier(int nthreads) {
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
assert(nthreads == NCCL_MAX_NTHREADS);