Remove duplicate definitions of the INC_COLL_TRACE and traceData macros (#1500)
The two definitions are nearly identical; the only difference is that the common.h definition also sets `collTrace->channelId`.
This commit is contained in:
committed by
GitHub
parent
5afe900efd
commit
f77308a2fe
@@ -26,27 +26,6 @@ extern __shared__ struct mscclShmemData mscclShmem;
|
||||
// Recovers the work index from a flag value by integer-dividing the flag by
// MSCCL_MAX_ITER * MSCCL_MAX_NUM_STEPS.
//
// The whole replacement list is parenthesized so the macro behaves as a single
// operand wherever it is used: without the outer parentheses an expression
// such as `2 * GET_WORKINDEX_FROM_FLAG(f)` would expand to `2 * (f) / (...)`
// and, because of integer division, yield a different value.
#define GET_WORKINDEX_FROM_FLAG(__FLAG__) \
  ((__FLAG__) / (MSCCL_MAX_ITER * MSCCL_MAX_NUM_STEPS))
|
||||
|
||||
#ifdef ENABLE_COLLTRACE
// INC_COLL_TRACE claims the next collective-trace slot for the calling thread.
//
// It atomically bumps ncclShmem.collTraceTail->tail, reduces the value returned
// by atomicAdd modulo COLLTRACE_NUM_ITEMS to obtain a slot index, and then
// stamps that slot with the current wall_clock64() value and the block index.
//
// NOTE: this macro deliberately declares `pos` and `collTrace` in the
// enclosing scope so that the code following it can fill in the remaining
// fields of the slot; it must therefore be expanded at most once per scope.
//
// TODO: switch to atomicInc after llvm crash is fixed
//   uint32_t pos = atomicInc(&ncclShmem.collTraceTail->tail, COLLTRACE_NUM_ITEMS)
#define INC_COLL_TRACE \
  uint32_t pos = atomicAdd(&ncclShmem.collTraceTail->tail, 1) % COLLTRACE_NUM_ITEMS; \
  struct ncclCollTrace* collTrace = &ncclShmem.collTrace[pos]; \
  collTrace->timeStamp = wall_clock64(); \
  collTrace->bid = blockIdx.x;

// traceData records one data entry in the collective trace.
//
// The four arguments are stored, in order, into the slot's funcIndex, data_0,
// opCount and data_1 fields, and the slot's type is set to
// ncclCollTraceDataType. (The parameter names presumably hint at the byte
// width of each field -- TODO confirm against struct ncclCollTrace.)
//
// The expansion is a braced block, so `pos` and `collTrace` introduced by
// INC_COLL_TRACE stay local to each traceData invocation.
#define traceData(data2, data4, data8_0, data8_1) { \
    INC_COLL_TRACE \
    collTrace->funcIndex = (data2); \
    collTrace->data_0    = (data4); \
    collTrace->opCount   = (data8_0); \
    collTrace->data_1    = (data8_1); \
    collTrace->type      = ncclCollTraceDataType; \
  }
#else
// Tracing is compiled out: traceData expands to nothing and its arguments are
// never evaluated.
#define traceData(data2, data4, data8_0, data8_1)
#endif
|
||||
|
||||
inline __device__ static void barrier(int nthreads) {
|
||||
#if defined(__HIP_PLATFORM_AMD__) || defined(__HIPCC__)
|
||||
assert(nthreads == NCCL_MAX_NTHREADS);
|
||||
|
||||
Reference in new issue
Block a user