@@ -340,6 +340,8 @@ if ($npkit_enabled); then
|
||||
-DENABLE_NPKIT_EVENT_MSCCL_RECV_REDUCE_COPY_EXIT \
|
||||
-DENABLE_NPKIT_EVENT_MSCCL_INIT_ENTRY \
|
||||
-DENABLE_NPKIT_EVENT_MSCCL_INIT_EXIT \
|
||||
-DENABLE_NPKIT_EVENT_BROADCAST_RING_ENTRY \
|
||||
-DENABLE_NPKIT_EVENT_BROADCAST_RING_EXIT \
|
||||
-DENABLE_NPKIT_PRIM_COLLECT_DATA_PROCESS_TIME"
|
||||
fi
|
||||
|
||||
|
||||
@@ -46,6 +46,13 @@ namespace {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Optional NPKit instrumentation: compiled in only when both ENABLE_NPKIT and
// ENABLE_NPKIT_EVENT_BROADCAST_RING_ENTRY are defined at build time.
// Records one NPKIT_EVENT_BROADCAST_RING_ENTRY event before the buffer setup
// that follows.
// NOTE(review): npKitCtxIdx is not declared inside this region; confirm it is
// declared under a guard that is active whenever this region is compiled.
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_BROADCAST_RING_ENTRY)
|
||||
// Only the thread with tid == 0 records the event.
if (tid == 0) {
|
||||
// Arguments: event type, args->count*sizeof(T) (presumably the payload size
// in bytes -- TODO confirm), a literal 0, the current GPU timestamp, and the
// collect context selected by npKitCtxIdx.
NpKit::CollectGpuEvent(NPKIT_EVENT_BROADCAST_RING_ENTRY, args->count*sizeof(T), 0, NPKIT_GET_GPU_TIMESTAMP(),
|
||||
ncclShmem.comm.npKitEventCollectContexts + npKitCtxIdx);
|
||||
}
|
||||
#endif
|
||||
|
||||
T *inputBuf = (T*)args->sendbuff;
|
||||
T *outputBuf = (T*)args->recvbuff;
|
||||
Primitives<T, RedOp, FanSymmetric<1>, 0, Proto, 0>
|
||||
@@ -73,6 +80,12 @@ namespace {
|
||||
prims.recvCopySend(offset, nelem);
|
||||
}
|
||||
}
|
||||
// Optional NPKit instrumentation: compiled in only when both ENABLE_NPKIT and
// ENABLE_NPKIT_EVENT_BROADCAST_RING_EXIT are defined at build time.
// Records one NPKIT_EVENT_BROADCAST_RING_EXIT event after the preceding
// send/receive code has finished.
// NOTE(review): npKitCtxIdx is not declared inside this region; confirm it is
// declared under a guard that is active whenever this region is compiled.
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_BROADCAST_RING_EXIT)
|
||||
// Only the thread with tid == 0 records the event.
if (tid == 0) {
|
||||
// Arguments: event type, args->count*sizeof(T) (presumably the payload size
// in bytes -- TODO confirm), a literal 0, the current GPU timestamp, and the
// collect context selected by npKitCtxIdx.
NpKit::CollectGpuEvent(NPKIT_EVENT_BROADCAST_RING_EXIT, args->count*sizeof(T), 0, NPKIT_GET_GPU_TIMESTAMP(),
|
||||
ncclShmem.comm.npKitEventCollectContexts + npKitCtxIdx);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -127,4 +127,6 @@
|
||||
#define NPKIT_EVENT_MSCCL_INIT_ENTRY 0x66
|
||||
#define NPKIT_EVENT_MSCCL_INIT_EXIT 0x67
|
||||
|
||||
// NPKit event type IDs for the broadcast ring ENTRY/EXIT pair. The values
// 0x68 and 0x69 are distinct from the other IDs visible in this header.
// NOTE(review): presumably these are the IDs passed to NpKit::CollectGpuEvent
// at the start and end of the ring broadcast code path -- confirm against the
// call sites.
#define NPKIT_EVENT_BROADCAST_RING_ENTRY 0x68
|
||||
#define NPKIT_EVENT_BROADCAST_RING_EXIT 0x69
|
||||
#endif
|
||||
|
||||
Ссылка в новой задаче
Block a user