npkit: separate network timing between send and test (#798)

Este commit está contenido en:
Wenkai Du
2023-07-10 09:31:49 -07:00
cometido por GitHub
padre c095424e4d
commit 0f14e5a640
Se han modificado 4 ficheros con 39 adiciones y 1 borrado
+2
Ver fichero
@@ -273,6 +273,8 @@ if ($npkit_enabled); then
-DENABLE_NPKIT_EVENT_PRIM_LL128_DATA_PROCESS_EXIT \
-DENABLE_NPKIT_EVENT_NET_SEND_ENTRY \
-DENABLE_NPKIT_EVENT_NET_SEND_EXIT \
-DENABLE_NPKIT_EVENT_NET_TEST_ENTRY \
-DENABLE_NPKIT_EVENT_NET_TEST_EXIT \
-DENABLE_NPKIT_EVENT_NET_RECV_ENTRY \
-DENABLE_NPKIT_EVENT_NET_RECV_EXIT \
-DENABLE_NPKIT_EVENT_ALL_REDUCE_RING_SEND_ENTRY \
+3
Ver fichero
@@ -109,5 +109,8 @@
#define NPKIT_EVENT_ALL_GATHER_RING_DIRECT_RECV_ENTRY 0x56
#define NPKIT_EVENT_ALL_GATHER_RING_DIRECT_RECV_EXIT 0x57
/* Event IDs for the network "test" (completion-polling) phase of a send.
 * As used in sendProxyProgress: TEST_ENTRY is recorded with the timestamp
 * captured just before the first ncclNet->test() poll of a request, and
 * TEST_EXIT is recorded with the current CPU timestamp once test() reports
 * the request done. Both are emitted only when ENABLE_NPKIT and the two
 * ENABLE_NPKIT_EVENT_NET_TEST_* macros are defined. */
#define NPKIT_EVENT_NET_TEST_ENTRY 0x58
#define NPKIT_EVENT_NET_TEST_EXIT 0x59
#endif
+1
Ver fichero
@@ -74,6 +74,7 @@ struct ncclProxySubArgs {
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_SEND_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_SEND_EXIT)
int npKitSizesFifo[NCCL_STEPS];
uint64_t timestamp[NCCL_STEPS];
#endif
};
+33 -1
Ver fichero
@@ -1067,6 +1067,7 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
g_npkit_net_poll_cnt = 0;
#endif
sub->timestamp[buffSlot] = 0;
#endif
TRACE(NCCL_NET, "sendProxy [%ld/%d] Isend posted, req %p", sub->transmitted, buffSlot, sub->requests[buffSlot]);
@@ -1085,9 +1086,12 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct
if (sub->done < sub->transmitted) {
int done;
int buffSlot = (sub->base+sub->done)%NCCL_STEPS;
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_SEND_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_SEND_EXIT)
if (sub->timestamp[buffSlot] == 0)
sub->timestamp[buffSlot] = *(volatile uint64_t*)NpKit::GetCpuTimestamp();
#endif
NCCLCHECK(proxyState->ncclNet->test(sub->requests[buffSlot], &done, NULL));
if (done) {
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_SEND_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_SEND_EXIT)
NpKit::CollectCpuEvent(
NPKIT_EVENT_NET_SEND_EXIT,
@@ -1095,6 +1099,34 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct
g_npkit_net_poll_cnt,
#else
sub->npKitSizesFifo[buffSlot],
#endif
uint64_t(sub->requests+buffSlot)/sizeof(void*),
sub->timestamp[buffSlot], sub->channelId);
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
g_npkit_net_poll_cnt = 0;
#endif
#endif
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_TEST_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_TEST_EXIT)
NpKit::CollectCpuEvent(
NPKIT_EVENT_NET_TEST_ENTRY,
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
g_npkit_net_poll_cnt,
#else
sub->npKitSizesFifo[buffSlot],
#endif
uint64_t(sub->requests+buffSlot)/sizeof(void*),
sub->timestamp[buffSlot], sub->channelId);
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
g_npkit_net_poll_cnt = 0;
#endif
#endif
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_TEST_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_TEST_EXIT)
NpKit::CollectCpuEvent(
NPKIT_EVENT_NET_TEST_EXIT,
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
g_npkit_net_poll_cnt,
#else
sub->npKitSizesFifo[buffSlot],
#endif
uint64_t(sub->requests+buffSlot)/sizeof(void*),
*(volatile uint64_t*)NpKit::GetCpuTimestamp(), sub->channelId);