npkit: separate network timing between send and test (#798)
Este commit está contenido en:
@@ -273,6 +273,8 @@ if ($npkit_enabled); then
|
||||
-DENABLE_NPKIT_EVENT_PRIM_LL128_DATA_PROCESS_EXIT \
|
||||
-DENABLE_NPKIT_EVENT_NET_SEND_ENTRY \
|
||||
-DENABLE_NPKIT_EVENT_NET_SEND_EXIT \
|
||||
-DENABLE_NPKIT_EVENT_NET_TEST_ENTRY \
|
||||
-DENABLE_NPKIT_EVENT_NET_TEST_EXIT \
|
||||
-DENABLE_NPKIT_EVENT_NET_RECV_ENTRY \
|
||||
-DENABLE_NPKIT_EVENT_NET_RECV_EXIT \
|
||||
-DENABLE_NPKIT_EVENT_ALL_REDUCE_RING_SEND_ENTRY \
|
||||
|
||||
@@ -109,5 +109,8 @@
|
||||
#define NPKIT_EVENT_ALL_GATHER_RING_DIRECT_RECV_ENTRY 0x56
|
||||
#define NPKIT_EVENT_ALL_GATHER_RING_DIRECT_RECV_EXIT 0x57
|
||||
|
||||
/*
 * Event IDs for the network "test" phase, kept distinct from the NET_SEND
 * events so the two can be reported as separate event types.
 * NOTE(review): the proxy code that emits these reads sub->timestamp, which
 * is declared only under the ENABLE_NPKIT_EVENT_NET_SEND_* guards -- confirm
 * both event families are always enabled together.
 */
#define NPKIT_EVENT_NET_TEST_ENTRY 0x58
|
||||
#define NPKIT_EVENT_NET_TEST_EXIT 0x59
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -74,6 +74,7 @@ struct ncclProxySubArgs {
|
||||
|
||||
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_SEND_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_SEND_EXIT)
|
||||
int npKitSizesFifo[NCCL_STEPS];
|
||||
uint64_t timestamp[NCCL_STEPS];
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
+33
-1
@@ -1067,6 +1067,7 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct
|
||||
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
|
||||
g_npkit_net_poll_cnt = 0;
|
||||
#endif
|
||||
sub->timestamp[buffSlot] = 0;
|
||||
#endif
|
||||
|
||||
TRACE(NCCL_NET, "sendProxy [%ld/%d] Isend posted, req %p", sub->transmitted, buffSlot, sub->requests[buffSlot]);
|
||||
@@ -1085,9 +1086,12 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct
|
||||
if (sub->done < sub->transmitted) {
|
||||
int done;
|
||||
int buffSlot = (sub->base+sub->done)%NCCL_STEPS;
|
||||
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_SEND_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_SEND_EXIT)
|
||||
if (sub->timestamp[buffSlot] == 0)
|
||||
sub->timestamp[buffSlot] = *(volatile uint64_t*)NpKit::GetCpuTimestamp();
|
||||
#endif
|
||||
NCCLCHECK(proxyState->ncclNet->test(sub->requests[buffSlot], &done, NULL));
|
||||
if (done) {
|
||||
|
||||
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_SEND_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_SEND_EXIT)
|
||||
NpKit::CollectCpuEvent(
|
||||
NPKIT_EVENT_NET_SEND_EXIT,
|
||||
@@ -1095,6 +1099,34 @@ static ncclResult_t sendProxyProgress(struct ncclProxyState* proxyState, struct
|
||||
g_npkit_net_poll_cnt,
|
||||
#else
|
||||
sub->npKitSizesFifo[buffSlot],
|
||||
#endif
|
||||
uint64_t(sub->requests+buffSlot)/sizeof(void*),
|
||||
sub->timestamp[buffSlot], sub->channelId);
|
||||
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
|
||||
g_npkit_net_poll_cnt = 0;
|
||||
#endif
|
||||
#endif
|
||||
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_TEST_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_TEST_EXIT)
|
||||
NpKit::CollectCpuEvent(
|
||||
NPKIT_EVENT_NET_TEST_ENTRY,
|
||||
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
|
||||
g_npkit_net_poll_cnt,
|
||||
#else
|
||||
sub->npKitSizesFifo[buffSlot],
|
||||
#endif
|
||||
uint64_t(sub->requests+buffSlot)/sizeof(void*),
|
||||
sub->timestamp[buffSlot], sub->channelId);
|
||||
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
|
||||
g_npkit_net_poll_cnt = 0;
|
||||
#endif
|
||||
#endif
|
||||
#if defined(ENABLE_NPKIT) && defined(ENABLE_NPKIT_EVENT_NET_TEST_ENTRY) && defined(ENABLE_NPKIT_EVENT_NET_TEST_EXIT)
|
||||
NpKit::CollectCpuEvent(
|
||||
NPKIT_EVENT_NET_TEST_EXIT,
|
||||
#if defined(ENABLE_NPKIT_NET_COLLECT_POLL_CNT)
|
||||
g_npkit_net_poll_cnt,
|
||||
#else
|
||||
sub->npKitSizesFifo[buffSlot],
|
||||
#endif
|
||||
uint64_t(sub->requests+buffSlot)/sizeof(void*),
|
||||
*(volatile uint64_t*)NpKit::GetCpuTimestamp(), sub->channelId);
|
||||
|
||||
Referencia en una nueva incidencia
Block a user