From bf8eb407055e71f71eec81e9dfd0a5b2614817b0 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Fri, 12 Feb 2021 18:06:19 +0000 Subject: [PATCH] Move HDP flush to CPU --- src/collectives/device/primitives.h | 1 - src/include/proxy.h | 1 + src/transport/net.cc | 7 ++++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/collectives/device/primitives.h b/src/collectives/device/primitives.h index a5e52aca7a..5ffb02601c 100644 --- a/src/collectives/device/primitives.h +++ b/src/collectives/device/primitives.h @@ -158,7 +158,6 @@ class ncclPrimitives { inline __device__ void postSend() { if (conn->next_hdp_reg) STORE(conn->next_hdp_reg, 0x1); - if (conn->curr_hdp_reg) STORE(conn->curr_hdp_reg, 0x1); STORE(connTailPtr, step += SLICESTEPS); } diff --git a/src/include/proxy.h b/src/include/proxy.h index 9796baf39a..2e7773e964 100644 --- a/src/include/proxy.h +++ b/src/include/proxy.h @@ -36,6 +36,7 @@ struct ncclProxyArgs { uint64_t transmitted; uint64_t done; uint64_t end; + uint64_t hdp_flushed; void* requests[NCCL_STEPS]; int idle; diff --git a/src/transport/net.cc b/src/transport/net.cc index 84f3d3d4d1..be658ec4d6 100644 --- a/src/transport/net.cc +++ b/src/transport/net.cc @@ -283,7 +283,7 @@ ncclResult_t netSendProxy(struct ncclProxyArgs* args) { if (args->state == ncclProxyOpReady) { // Round to next multiple of sliceSteps resources->step = ROUNDUP(resources->step, args->chunkSteps); - args->posted = args->transmitted = args->done = resources->step; + args->posted = args->transmitted = args->done = args->hdp_flushed = resources->step; args->end = resources->step + args->nsteps; args->state = ncclProxyOpProgress; } @@ -345,6 +345,11 @@ ncclResult_t netSendProxy(struct ncclProxyArgs* args) { } } if (ready) { + // flush HDP if not done + if (resources->curr_hdp_reg && args->hdp_flushed < LOAD(recvTail)) { + args->hdp_flushed = LOAD(recvTail); + STORE(resources->curr_hdp_reg, 1); + } // Data is ready, try to send. NCCLCHECK(ncclNetIsend(resources->netSendComm, buff, size, mhandle, args->requests+buffSlot)); if (args->requests[buffSlot] != NULL) {