From a7fbe6853506e7a99bd551a888e41bd159d44aa9 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Wed, 17 May 2023 16:49:42 +0000 Subject: [PATCH 1/2] Skip doneEvent inside MSCCL by default. Added a RCCL_MSCCL_ENABLE_DONE_EVENT env var, set it be 0 by default. The env var is to control whether to use doneEvent when invoking MSCCL kernels. Skipping doneEvent would cause the firmware to skip L2 cache flush, resulting in overall performance improvement. [ROCm/rccl commit: 12dba425de5c9ca312e9ffcc390371e0fdce60a8] --- projects/rccl/src/misc/msccl/msccl_setup.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/projects/rccl/src/misc/msccl/msccl_setup.cc b/projects/rccl/src/misc/msccl/msccl_setup.cc index 21496f4e89..fd1465820e 100644 --- a/projects/rccl/src/misc/msccl/msccl_setup.cc +++ b/projects/rccl/src/misc/msccl/msccl_setup.cc @@ -13,6 +13,8 @@ #include "msccl/msccl_setup.h" #include "msccl/msccl_status.h" +RCCL_PARAM(MscclEnableDoneEvent, "MSCCL_ENABLE_DONE_EVENT", 0); + ncclResult_t mscclSetupCount(struct mscclAlgo* hostAlgo, ncclComm_t comm, size_t count, ncclDataType_t dataType) { mscclStatus& status = mscclGetStatus(); status.stepSize = comm->buffSizes[hostAlgo->protocol] / NCCL_STEPS; @@ -260,7 +262,9 @@ ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count ncclComm_t comm, hipStream_t stream) { mscclStatus& status = mscclGetStatus(); - if (status.lastStream != stream && status.lastStream != nullptr) { + bool enableDoneEvent = (rcclParamMscclEnableDoneEvent() == 1); + + if (enableDoneEvent && (status.lastStream != stream && status.lastStream != nullptr)) { CUDACHECK(hipStreamWaitEvent(stream, comm->doneEvent, 0)); } @@ -284,7 +288,11 @@ ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count void *args[3] = {&comm->devComm, &devAlgo, &work}; void *func = mscclKernelEntries[(opFull.op * ncclNumTypes + dataType) * NCCL_NUM_PROTOCOLS + hostAlgo->protocol]; - CUDACHECK(hipExtLaunchKernel(func, grid, block, args, 0, stream, NULL, comm->doneEvent, 0)); + if (enableDoneEvent) { + CUDACHECK(hipExtLaunchKernel(func, grid, block, args, 0, stream, NULL, comm->doneEvent, 0)); + } else { + CUDACHECK(hipExtLaunchKernel(func, grid, block, args, 0, stream, NULL, NULL, 0)); + } status.workIndex++; status.lastStream = stream; return ncclSuccess; From 932822fc461192dd75e2c0f286e776adad5ee415 Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Wed, 17 May 2023 17:50:25 +0000 Subject: [PATCH 2/2] Address review feedbacks and make the flag be disabled by default. [ROCm/rccl commit: ca4a1dfd6719e02341168f82eadc756e6ca65826] --- projects/rccl/src/misc/msccl/msccl_setup.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/projects/rccl/src/misc/msccl/msccl_setup.cc b/projects/rccl/src/misc/msccl/msccl_setup.cc index fd1465820e..37c2aca7ba 100644 --- a/projects/rccl/src/misc/msccl/msccl_setup.cc +++ b/projects/rccl/src/misc/msccl/msccl_setup.cc @@ -13,7 +13,9 @@ #include "msccl/msccl_setup.h" #include "msccl/msccl_status.h" -RCCL_PARAM(MscclEnableDoneEvent, "MSCCL_ENABLE_DONE_EVENT", 0); +#ifndef HIP_EVENT_DISABLE_FENCE +RCCL_PARAM(MscclEnableDoneEvent, "MSCCL_ENABLE_DONE_EVENT", 1); +#endif ncclResult_t mscclSetupCount(struct mscclAlgo* hostAlgo, ncclComm_t comm, size_t count, ncclDataType_t dataType) { mscclStatus& status = mscclGetStatus(); @@ -262,7 +264,12 @@ ncclResult_t mscclSetupKernel(const void* sendBuff, void* recvBuff, size_t count ncclComm_t comm, hipStream_t stream) { mscclStatus& status = mscclGetStatus(); - bool enableDoneEvent = (rcclParamMscclEnableDoneEvent() == 1); + bool enableDoneEvent = +#ifndef HIP_EVENT_DISABLE_FENCE + (rcclParamMscclEnableDoneEvent() == 1); +#else + true; +#endif if (enableDoneEvent && (status.lastStream != stream && status.lastStream != nullptr)) { CUDACHECK(hipStreamWaitEvent(stream, comm->doneEvent, 0));