From 7d25ecc65c76e9f5ae33fb06652e64e526334245 Mon Sep 17 00:00:00 2001 From: amd-jiali Date: Mon, 29 Dec 2025 16:43:40 -0800 Subject: [PATCH] Add an environment variable to allow users to explicitly turn off direct AllGather (#2119) Co-authored-by: Jiali Li [ROCm/rccl commit: 935208ad0914805cbb5adad37bd23f3ee0122b40] --- projects/rccl/src/rccl_wrap.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/projects/rccl/src/rccl_wrap.cc b/projects/rccl/src/rccl_wrap.cc index 4f7ee90a06..78d7adeb19 100644 --- a/projects/rccl/src/rccl_wrap.cc +++ b/projects/rccl/src/rccl_wrap.cc @@ -351,6 +351,17 @@ ncclResult_t rcclGetProtocolName(int protocol, const char** protocolName) { } bool rcclUseAllGatherDirect(struct ncclComm* comm, size_t& msgSize) { + // Check if user explicitly disabled direct AllGather + static int userDirectAllGatherInput = -2; + if (userDirectAllGatherInput == -2) { + const char *inputStr = getenv("RCCL_DIRECT_ALLGATHER_DISABLE"); + userDirectAllGatherInput = !inputStr ? 0 : 1; + } + if (userDirectAllGatherInput == 1) { + INFO(NCCL_INIT, "RCCL DIRECT ALLGATHER has been disabled."); + return false; + } + size_t threshold = rcclParamDirectAllGatherThreshold(); if (IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx950") && threshold != -1) {