Add an environment variable to allow users to explicitly turn off direct AllGather (#2119)

Co-authored-by: Jiali Li <jialili@amd.com>

[ROCm/rccl commit: 935208ad09]
This commit is contained in:
amd-jiali
2025-12-29 16:43:40 -08:00
committed by GitHub
orang tua 2585ae8815
melakukan 7d25ecc65c
+11
Melihat File
@@ -351,6 +351,17 @@ ncclResult_t rcclGetProtocolName(int protocol, const char** protocolName) {
}
bool rcclUseAllGatherDirect(struct ncclComm* comm, size_t& msgSize) {
// Check if user explicitly disabled direct AllGather
static int userDirectAllGatherInput = -2;
if (userDirectAllGatherInput == -2) {
const char *inputStr = getenv("RCCL_DIRECT_ALLGATHER_DISABLE");
userDirectAllGatherInput = !inputStr ? 0 : 1;
}
if (userDirectAllGatherInput == 1) {
INFO(NCCL_INIT, "RCCL DIRECT ALLGATHER has been disabled.");
return false;
}
size_t threshold = rcclParamDirectAllGatherThreshold();
if (IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx950") && threshold != -1) {