From 13f6bbde5735fb7d5f0380af4dc809b119dbed83 Mon Sep 17 00:00:00 2001
From: Mustafa Abduljabbar
Date: Wed, 18 Sep 2024 15:19:33 -0500
Subject: [PATCH] Fix MSCCLPP seg-fault when RCCL_MSCCL_ENABLE_SINGLE_PROCESS is enabled (#1338)

Removing unnecessary changes.

rename unique hosts function

Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com>

use updated function name

Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com>

Missed one instance of `mscclIsMultithreadedComm`.

rename unique hosts function

Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com>

use updated function name

Co-authored-by: corey-derochie-amd <161367113+corey-derochie-amd@users.noreply.github.com>

Missed one instance of `mscclIsMultithreadedComm`.

[ROCm/rccl commit: 2fe1e9f7db925004f4badb09b7c7c7e945e4ef17]
---
 .../rccl/src/misc/msccl/msccl_lifecycle.cc | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/projects/rccl/src/misc/msccl/msccl_lifecycle.cc b/projects/rccl/src/misc/msccl/msccl_lifecycle.cc
index d186e0c47f..161eb93b7b 100644
--- a/projects/rccl/src/misc/msccl/msccl_lifecycle.cc
+++ b/projects/rccl/src/misc/msccl/msccl_lifecycle.cc
@@ -63,12 +63,7 @@ bool mscclAvailable(int rank) {
   return mscclEnabled() && mscclInitialized(rank);
 }
 
-static bool mscclCommCompatible(ncclComm_t comm) {
-  if (rcclParamMscclEnableSingleProcess()) {
-    // Single process usage enabled. No need to guard against multi-thread.
-    return true;
-  }
-
+static bool allProcessHostsUnique(ncclComm_t comm) {
   std::map<uint64_t, std::set<uint64_t>> hostHashToPidHashes;
   for (int i = 0; i < comm->nRanks; i++) {
     uint64_t hostHash = comm->peerInfo[i].hostHash;
@@ -84,9 +79,17 @@ static bool mscclCommCompatible(ncclComm_t comm) {
   return true;
 }
 
-#ifdef ENABLE_MSCCLPP
+static bool mscclCommCompatible(ncclComm_t comm) {
+  if (rcclParamMscclEnableSingleProcess()) {
+    // Single process usage enabled. No need to guard against multi-thread.
+    return true;
+  }
+  return allProcessHostsUnique(comm);
+}
+
+#ifdef ENABLE_MSCCLPP
 bool mscclppCommCompatible(ncclComm_t comm) {
-  return mscclCommCompatible(comm);
+  return allProcessHostsUnique(comm);
 }
 #endif
 