Increment opCount for intra-node comms as well (#2024)

* Enhance logging in NCCL initialization
It is convenient to log the comm object and its default channel counts together for debugging.

* Add opCount to collDevWork and update increment logic
Added opCount to collDevWork (copied from the task's opCount), and comm->opCount is now incremented when proxyOpQueue is empty (e.g., for intra-node comms).

* Clarify opCount increment logic in enqueue.cc
Updated the comment to clarify that opCount is incremented for intra-node communications.

* Refactor NCCL_INIT logging format
Updated logging format for NCCL_INIT to improve clarity.

* Remove duplicate INFO logging in init.cc

[ROCm/rccl commit: b00ee4c83c]
Этот коммит содержится в:
Dingming Wu
2025-11-10 09:23:49 -08:00
коммит произвёл GitHub
родитель b955a7df40
Коммит c601f9b3f8
2 изменённых файлов: 11 добавлений и 3 удалений
+9 -1
Просмотреть файл
@@ -388,7 +388,9 @@ ncclResult_t ncclTasksRegAndEnqueue(struct ncclComm* comm) {
devWork.redOpArgIsPtr = task->opDev.scalarArgIsPtr;
devWork.oneNode = (comm->nNodes == 1);
devWork.rcclUseOneSlice = comm->rcclUseOneSlice;
//[Added-comment] opCount is missing for collDevWork, adding here
devWork.opCount = task->opCount;
devWork.isOneRPN = comm->isOneRPN;
devWork.netRegUsed = devWork.regUsed = 0;
devWork.gfx9CheapFenceOff = gfx9CheapFenceOff(devWork, comm->gfx9CheapFenceOff);
@@ -1881,6 +1883,12 @@ ncclResult_t ncclLaunchKernelAfter_NoCuda(struct ncclComm* comm, struct ncclKern
// hostStreamPlanTask directly
NCCLCHECK(hostStreamPlanTask(comm, plan));
}
// Increment the opCount for intranode comms as well. Previously if proxyOpQueue was empty
// opCount was not incremented because ncclProxyStart wasn't called in hostStreamPlanTask
if (!plan->persistent && ncclIntruQueueHead(&plan->proxyOpQueue) == nullptr) {
comm->opCount++;
}
return ncclSuccess;
}
+2 -2
Просмотреть файл
@@ -1793,8 +1793,8 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
// Compute time models for algorithm and protocol combinations
NCCLCHECKGOTO(ncclTopoTuneModel(comm, comm->minCompCap, comm->maxCompCap, graphs), ret, fail);
INFO(NCCL_INIT, "%d coll channels, %d collnet channels, %d nvls channels, %d p2p channels, %d p2p channels per peer", comm->nChannels, comm->nChannels, comm->nvlsChannels, comm->p2pnChannels, comm->p2pnChannelsPerPeer);
INFO(NCCL_INIT, "comm:%p, nRanks:%d, nNodes:%d, coll channels:%d collnet channels:%d, nvls channels:%d, p2p channels:%d, p2p channels per peer:%d", comm, comm->nRanks, comm->nNodes, comm->nChannels, comm->nChannels, comm->nvlsChannels, comm->p2pnChannels, comm->p2pnChannelsPerPeer);
if (comm->intraRank == 0) { // Load ncclParamLaunchMode
const char* str = ncclGetEnv("NCCL_LAUNCH_MODE");
enum ncclLaunchMode mode, modeOld;