Increment opCount for intra-node comms as well (#2024)

* Enhance logging in NCCL initialization
It's convenient to log the comm object and the default channel counts together for debugging

* Add opCount to collDevWork and update increment logic
Added opCount to collDevWork and incremented it when proxyOpQueue is empty (e.g., for intra-node comms)

* Clarify opCount increment logic in enqueue.cc
Updated comment to clarify incrementing opCount for intra-node communications.

* Refactor NCCL_INIT logging format
Updated logging format for NCCL_INIT to improve clarity.

* Remove duplicate INFO logging in init.cc
Bu işleme şunda yer alıyor:
Dingming Wu
2025-11-10 09:23:49 -08:00
işlemeyi yapan: GitHub
ebeveyn b1e680adc0
işleme b00ee4c83c
2 değiştirilmiş dosya ile 11 ekleme ve 3 silme
+9 -1
Dosyayı Görüntüle
@@ -388,7 +388,9 @@ ncclResult_t ncclTasksRegAndEnqueue(struct ncclComm* comm) {
devWork.redOpArgIsPtr = task->opDev.scalarArgIsPtr;
devWork.oneNode = (comm->nNodes == 1);
devWork.rcclUseOneSlice = comm->rcclUseOneSlice;
//[Added-comment] opCount is missing for collDevWork, adding here
devWork.opCount = task->opCount;
devWork.isOneRPN = comm->isOneRPN;
devWork.netRegUsed = devWork.regUsed = 0;
devWork.gfx9CheapFenceOff = gfx9CheapFenceOff(devWork, comm->gfx9CheapFenceOff);
@@ -1881,6 +1883,12 @@ ncclResult_t ncclLaunchKernelAfter_NoCuda(struct ncclComm* comm, struct ncclKern
// hostStreamPlanTask directly
NCCLCHECK(hostStreamPlanTask(comm, plan));
}
// Increment the opCount for intranode comms as well. Previously if proxyOpQueue was empty
// opCount was not incremented because ncclProxyStart wasn't called in hostStreamPlanTask
if (!plan->persistent && ncclIntruQueueHead(&plan->proxyOpQueue) == nullptr) {
comm->opCount++;
}
return ncclSuccess;
}
+2 -2
Dosyayı Görüntüle
@@ -1793,8 +1793,8 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p
// Compute time models for algorithm and protocol combinations
NCCLCHECKGOTO(ncclTopoTuneModel(comm, comm->minCompCap, comm->maxCompCap, graphs), ret, fail);
INFO(NCCL_INIT, "%d coll channels, %d collnet channels, %d nvls channels, %d p2p channels, %d p2p channels per peer", comm->nChannels, comm->nChannels, comm->nvlsChannels, comm->p2pnChannels, comm->p2pnChannelsPerPeer);
INFO(NCCL_INIT, "comm:%p, nRanks:%d, nNodes:%d, coll channels:%d collnet channels:%d, nvls channels:%d, p2p channels:%d, p2p channels per peer:%d", comm, comm->nRanks, comm->nNodes, comm->nChannels, comm->nChannels, comm->nvlsChannels, comm->p2pnChannels, comm->p2pnChannelsPerPeer);
if (comm->intraRank == 0) { // Load ncclParamLaunchMode
const char* str = ncclGetEnv("NCCL_LAUNCH_MODE");
enum ncclLaunchMode mode, modeOld;