diff --git a/src/enqueue.cc b/src/enqueue.cc index 0baf4df801..1e61d0a05e 100644 --- a/src/enqueue.cc +++ b/src/enqueue.cc @@ -388,7 +388,9 @@ ncclResult_t ncclTasksRegAndEnqueue(struct ncclComm* comm) { devWork.redOpArgIsPtr = task->opDev.scalarArgIsPtr; devWork.oneNode = (comm->nNodes == 1); devWork.rcclUseOneSlice = comm->rcclUseOneSlice; - + // opCount was missing for collDevWork; copy it from the task here + devWork.opCount = task->opCount; + devWork.isOneRPN = comm->isOneRPN; devWork.netRegUsed = devWork.regUsed = 0; devWork.gfx9CheapFenceOff = gfx9CheapFenceOff(devWork, comm->gfx9CheapFenceOff); @@ -1881,6 +1883,12 @@ ncclResult_t ncclLaunchKernelAfter_NoCuda(struct ncclComm* comm, struct ncclKern // hostStreamPlanTask directly NCCLCHECK(hostStreamPlanTask(comm, plan)); } + + // Increment the opCount for intranode comms as well. Previously, if proxyOpQueue was empty, + // opCount was not incremented because ncclProxyStart wasn't called in hostStreamPlanTask. + if (!plan->persistent && ncclIntruQueueHead(&plan->proxyOpQueue) == nullptr) { + comm->opCount++; + } return ncclSuccess; } diff --git a/src/init.cc b/src/init.cc index f21b6f0042..4a511633ec 100644 --- a/src/init.cc +++ b/src/init.cc @@ -1793,8 +1793,8 @@ static ncclResult_t initTransportsRank(struct ncclComm* comm, struct ncclComm* p // Compute time models for algorithm and protocol combinations NCCLCHECKGOTO(ncclTopoTuneModel(comm, comm->minCompCap, comm->maxCompCap, graphs), ret, fail); - INFO(NCCL_INIT, "%d coll channels, %d collnet channels, %d nvls channels, %d p2p channels, %d p2p channels per peer", comm->nChannels, comm->nChannels, comm->nvlsChannels, comm->p2pnChannels, comm->p2pnChannelsPerPeer); - + INFO(NCCL_INIT, "comm:%p, nRanks:%d, nNodes:%d, coll channels:%d collnet channels:%d, nvls channels:%d, p2p channels:%d, p2p channels per peer:%d", comm, comm->nRanks, comm->nNodes, comm->nChannels, comm->nChannels, comm->nvlsChannels, comm->p2pnChannels, comm->p2pnChannelsPerPeer); + if 
(comm->intraRank == 0) { // Load ncclParamLaunchMode const char* str = ncclGetEnv("NCCL_LAUNCH_MODE"); enum ncclLaunchMode mode, modeOld;