diff --git a/src/init.cc b/src/init.cc index be42cc1352..41aae75558 100644 --- a/src/init.cc +++ b/src/init.cc @@ -443,7 +443,7 @@ NCCL_PARAM(AggChannelSize, "AGG_CHANNEL_SIZE", -2); NCCL_PARAM(DisableGraphHelper, "GRAPH_HELPER_DISABLE", 0); // GDRCOPY support: FIFO_ENABLE when enabled locates a workFifo in CUDA memory NCCL_PARAM(GdrCopyFifoEnable, "GDRCOPY_FIFO_ENABLE", 1); -NCCL_PARAM(WorkFifoDepth, "WORK_FIFO_DEPTH", 64<<10); +NCCL_PARAM(WorkFifoDepth, "WORK_FIFO_DEPTH", 256<<10); enum ncclLaunchMode ncclParamLaunchMode; diff --git a/src/misc/msccl/msccl_setup.cc b/src/misc/msccl/msccl_setup.cc index 21be019361..b418dabcd5 100644 --- a/src/misc/msccl/msccl_setup.cc +++ b/src/misc/msccl/msccl_setup.cc @@ -20,7 +20,7 @@ RCCL_PARAM(MscclEnableDoneEvent, "MSCCL_ENABLE_DONE_EVENT", 1); #endif -RCCL_PARAM(MscclWorkFifoDepth, "MSCCL_WORK_FIFO_DEPTH", 64<<10); +RCCL_PARAM(MscclWorkFifoDepth, "MSCCL_WORK_FIFO_DEPTH", 256<<10); static inline size_t computeSizeNeeded(size_t nBytes, int nScratchChunks, int nChunksPerLoop) { return (nBytes * (size_t)nScratchChunks) / (size_t)nChunksPerLoop;