Update RCCL/MSCCL work FIFO depth to 256K (#1091)
Этот коммит содержится в:
+1
-1
@@ -443,7 +443,7 @@ NCCL_PARAM(AggChannelSize, "AGG_CHANNEL_SIZE", -2);
|
||||
NCCL_PARAM(DisableGraphHelper, "GRAPH_HELPER_DISABLE", 0);
|
||||
// GDRCOPY support: FIFO_ENABLE, when enabled, locates the workFifo in CUDA memory
|
||||
NCCL_PARAM(GdrCopyFifoEnable, "GDRCOPY_FIFO_ENABLE", 1);
|
||||
NCCL_PARAM(WorkFifoDepth, "WORK_FIFO_DEPTH", 64<<10);
|
||||
NCCL_PARAM(WorkFifoDepth, "WORK_FIFO_DEPTH", 256<<10);
|
||||
enum ncclLaunchMode ncclParamLaunchMode;
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
RCCL_PARAM(MscclEnableDoneEvent, "MSCCL_ENABLE_DONE_EVENT", 1);
|
||||
#endif
|
||||
|
||||
RCCL_PARAM(MscclWorkFifoDepth, "MSCCL_WORK_FIFO_DEPTH", 64<<10);
|
||||
RCCL_PARAM(MscclWorkFifoDepth, "MSCCL_WORK_FIFO_DEPTH", 256<<10);
|
||||
|
||||
static inline size_t computeSizeNeeded(size_t nBytes, int nScratchChunks, int nChunksPerLoop) {
|
||||
return (nBytes * (size_t)nScratchChunks) / (size_t)nChunksPerLoop;
|
||||
|
||||
Ссылка в новой задаче
Block a user