From 21bc1ef4932f13e3b20dad7a5b52996bd2395c30 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Fri, 18 Oct 2019 12:54:00 -0700 Subject: [PATCH] Revert collective chunk and slice steps to avoid drop in throughput [ROCm/rccl commit: df74d12946a42bb318b9cb0fc1a4a77c0720148d] --- projects/rccl/src/collectives/collectives.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/projects/rccl/src/collectives/collectives.h b/projects/rccl/src/collectives/collectives.h index 27de1f7540..63fcfd2017 100644 --- a/projects/rccl/src/collectives/collectives.h +++ b/projects/rccl/src/collectives/collectives.h @@ -57,12 +57,18 @@ DECL_ALL_COLLS // CHUNKSIZE must be a multiple of SLICESIZE -#define ALLREDUCE_SLICESTEPS (NCCL_STEPS/4) -#define ALLREDUCE_CHUNKSTEPS (NCCL_STEPS/2) -#define ALLGATHER_SLICESTEPS (NCCL_STEPS/4) -#define ALLGATHER_CHUNKSTEPS (NCCL_STEPS/2) -#define REDUCESCATTER_SLICESTEPS (NCCL_STEPS/4) -#define REDUCESCATTER_CHUNKSTEPS (NCCL_STEPS/2) +//#define ALLREDUCE_SLICESTEPS (NCCL_STEPS/4) +//#define ALLREDUCE_CHUNKSTEPS (NCCL_STEPS/2) +//#define ALLGATHER_SLICESTEPS (NCCL_STEPS/4) +//#define ALLGATHER_CHUNKSTEPS (NCCL_STEPS/2) +//#define REDUCESCATTER_SLICESTEPS (NCCL_STEPS/4) +//#define REDUCESCATTER_CHUNKSTEPS (NCCL_STEPS/2) +#define ALLREDUCE_SLICESTEPS 4 +#define ALLREDUCE_CHUNKSTEPS 4 +#define ALLGATHER_SLICESTEPS 4 +#define ALLGATHER_CHUNKSTEPS 4 +#define REDUCESCATTER_SLICESTEPS 4 +#define REDUCESCATTER_CHUNKSTEPS 4 #define BROADCAST_SLICESTEPS 1 #define BROADCAST_CHUNKSTEPS 1 #define REDUCE_SLICESTEPS 1