From 92bcdcf5b0afe400d6a866da000a1df5d8519d68 Mon Sep 17 00:00:00 2001
From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com>
Date: Thu, 20 May 2021 08:58:45 -0700
Subject: [PATCH] Correction on max number of groups (#373)

[ROCm/rccl commit: 50da1b48afc61f5da444bd487bd289d2966dca17]
---
 projects/rccl/src/include/devcomm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/projects/rccl/src/include/devcomm.h b/projects/rccl/src/include/devcomm.h
index 4c7c9980bc..3de95ef0d5 100644
--- a/projects/rccl/src/include/devcomm.h
+++ b/projects/rccl/src/include/devcomm.h
@@ -176,7 +176,7 @@ struct ncclDevComm;
 #pragma pack(push) /* push current alignment to stack */
 #pragma pack(4) /* set alignment to 4 bytes boundary */
 #define NCCL_MAX_WORK_ELEMENTS 1
-#define NCCL_MAX_GROUPS (NCCL_MAX_WORK_ELEMENTS*2)
+#define NCCL_MAX_GROUPS (NCCL_MAX_NTHREADS/WARP_SIZE)
 
 /* ncclWork is to be a power of two, currently 8x64 bytes, */
 /* to make sure reads to host from the CUDA kernel are aligned. */