From 8df2b752cd0a54ee132c853c1955971d705b49da Mon Sep 17 00:00:00 2001 From: Cory Bloor Date: Tue, 6 Jun 2023 16:45:50 -0600 Subject: [PATCH] Fix build on additional architectures (#740) * Fix build on additional architectures Instead of directly wrapping a platform-specific operation with a preprocessor check against a gfx macro, it can be more flexible to check a macro that can be overridden by the user. The gfx macro can then just provide the default value for the macro, resulting in the same default behaviour as if the gfx macro was checked directly but with more control at build-time. For example, to build rccl without using buffer_wbinvl1_vol on gfx902, but still use the default on other archs, a user could export CXXFLAGS='-Xarch_gfx902 -DRCCL_USE_WBINVL1_VOL=0' before configuring the build. This flexibility isn't always necessary, but it's nicer to have it and not need it than to need it and not have it. * Define WARP_SIZE using warpSize builtin [ROCm/rccl commit: b1a65afd58b8b442120ec96990061694365c5e7b] --- projects/rccl/src/collectives/device/prims_ll128.h | 10 +++++++++- projects/rccl/src/include/devcomm.h | 6 +----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/projects/rccl/src/collectives/device/prims_ll128.h b/projects/rccl/src/collectives/device/prims_ll128.h index 48f7796df8..0529464f36 100644 --- a/projects/rccl/src/collectives/device/prims_ll128.h +++ b/projects/rccl/src/collectives/device/prims_ll128.h @@ -12,6 +12,14 @@ #define NCCL_LL128_FLAGTHREAD (NCCL_LL128_LINEELEMS-1) +#ifndef RCCL_USE_WBINVL1_VOL +#if defined(__GFX8__) || defined(__GFX9__) +#define RCCL_USE_WBINVL1_VOL 1 +#else +#define RCCL_USE_WBINVL1_VOL 0 +#endif +#endif + template class Primitives: public PrimitivesWithoutDirect> { @@ -304,7 +312,7 @@ private: } } -#if !defined(__gfx1030__) && !defined(__gfx1100__) && !defined(__gfx1101__) && !defined(__gfx1102__) +#if RCCL_USE_WBINVL1_VOL if (tid == 0) __asm__ __volatile__("buffer_wbinvl1_vol"); #endif 
/************************ Send **************************/ diff --git a/projects/rccl/src/include/devcomm.h b/projects/rccl/src/include/devcomm.h index c569a39ede..1c80a3f32c 100644 --- a/projects/rccl/src/include/devcomm.h +++ b/projects/rccl/src/include/devcomm.h @@ -53,11 +53,7 @@ union ncclLLFifoLine { int4 i4; }; -#if defined(__gfx1030__) || defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) -#define WARP_SIZE 32 -#else -#define WARP_SIZE 64 -#endif +#define WARP_SIZE warpSize #define MAXCHANNELS 32 #define NCCL_MAX_NTHREADS 256 #define NCCL_SIMPLE_MAX_NTHREADS NCCL_MAX_NTHREADS