Re-enable LL128 on gfx90a if compiler supports it (#1036)

[ROCm/rccl commit: 5851ae5974]
This commit is contained in:
Wenkai Du
2024-01-10 08:01:11 -08:00
committed by GitHub
parent c1acf97c05
commit 64cf812da0
2 changed files with 7 additions and 3 deletions
+4
View File
@@ -601,6 +601,10 @@ if(DEMANGLE_DIR)
target_compile_definitions(rccl PRIVATE "HAVE_CPLUS_DEMANGLE=1")
target_compile_definitions(rccl PRIVATE "HAVE_DECL_BASENAME=1")
endif()
# Define ENABLE_LL128 for the rccl target when the detected hipcc version is at
# least 6.1.33591. The variable expansion is quoted so that an empty or unset
# hipcc_version_string evaluates to false rather than producing a malformed
# if() expression that aborts configuration.
# NOTE(review): presumably 6.1.33591 is the first compiler build with working
# LL128 support on gfx90a -- confirm against the compiler release notes.
if("${hipcc_version_string}" VERSION_GREATER_EQUAL "6.1.33591")
  target_compile_definitions(rccl PRIVATE ENABLE_LL128)
  message(STATUS "RCCL LL128 protocol enabled")
endif()
## Set RCCL compile options
target_compile_options(rccl PRIVATE -parallel-jobs=12)
@@ -32,7 +32,7 @@
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
#endif
#ifdef ENABLE_LL128
#if defined(ENABLE_LL128) && defined(__gfx90a__)
#define NCCL_FUNC5(func, algo, devredop, type, nullify) \
MACRO_IF(nullify, nullptr, NCCL_FUNC_NAME(func, algo, LL, devredop, type)), \
MACRO_IF(nullify, nullptr, NCCL_FUNC_NAME(func, algo, LL128, devredop, type)), \
@@ -571,7 +571,7 @@ __forceinline__ __device__ void ncclKernel(
#if defined(USE_INDIRECT_FUNCTION_CALL) && !defined(__gfx940__) && !defined(__gfx941__) && !defined(__gfx942__)
ncclFuncs[ncclShmem.work.header.funcIndex]();
#else
#ifdef ENABLE_LL128
#if defined(ENABLE_LL128) && defined(__gfx90a__)
NCCL_CALL_FUNCTIONS<1>(ncclShmem.work.header.funcIndex);
#else
NCCL_CALL_FUNCTIONS<0>(ncclShmem.work.header.funcIndex);
@@ -640,7 +640,7 @@ __device__ __attribute__((noinline)) void NCCL_FUNC_NAME(func, algo, proto, dev
#endif
// Only generate inline kernels for LL
#ifdef ENABLE_LL128
#if defined(ENABLE_LL128) && defined(__gfx90a__)
#define IMPL_COLL4(func, algo, devredop, type) \
IMPL_COLL_FUNC(func, algo, LL, devredop, type) \
IMPL_COLL_FUNC(func, algo, LL128, devredop, type) \