From 0b8251289a064d5ede2c0571d5a4d3b7274cb26b Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Sat, 31 Jan 2026 16:42:58 +0000 Subject: [PATCH] feat(rccl): add gfx1151 support --- projects/rccl/CMakeLists.txt | 3 ++- projects/rccl/src/device/common.h | 2 +- projects/rccl/src/graph/tuning.cc | 2 +- projects/rccl/src/include/rccl_float8.h | 2 +- projects/rccl/tools/JitterBench/Common.hpp | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/projects/rccl/CMakeLists.txt b/projects/rccl/CMakeLists.txt index 653feafdee..dcd8ff80a9 100644 --- a/projects/rccl/CMakeLists.txt +++ b/projects/rccl/CMakeLists.txt @@ -58,7 +58,8 @@ set(DEFAULT_GPUS gfx1101 gfx1102 gfx1200 - gfx1201) + gfx1201 + gfx1151) # Load CMake modules #================================================================================================== diff --git a/projects/rccl/src/device/common.h b/projects/rccl/src/device/common.h index 53aca5be92..40e7ae933b 100644 --- a/projects/rccl/src/device/common.h +++ b/projects/rccl/src/device/common.h @@ -26,7 +26,7 @@ { __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); } #endif -#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1200__) || defined(__gfx1201__) #define __trace_hwreg() \ collTrace->data_0 = 0; #else diff --git a/projects/rccl/src/graph/tuning.cc b/projects/rccl/src/graph/tuning.cc index 72498bdbd4..5ce7c0272b 100644 --- a/projects/rccl/src/graph/tuning.cc +++ b/projects/rccl/src/graph/tuning.cc @@ -1019,7 +1019,7 @@ ncclResult_t ncclTopoGetAlgoTime(struct ncclComm* comm, int coll, int algorithm, int rcclGetTuningIndexForArch(const char* gfxarch) { static const std::vector> tuningIndexMap = { {"gfx906", 0}, {"gfx908", 0}, {"gfx90a", 0}, {"gfx942", 5}, - {"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0}, + {"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0}, {"gfx1151", 0}, {"gfx1200", 7}, {"gfx1201", 7} }; if (gfxarch == nullptr) return 0; diff --git a/projects/rccl/src/include/rccl_float8.h b/projects/rccl/src/include/rccl_float8.h index 8ccd4bd517..8fca538f53 100755 --- a/projects/rccl/src/include/rccl_float8.h +++ b/projects/rccl/src/include/rccl_float8.h @@ -41,7 +41,7 @@ typedef struct } rccl_bfloat8; // __cplusplus < 201103L || (!defined(__HIP_PLATFORM_AMD__) && !defined(__HIPCC__)) -#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1030__)) +#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1030__)) #include diff --git a/projects/rccl/tools/JitterBench/Common.hpp b/projects/rccl/tools/JitterBench/Common.hpp index bad12a1b6f..116f60a963 100644 --- a/projects/rccl/tools/JitterBench/Common.hpp +++ b/projects/rccl/tools/JitterBench/Common.hpp @@ -43,7 +43,7 @@ THE SOFTWARE. #endif // Macro for collecting HW_REG_HW_ID -#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__NVCC__) #define GetHwId(val) \ val = 0 #else