feat(rccl): add gfx1151 support

Cette révision appartient à :
Donato Capitella
2026-01-31 16:42:58 +00:00
Parent c19441b2b9
révision 0b8251289a
5 fichiers modifiés avec 6 ajouts et 5 suppressions
+2 -1
Voir le fichier
@@ -58,7 +58,8 @@ set(DEFAULT_GPUS
 gfx1101
 gfx1102
 gfx1200
-gfx1201)
+gfx1201
+gfx1151)
 # Load CMake modules
 #==================================================================================================
+1 -1
Voir le fichier
@@ -26,7 +26,7 @@
 { __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
 #endif
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
+#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1200__) || defined(__gfx1201__)
 #define __trace_hwreg() \
 collTrace->data_0 = 0;
 #else
+1 -1
Voir le fichier
@@ -1019,7 +1019,7 @@ ncclResult_t ncclTopoGetAlgoTime(struct ncclComm* comm, int coll, int algorithm,
 int rcclGetTuningIndexForArch(const char* gfxarch) {
 static const std::vector<std::pair<std::string, int>> tuningIndexMap = {
 {"gfx906", 0}, {"gfx908", 0}, {"gfx90a", 0}, {"gfx942", 5},
-{"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0},
+{"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0}, {"gfx1151", 0},
 {"gfx1200", 7}, {"gfx1201", 7}
 };
 if (gfxarch == nullptr) return 0;
+1 -1
Voir le fichier
@@ -41,7 +41,7 @@ typedef struct
 } rccl_bfloat8;
 // __cplusplus < 201103L || (!defined(__HIP_PLATFORM_AMD__) && !defined(__HIPCC__))
-#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1030__))
+#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1030__))
 #include <hip/hip_fp8.h>
+1 -1
Voir le fichier
@@ -43,7 +43,7 @@ THE SOFTWARE.
 #endif
 // Macro for collecting HW_REG_HW_ID
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__)
+#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__NVCC__)
 #define GetHwId(val) \
 val = 0
 #else