feat(rccl): add gfx1151 support
Cette révision appartient à :
@@ -58,7 +58,8 @@ set(DEFAULT_GPUS
 gfx1101
 gfx1102
 gfx1200
-gfx1201)
+gfx1201
+gfx1151)
 
 # Load CMake modules
 #==================================================================================================
@@ -26,7 +26,7 @@
 { __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
 #endif
 
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1200__) || defined(__gfx1201__)
+#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1200__) || defined(__gfx1201__)
 #define __trace_hwreg() \
 collTrace->data_0 = 0;
 #else
@@ -1019,7 +1019,7 @@ ncclResult_t ncclTopoGetAlgoTime(struct ncclComm* comm, int coll, int algorithm,
 int rcclGetTuningIndexForArch(const char* gfxarch) {
 static const std::vector<std::pair<std::string, int>> tuningIndexMap = {
 {"gfx906", 0}, {"gfx908", 0}, {"gfx90a", 0}, {"gfx942", 5},
-{"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0},
+{"gfx950", 6}, {"gfx1030", 0}, {"gfx1100", 0}, {"gfx1102", 0}, {"gfx1151", 0},
 {"gfx1200", 7}, {"gfx1201", 7}
 };
 if (gfxarch == nullptr) return 0;
@@ -41,7 +41,7 @@ typedef struct
 } rccl_bfloat8;
 
 // __cplusplus < 201103L || (!defined(__HIP_PLATFORM_AMD__) && !defined(__HIPCC__))
-#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1030__))
+#elif HIP_VERSION >= 60300000 && !(defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__gfx1030__))
 
 #include <hip/hip_fp8.h>
 
@@ -43,7 +43,7 @@ THE SOFTWARE.
 #endif
 
 // Macro for collecting HW_REG_HW_ID
-#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__NVCC__)
+#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1151__) || defined(__NVCC__)
 #define GetHwId(val) \
 val = 0
 #else
Référencer dans un nouveau ticket
Bloquer un utilisateur