diff --git a/projects/rccl/CMakeLists.txt b/projects/rccl/CMakeLists.txt
index a9930e16d1..513f1e3a85 100644
--- a/projects/rccl/CMakeLists.txt
+++ b/projects/rccl/CMakeLists.txt
@@ -46,12 +46,7 @@ option(QUIET_WARNINGS "Supress compiler warnings"
 option(ENABLE_ROCSHMEM "Enable rocSHMEM support in RCCL" OFF)
 option(ENABLE_AMDSMI "Enable AMD/ROCm SMI support" ON)
 
-if(ENABLE_AMDSMI)
-  message(STATUS "SMI Support: ENABLED")
-  add_compile_definitions(RCCL_SMI_ENABLED)
-else()
-  message(STATUS "SMI Support: DISABLED via ENABLE_AMDSMI=OFF")
-endif()
+
 
 # Default GPU architectures to build
 #==================================================================================================
@@ -125,9 +120,24 @@ else()
   set(SUPPORTED_GPUS ${DEFAULT_GPUS})
 endif()
 
 set(GPU_TARGETS "${SUPPORTED_GPUS}")
 message(STATUS "Compiling for ${GPU_TARGETS}")
 
+# Auto-disable SMI for gfx1151 as it is not supported.
+# A normal variable shadows the ENABLE_AMDSMI option for this configure run only, so the
+# user's cached choice is not overwritten and applies again once gfx1151 is no longer targeted.
+if("${GPU_TARGETS}" MATCHES "gfx1151")
+  message(STATUS "Detected gfx1151 target: Forcing ENABLE_AMDSMI=OFF")
+  set(ENABLE_AMDSMI OFF)
+endif()
+
+if(ENABLE_AMDSMI)
+  message(STATUS "SMI Support: ENABLED")
+  add_compile_definitions(RCCL_SMI_ENABLED)
+else()
+  message(STATUS "SMI Support: DISABLED")
+endif()
+
 ## NOTE: Reload rocm-cmake in order to update GPU_TARGETS
 include(cmake/Dependencies.cmake) # Reloading to use desired GPU_TARGETS instead of defaults
 
diff --git a/projects/rccl/src/include/rocm_smi_wrap.h b/projects/rccl/src/include/rocm_smi_wrap.h
index 95d9fff42b..4a12b821a4 100644
--- a/projects/rccl/src/include/rocm_smi_wrap.h
+++ b/projects/rccl/src/include/rocm_smi_wrap.h
@@ -36,7 +36,6 @@ ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBus
 ncclResult_t rocm_smi_getDeviceIndexByPciBusId(const char* pciBusId, uint32_t* deviceIndex);
 ncclResult_t rocm_smi_getLinkInfo(int srcDev, int dstDev, RSMI_IO_LINK_TYPE* rsmi_type, int *hops, int *count);
 #else
-#error "DEBUG_TRACE: SMI is indeed disabled, using inline functions."
 inline ncclResult_t rocm_smi_init() { return ncclSuccess; }
 inline ncclResult_t rocm_smi_getNumDevice(uint32_t* num_devs) { *num_devs = 0; return ncclSuccess; }
 inline ncclResult_t rocm_smi_getDevicePciBusIdString(uint32_t deviceIndex, char* pciBusId, size_t len) { if (len > 0) pciBusId[0] = '\0'; return ncclSuccess; }