# Patch summary (free text ahead of the first "diff --git" header).
#
# Files touched:
#   projects/rccl/CMakeLists.txt
#     - Adds option ENABLE_AMDSMI ("Enable AMD/ROCm SMI support"), default ON.
#     - Adds add_definitions("-DUSE_ROCMSMI") right after SMI_LIBRARIES is set
#       to rocm_smi64, in the hunk git labels with
#       `if(NOT USE_AMDSMI AND ENABLE_AMDSMI)`.
#   projects/rccl/src/rccl_wrap.cc (hunks labelled `int getFirmwareVersion() {`)
#     - The rsmi_* code that used to sit under a bare `#else` now sits under
#       `#elif defined(USE_ROCMSMI)`. The condition of the preceding
#       preprocessor branch is not visible in this diff.
#     - A new `#else` branch ("SMI disabled") sets fw_version = -1 before the
#       `#endif`; the following context line returns fw_version.
#
# NOTE(review): the SMI-disabled branch yields -1, which is also what the
#   visible rocm-smi failure path returns (`return -1` after the ERROR call),
#   so the return value alone does not distinguish "disabled" from "failed".
# NOTE(review): "Supress" in the QUIET_WARNINGS help text is a typo, but it is
#   an unchanged context line here and has to stay as-is to match the file.
# NOTE(review): add_definitions() is directory-scoped; a target-scoped
#   definition may be preferable, but the target is not visible here -- confirm.
# NOTE(review): this copy has the whole diff on one physical line, while the
#   hunk headers (e.g. -44,6 +44,7) describe multi-line hunks. Line breaks,
#   indentation and blank context lines appear to have been lost; regenerate
#   the patch from the repository rather than applying this text -- TODO confirm.
diff --git a/projects/rccl/CMakeLists.txt b/projects/rccl/CMakeLists.txt index 4c8bbf5777..36fcedcb1f 100644 --- a/projects/rccl/CMakeLists.txt +++ b/projects/rccl/CMakeLists.txt @@ -44,6 +44,7 @@ option(TRACE "Enable additional tracing" option(FAULT_INJECTION "Enable fault injection" ON) option(QUIET_WARNINGS "Supress compiler warnings" OFF) option(ENABLE_ROCSHMEM "Enable rocSHMEM support in RCCL" OFF) +option(ENABLE_AMDSMI "Enable AMD/ROCm SMI support" ON) # Default GPU architectures to build #================================================================================================== @@ -301,6 +302,7 @@ if(NOT USE_AMDSMI AND ENABLE_AMDSMI) message(STATUS "Found rocm_smi at ${SMI_INCLUDE_DIR}") set(SMI_LIB_NAME "rocm-smi-lib" CACHE INTERNAL "rocm-smi-lib for packaging") set(SMI_LIBRARIES rocm_smi64) + add_definitions("-DUSE_ROCMSMI") check_include_file_cxx("${SMI_INCLUDE_DIR}/rocm_smi/rocm_smi64Config.h" HAVE_ROCM_SMI64CONFIG) diff --git a/projects/rccl/src/rccl_wrap.cc b/projects/rccl/src/rccl_wrap.cc index d15853d08c..ba7e2d8f75 100644 --- a/projects/rccl/src/rccl_wrap.cc +++ b/projects/rccl/src/rccl_wrap.cc @@ -742,7 +742,7 @@ int getFirmwareVersion() { fw_version = info.fw_info_list[0].fw_version; -#else +#elif defined(USE_ROCMSMI) rsmi_status_t ret; ret = rsmi_init(0); if (ret != RSMI_STATUS_SUCCESS) { @@ -755,6 +755,9 @@ int getFirmwareVersion() { ERROR("Could not query firmware info using rocm-smi"); return -1; } +#else + // SMI disabled + fw_version = -1; #endif return fw_version;