From 42dc44f54dc8391f76f56404d8d6d0e9fb9aed15 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Tue, 18 Feb 2025 14:04:17 +0100 Subject: [PATCH] rsmi_init: Do not complain loudly when no driver is found When librocm-smi is pulled through a dependency, we may end up on a system without actual hardware supported by ROCM, and rsmi_init() failing is actually expected; we do not want to frighten the user in such a case. [ROCm/rocm_smi_lib commit: 8ca4207d5c4b8ea10c16a4ecdd1c7423a675fb54] --- projects/rocm-smi-lib/src/rocm_smi.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 604d9e380c..a5190f9d9f 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -485,6 +485,20 @@ rsmi_init(uint64_t flags) { if (smi.ref_count() == 1) { try { smi.Initialize(flags); + } catch(const amd::smi::rsmi_exception& e) { + smi.Cleanup(); + if (e.error_code() == RSMI_INITIALIZATION_ERROR && + !strcmp(e.what(), + "Failed to initialize rocm_smi library (KFD node discovery).")) { + // This system does not actually have ROCM drivers set up + // We were probably just called through dependency, just report the + // error and log without complaining loudly. + std::ostringstream ss; + ss << "Exception caught: " << e.what() << "."; + LOG_INFO(ss); + return RSMI_STATUS_NOT_SUPPORTED; + } + throw amd::smi::rsmi_exception(RSMI_STATUS_INIT_ERROR, __FUNCTION__); } catch(...) { smi.Cleanup(); throw amd::smi::rsmi_exception(RSMI_STATUS_INIT_ERROR, __FUNCTION__);