From 967e879861c7ed06b5aa160b8bdbfa2e0b1a533d Mon Sep 17 00:00:00 2001 From: "Arif, Maisam" Date: Thu, 19 Jun 2025 15:24:03 -0500 Subject: [PATCH] [SWDEV-538786] Fix ecc counts returning file error (#494) Change-Id: I5cea584289df95e89b6151d549bf69e4c3e50d22 Signed-off-by: Maisam Arif --- rocm_smi/src/rocm_smi.cc | 7 +++++++ tests/amd_smi_test/functional/err_cnt_read.cc | 5 ++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index 4e9876b215..ec03a85238 100644 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -763,6 +763,13 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block, if (val_vec.size() < 2 ) ret = RSMI_STATUS_FILE_ERROR; if (ret != RSMI_STATUS_SUCCESS) { + if (ret == RSMI_STATUS_FILE_ERROR) { + ss << __PRETTY_FUNCTION__ << " | ======= end =======" + << ", GetDevValueVec() ret was RSMI_STATUS_FILE_ERROR " + << "-> reporting RSMI_STATUS_NOT_SUPPORTED"; + LOG_ERROR(ss); + return RSMI_STATUS_NOT_SUPPORTED; + } ss << __PRETTY_FUNCTION__ << " | ======= end =======" << ", GetDevValueVec() ret was not RSMI_STATUS_SUCCESS" << " -> reporting " << amd::smi::getRSMIStatusString(ret); diff --git a/tests/amd_smi_test/functional/err_cnt_read.cc b/tests/amd_smi_test/functional/err_cnt_read.cc index d409ae279f..f346b41d2b 100644 --- a/tests/amd_smi_test/functional/err_cnt_read.cc +++ b/tests/amd_smi_test/functional/err_cnt_read.cc @@ -120,7 +120,7 @@ void TestErrCntRead::Run(void) { err = amdsmi_get_gpu_ecc_count(processor_handles_[i], static_cast(b), &ec); - if (err == AMDSMI_STATUS_NOT_SUPPORTED || err == AMDSMI_STATUS_FILE_ERROR) { + if (err == AMDSMI_STATUS_NOT_SUPPORTED) { IF_VERB(STANDARD) { std::cout << "\t**Error Count for " << GetBlockNameStr(static_cast(b)) << @@ -129,8 +129,7 @@ void TestErrCntRead::Run(void) { // Verify api support checking functionality is working err = amdsmi_get_gpu_ecc_count(processor_handles_[i], static_cast(b), nullptr); - ASSERT_TRUE(err == AMDSMI_STATUS_NOT_SUPPORTED - || err == AMDSMI_STATUS_FILE_ERROR); + ASSERT_TRUE(err == AMDSMI_STATUS_NOT_SUPPORTED); } else { CHK_ERR_ASRT(err)