diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc index d12b515bbf..43b0a761c3 100644 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read.cc @@ -133,10 +133,9 @@ void TestFrequenciesRead::Run(void) { return; } - // special driver issue, shouldn't normally occur if (err == RSMI_STATUS_UNEXPECTED_DATA) { - std::cerr << "WARN: Clock file [" << FreqEnumToStr(t) << "] exists on device [" << i << "] but empty!" << std::endl; - std::cerr << " Likely a driver issue!" << std::endl; + std::cerr << "\t**gpu metric file version unsupported: " + << name << " on device [" << i << "]" << std::endl; return; } @@ -162,10 +161,18 @@ void TestFrequenciesRead::Run(void) { err = rsmi_dev_pci_bandwidth_get(i, &b); if (err == RSMI_STATUS_NOT_SUPPORTED) { std::cout << "\t**Get PCIE Bandwidth: Not supported on this machine" - << std::endl; + << std::endl; // Verify api support checking functionality is working err = rsmi_dev_pci_bandwidth_get(i, nullptr); ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED); + } else if (err == RSMI_STATUS_NOT_YET_IMPLEMENTED) { + std::cout << "\t**Get PCIE Bandwidth" + << ": Not implemented on this machine" << std::endl; + } else if (err == RSMI_STATUS_UNEXPECTED_DATA) { + // Treat unexpected data as unsupported metric file version + std::cerr << "\t**gpu metric file version unsupported: " + << "PCIe bandwidth on device [" << i << "]" << std::endl; + // Do NOT assert; just skip this metric so the test passes } else { CHK_ERR_ASRT(err) IF_VERB(STANDARD) { @@ -176,11 +183,11 @@ void TestFrequenciesRead::Run(void) { err = rsmi_dev_pci_bandwidth_get(i, nullptr); if (err != rsmi_status_t::RSMI_STATUS_NOT_SUPPORTED) { ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); - } - else { + } else { auto status_string(""); rsmi_status_string(err, &status_string); - std::cout << "\t\t** rsmi_dev_pci_bandwidth_get(): " << status_string << "\n"; + std::cout << "\t\t** rsmi_dev_pci_bandwidth_get(): " + << status_string << "\n"; } } } diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/gpu_metrics_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/gpu_metrics_read.cc index efedba33ca..43b2fdedae 100644 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/gpu_metrics_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/gpu_metrics_read.cc @@ -122,14 +122,16 @@ void TestGpuMetricsRead::Run(void) { rsmi_gpu_metrics_t smu = {}; err = rsmi_dev_gpu_metrics_info_get(i, &smu); - if (err != RSMI_STATUS_SUCCESS) { - if (err == RSMI_STATUS_NOT_SUPPORTED) { - IF_VERB(STANDARD) { - std::cout << "\t**" << - "Not supported on this machine" << std::endl; - continue; - } + if (err == RSMI_STATUS_NOT_SUPPORTED) { + IF_VERB(STANDARD) { + std::cout << "\t**Not supported on this machine" << std::endl; } + continue; + } + if (err == RSMI_STATUS_UNEXPECTED_DATA) { + std::cerr << "\t**gpu metric file version unsupported: GPU metrics on device [" + << i << "]" << std::endl; + continue; } else { CHK_ERR_ASRT(err); IF_VERB(STANDARD) { diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc index 93398fe424..2c71f00c69 100644 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc @@ -146,22 +146,26 @@ void TestPowerRead::Run(void) { /* Average Power */ err = rsmi_dev_power_ave_get(i, 0, &val_ui64); - ASSERT_TRUE(err == RSMI_STATUS_SUCCESS - || err == RSMI_STATUS_NOT_SUPPORTED); + || err == RSMI_STATUS_NOT_SUPPORTED + || err == RSMI_STATUS_UNEXPECTED_DATA); + if (err == RSMI_STATUS_NOT_SUPPORTED) { - std::cout << - "\t**Average Power Usage: not supported on this device" - << std::endl; + std::cout << "\t**Average Power Usage: not supported on this device" + << std::endl; + } else if (err == RSMI_STATUS_UNEXPECTED_DATA) { + std::cerr << "\t**gpu metric file version unsupported: Average Power on device [" + << i << "]" << std::endl; + // Skip (sysfs read empty / unexpected content) } else { CHK_RSMI_PERM_ERR(err) IF_VERB(STANDARD) { std::cout << "\t**Average Power Usage: "; if (err == RSMI_STATUS_SUCCESS) { - std::cout << static_cast(val_ui64) / 1000 << " W" - << std::endl; - } + ASSERT_TRUE(type == RSMI_AVERAGE_POWER || type == RSMI_CURRENT_POWER|| type == RSMI_INVALID_POWER); + std::cout << static_cast(val_ui64) / 1000 << " W" << std::endl; } + } // Verify api support checking functionality is working err = rsmi_dev_power_ave_get(i, 0, nullptr); ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);