From f25177840e3eb309ec3a6f2cb610698aa51d5e71 Mon Sep 17 00:00:00 2001 From: "Galantsev, Dmitrii" Date: Wed, 13 Sep 2023 20:48:49 -0500 Subject: [PATCH] TESTS - Check power and frequency support It is not guaranteed that power can be read or set for some GPUs (MI300). It is also not guaranteed that frequencies can be set. As this is not a tool issue - we simply skip the failing test. Change-Id: I134e96a476040cef513cd924f00e30cd6dea42a5 Signed-off-by: Galantsev, Dmitrii [ROCm/rocm_smi_lib commit: 5c574ac79cbf75d89ff6f304b2c0e640c15765c6] --- .../functional/frequencies_read_write.cc | 23 ++++++++++++++----- .../functional/power_cap_read_write.cc | 7 ++++++ .../rocm_smi_test/functional/power_read.cc | 10 ++++++++ .../functional/power_read_write.cc | 9 ++++++++ .../tests/rocm_smi_test/rsmitst.exclude | 3 +-- 5 files changed, 44 insertions(+), 8 deletions(-) diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc index 5ad627cb5f..5060b59208 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/frequencies_read_write.cc @@ -104,8 +104,7 @@ void TestFrequenciesReadWrite::Run(void) { for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) { PrintDeviceHeader(dv_ind); - for (uint32_t clk = (uint32_t)RSMI_CLK_TYPE_FIRST; - clk <= RSMI_CLK_TYPE_LAST; ++clk) { + for (uint32_t clk = RSMI_CLK_TYPE_FIRST; clk <= RSMI_CLK_TYPE_LAST; ++clk) { rsmi_clk = (rsmi_clk_type)clk; auto freq_read = [&]() -> bool { @@ -147,14 +146,18 @@ void TestFrequenciesReadWrite::Run(void) { std::endl; } ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask); - //Certain ASICs does not allow to set particular clocks. 
If set function for a clock returns - //permission error despite root access, manually set ret value to success and return - if (ret == RSMI_STATUS_PERMISSION && geteuid() == 0) { + // Certain ASICs do not allow setting particular clocks. If the set function for a clock returns + // a permission error despite root access, manually set ret to success and return. + // + // Sometimes setting clock frequencies is not supported at all. + if ((ret == RSMI_STATUS_PERMISSION && geteuid() == 0) || + (ret == RSMI_STATUS_NOT_SUPPORTED)) { std::cout << "\t**Set " << FreqEnumToStr(rsmi_clk) << ": Not supported on this machine. Skipping..." << std::endl; ret = RSMI_STATUS_SUCCESS; return; } + CHK_ERR_ASRT(ret) ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f); if (ret != RSMI_STATUS_SUCCESS) { @@ -166,12 +169,20 @@ void TestFrequenciesReadWrite::Run(void) { std::cout << "Resetting mask to all frequencies." << std::endl; } ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF); + if (ret == RSMI_STATUS_NOT_SUPPORTED) { + std::cout << "\t**Set " << FreqEnumToStr(rsmi_clk) + << ": Not supported on this machine. Skipping..." << std::endl; + ret = RSMI_STATUS_SUCCESS; + return; + } if (ret != RSMI_STATUS_SUCCESS) { return; } ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); - if (ret != RSMI_STATUS_SUCCESS) { + if (ret == RSMI_STATUS_NOT_SUPPORTED) { + std::cout << "\t**Setting performance level is not supported on this machine. Skipping..." 
<< std::endl; + ret = RSMI_STATUS_SUCCESS; return; } }; diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_cap_read_write.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_cap_read_write.cc index 5d57ec0db2..5481996627 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_cap_read_write.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_cap_read_write.cc @@ -114,6 +114,13 @@ void TestPowerCapReadWrite::Run(void) { ret = rsmi_dev_power_cap_get(dv_ind, 0, nullptr); ASSERT_EQ(ret, RSMI_STATUS_INVALID_ARGS); + // Check if power cap is within the range + // skip the test otherwise + if (orig < min || orig > max) { + std::cout << "Power cap is not within the range. Skipping test for " << dv_ind << std::endl; + continue; + } + new_cap = (max + min)/2; IF_VERB(STANDARD) { diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc index 02ec355b46..a18cd70676 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read.cc @@ -117,6 +117,16 @@ void TestPowerRead::Run(void) { } err = rsmi_dev_power_ave_get(i, 0, &val_ui64); + if (err == RSMI_STATUS_NOT_SUPPORTED) { + std::cout << + "\t**Power average information is not supported for this device" + << std::endl; + + // Verify api support checking functionality is working + err = rsmi_dev_power_ave_get(i, 0, nullptr); + ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED); + continue; + } IF_VERB(STANDARD) { std::cout << "\t**Average Power Usage: "; CHK_RSMI_PERM_ERR(err) diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read_write.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read_write.cc index dc18cb6de0..1040716ad0 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read_write.cc +++ 
b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/power_read_write.cc @@ -123,6 +123,15 @@ void TestPowerReadWrite::Run(void) { PrintDeviceHeader(dv_ind); ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status); + if (ret == RSMI_STATUS_NOT_SUPPORTED) { + std::cout << + "\t**Power profile presets are not supported for this device" + << std::endl; + // Verify api support checking functionality is working + ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, nullptr); + ASSERT_EQ(ret, RSMI_STATUS_NOT_SUPPORTED); + continue; + } CHK_ERR_ASRT(ret) // Verify api support checking functionality is working diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/rsmitst.exclude b/projects/rocm-smi-lib/tests/rocm_smi_test/rsmitst.exclude index 0632242f93..d87c409421 100644 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/rsmitst.exclude +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/rsmitst.exclude @@ -64,8 +64,7 @@ $BLACKLIST_ALL_ASICS\ # /sys/class/kfd/kfd/topology/nodes/*/properties FILTER[90400]=\ $BLACKLIST_ALL_ASICS\ -"rsmitstReadOnly.TestVoltCurvRead:"\ -"rsmitstReadWrite.TestPowerReadWrite" +"rsmitstReadOnly.TestVoltCurvRead" FILTER[90401]=${FILTER[90400]} FILTER[90402]=${FILTER[90400]}