TESTS - Check power and frequency support

On some GPUs (e.g. MI300), reading or setting power is not guaranteed
to be supported, and neither is setting clock frequencies.

Since this is not a tool issue, we simply skip the affected tests instead of failing them.

Change-Id: I134e96a476040cef513cd924f00e30cd6dea42a5
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>


[ROCm/rocm_smi_lib commit: 5c574ac79c]
Bu işleme şunda yer alıyor:
Galantsev, Dmitrii
2023-09-13 20:48:49 -05:00
işlemeyi yapan: Dmitrii Galantsev
ebeveyn 3707b84f81
işleme f25177840e
5 değiştirilmiş dosya ile 44 ekleme ve 8 silme
+17 -6
Dosyayı Görüntüle
@@ -104,8 +104,7 @@ void TestFrequenciesReadWrite::Run(void) {
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
PrintDeviceHeader(dv_ind);
for (uint32_t clk = (uint32_t)RSMI_CLK_TYPE_FIRST;
clk <= RSMI_CLK_TYPE_LAST; ++clk) {
for (uint32_t clk = RSMI_CLK_TYPE_FIRST; clk <= RSMI_CLK_TYPE_LAST; ++clk) {
rsmi_clk = (rsmi_clk_type)clk;
auto freq_read = [&]() -> bool {
@@ -147,14 +146,18 @@ void TestFrequenciesReadWrite::Run(void) {
std::endl;
}
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask);
//Certain ASICs does not allow to set particular clocks. If set function for a clock returns
//permission error despite root access, manually set ret value to success and return
if (ret == RSMI_STATUS_PERMISSION && geteuid() == 0) {
// Certain ASICs do not allow setting particular clocks. If the set function for a clock
// returns a permission error despite root access, manually set ret to success and return.
//
// On some devices, setting clock frequencies is not supported at all.
if ((ret == RSMI_STATUS_PERMISSION && geteuid() == 0) ||
(ret == RSMI_STATUS_NOT_SUPPORTED)) {
std::cout << "\t**Set " << FreqEnumToStr(rsmi_clk) <<
": Not supported on this machine. Skipping..." << std::endl;
ret = RSMI_STATUS_SUCCESS;
return;
}
CHK_ERR_ASRT(ret)
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -166,12 +169,20 @@ void TestFrequenciesReadWrite::Run(void) {
std::cout << "Resetting mask to all frequencies." << std::endl;
}
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout << "\t**Set " << FreqEnumToStr(rsmi_clk)
<< ": Not supported on this machine. Skipping..." << std::endl;
ret = RSMI_STATUS_SUCCESS;
return;
}
if (ret != RSMI_STATUS_SUCCESS) {
return;
}
ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
if (ret != RSMI_STATUS_SUCCESS) {
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout << "\t**Setting performance level is not supported on this machine. Skipping..." << std::endl;
ret = RSMI_STATUS_SUCCESS;
return;
}
};
+7
Dosyayı Görüntüle
@@ -114,6 +114,13 @@ void TestPowerCapReadWrite::Run(void) {
ret = rsmi_dev_power_cap_get(dv_ind, 0, nullptr);
ASSERT_EQ(ret, RSMI_STATUS_INVALID_ARGS);
// Check if power cap is within the range
// skip the test otherwise
if (orig < min || orig > max) {
std::cout << "Power cap is not within the range. Skipping test for " << dv_ind << std::endl;
continue;
}
new_cap = (max + min)/2;
IF_VERB(STANDARD) {
+10
Dosyayı Görüntüle
@@ -117,6 +117,16 @@ void TestPowerRead::Run(void) {
}
err = rsmi_dev_power_ave_get(i, 0, &val_ui64);
if (err == RSMI_STATUS_NOT_SUPPORTED) {
std::cout <<
"\t**Power average information is not supported for this device"
<< std::endl;
// Verify api support checking functionality is working
err = rsmi_dev_power_ave_get(i, 0, nullptr);
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
continue;
}
IF_VERB(STANDARD) {
std::cout << "\t**Average Power Usage: ";
CHK_RSMI_PERM_ERR(err)
+9
Dosyayı Görüntüle
@@ -123,6 +123,15 @@ void TestPowerReadWrite::Run(void) {
PrintDeviceHeader(dv_ind);
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout <<
"\t**Power profile presets are not supported for this device"
<< std::endl;
// Verify api support checking functionality is working
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, nullptr);
ASSERT_EQ(ret, RSMI_STATUS_NOT_SUPPORTED);
continue;
}
CHK_ERR_ASRT(ret)
// Verify api support checking functionality is working
+1 -2
Dosyayı Görüntüle
@@ -64,8 +64,7 @@ $BLACKLIST_ALL_ASICS\
# /sys/class/kfd/kfd/topology/nodes/*/properties
FILTER[90400]=\
$BLACKLIST_ALL_ASICS\
"rsmitstReadOnly.TestVoltCurvRead:"\
"rsmitstReadWrite.TestPowerReadWrite"
"rsmitstReadOnly.TestVoltCurvRead"
FILTER[90401]=${FILTER[90400]}
FILTER[90402]=${FILTER[90400]}