From d0ea73d2a26421f1a10c4a680f33a1d9b7c53c04 Mon Sep 17 00:00:00 2001 From: Charis Poag Date: Wed, 23 Aug 2023 09:14:15 -0500 Subject: [PATCH] Error handling for unset freqs Sending RSMI_STATUS_UNEXPECTED_DATA for drivers which do not set some clock freqs Change-Id: I43a9515c2757dddd412bb25cfd54095e63367030 Signed-off-by: Charis Poag [ROCm/amdsmi commit: f191c2753c5d95d65f916afd4c58e37e74749d05] --- projects/amdsmi/include/rocm_smi/rocm_smi.h | 4 ++++ projects/amdsmi/rocm_smi/example/rocm_smi_example.cc | 10 +++++++++- projects/amdsmi/src/rocm_smi.cc | 10 +++++++--- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/projects/amdsmi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/include/rocm_smi/rocm_smi.h index 6c0e1b9d60..053447b501 100755 --- a/projects/amdsmi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/include/rocm_smi/rocm_smi.h @@ -1468,6 +1468,8 @@ rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id); * written * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * @retval ::RSMI_STATUS_UNEXPECTED_DATA Data read or provided was not as + * expected * */ rsmi_status_t @@ -2370,6 +2372,8 @@ rsmi_status_t rsmi_dev_mem_overdrive_level_get(uint32_t dv_ind, uint32_t *od); * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function with the given arguments * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::RSMI_STATUS_UNEXPECTED_DATA Data read or provided was not as + * expected * */ rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, diff --git a/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc b/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc index 9e9019e2b8..0e78debb91 100755 --- a/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc +++ b/projects/amdsmi/rocm_smi/example/rocm_smi_example.cc @@ -577,7 +577,7 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) { static void print_frequencies(rsmi_frequencies_t *f) { assert(f != nullptr); for (uint32_t j = 0; j < f->num_supported; ++j) { - std::cout << "\t** " << j << ": " << f->frequency[j]; + std::cout << "\t** " << j << ": " << std::to_string(f->frequency[j]); if (j == f->current) { std::cout << " *"; } @@ -777,6 +777,14 @@ int main() { std::cout << f.num_supported << std::endl; print_frequencies(&f); + ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SOC, &f); + CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret) + std::cout << "\t**Supported GPU clock frequencies (SOC clk): "; + std::cout << f.num_supported << std::endl; + std::cout << "\t**Current value (SOC clk): "; + std::cout << f.current << std::endl; + print_frequencies(&f); + std::cout << "\t**Monitor name: "; char name[128]; ret = rsmi_dev_name_get(i, name, 128); diff --git a/projects/amdsmi/src/rocm_smi.cc b/projects/amdsmi/src/rocm_smi.cc index 38c0023466..b3347a155d 100755 --- a/projects/amdsmi/src/rocm_smi.cc +++ b/projects/amdsmi/src/rocm_smi.cc @@ -1065,6 +1065,8 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_ if (f == nullptr) { return RSMI_STATUS_INVALID_ARGS; } + memset(f, 0, sizeof(rsmi_frequencies_t)); + f->current=0; ret = GetDevValueVec(type, dv_ind, &val_vec); if (ret != RSMI_STATUS_SUCCESS) { @@ -1114,6 +1116,7 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_ // assert(f->current < f->num_supported); if (f->current >= f->num_supported) { f->current = -1; + return RSMI_STATUS_UNEXPECTED_DATA; } return RSMI_STATUS_SUCCESS; @@ -1748,7 +1751,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, TRY std::ostringstream ss; - ss << __PRETTY_FUNCTION__ << "| ======= start ======="; + ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); REQUIRE_ROOT_ACCESS DEVICE_MUTEX @@ -3250,8 +3253,9 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) { break; case RSMI_STATUS_UNEXPECTED_DATA: - *status_string = "RSMI_STATUS_UNEXPECTED_DATA: Data (usually from reading" - " a file) was not of the type that was expected"; + *status_string = "RSMI_STATUS_UNEXPECTED_DATA: Data read (usually from " + "a file) or provided to function is " + "not what was expected"; break; case RSMI_STATUS_BUSY: