From bbcff5221d496e0377a4ef02299e7f4d46553700 Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Wed, 8 May 2024 03:48:34 -0500 Subject: [PATCH] SWDEV-458102 - Updates to pp_od_clk_voltage parsing Signed-off-by: Maisam Arif Change-Id: I650dae1a99856dcde914fe66917cf9111f3ce0e2 [ROCm/amdsmi commit: 7d999aa34c1299b71f9f840110d8ef6e5ec6f67e] --- projects/amdsmi/CHANGELOG.md | 17 +- projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 30 +- projects/amdsmi/include/amd_smi/amdsmi.h | 25 +- projects/amdsmi/py-interface/README.md | 29 +- .../rocm_smi/include/rocm_smi/rocm_smi.h | 3 +- .../include/rocm_smi/rocm_smi_utils.h | 336 +++++++++++++++++- projects/amdsmi/rocm_smi/src/rocm_smi.cc | 199 ++++++----- .../amdsmi/rocm_smi/src/rocm_smi_utils.cc | 3 +- .../tests/amd_smi_test/amdsmitst.exclude | 2 +- .../functional/mutual_exclusion.cc | 8 +- .../functional/perf_determinism.cc | 4 +- .../functional/volt_freq_curv_read.cc | 35 +- 12 files changed, 547 insertions(+), 144 deletions(-) diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index fc527e05df..1169674e4e 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -61,7 +61,19 @@ GPU PCIE_BW 0 26 Mb/s ``` -### Changes +- **Updated CLI voltage curve command output**. +The output for `amd-smi metric --voltage-curve` now splits the frequency and voltage output by curve point or outputs N/A if not applicable + +```shell +GPU: 0 + VOLTAGE_CURVE: + POINT_0_FREQUENCY: 872 Mhz + POINT_0_VOLTAGE: 736 mV + POINT_1_FREQUENCY: 1354 Mhz + POINT_1_VOLTAGE: 860 mV + POINT_2_FREQUENCY: 1837 Mhz + POINT_2_VOLTAGE: 1186 mV +``` - **Updated `amdsmi_get_gpu_board_info()` now has larger structure sizes for `amdsmi_board_info_t`**. Updated sizes that work for retreiving relavant board information across AMD's @@ -121,6 +133,9 @@ garbage output to users using the API. This fix intends to populate as many valu Then any failure(s) found along the way, `\0` is provided to `amdsmi_board_info_t` structures data members which cannot be populated. Ensuring empty char string values. +- **Fixed parsing of `pp_od_clk_voltage` within `amdsmi_get_gpu_od_volt_info`**. +The parsing of `pp_od_clk_voltage` was not dynamic enough to work with the dropping of voltage curve support on MI series cards. This propagates down to correcting the CLI's output `amd-smi metric --voltage-curve` to N/A if voltage curve is not enabled. + - **Fixed `amd-smi metric --power` now provides power output for Navi2x/Navi3x/MI1x**. These systems use an older version of gpu_metrics in amdgpu. This fix only updates what CLI outputs. No change in any of our APIs. diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 460a0f84d3..9ec0b5fa02 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1792,17 +1792,39 @@ class AMDSMICommands(): if args.voltage_curve: try: od_volt = amdsmi_interface.amdsmi_get_gpu_od_volt_info(args.gpu) + logging.debug(f"OD Voltage info: {od_volt}") + # Populate N/A values per voltage point voltage_point_dict = {} + for point in range(amdsmi_interface.AMDSMI_NUM_VOLTAGE_CURVE_POINTS): + voltage_point_dict[f'point_{point}_frequency'] = "N/A" + voltage_point_dict[f'point_{point}_voltage'] = "N/A" - for point in range(3): + # Populate voltage point values + for point in range(amdsmi_interface.AMDSMI_NUM_VOLTAGE_CURVE_POINTS): if isinstance(od_volt, dict): + logging.debug(f"point_{point} frequency: {od_volt['curve.vc_points'][point].frequency}") + logging.debug(f"point_{point} voltage: {od_volt['curve.vc_points'][point].voltage}") frequency = int(od_volt["curve.vc_points"][point].frequency / 1000000) voltage = int(od_volt["curve.vc_points"][point].voltage) else: - frequency = 0 - voltage = 0 - voltage_point_dict[f'voltage_point_{point}'] = f"{frequency} Mhz {voltage} mV" + frequency = "N/A" + voltage = "N/A" + + if frequency == 0: + frequency = "N/A" + + if voltage == 0: + voltage = "N/A" + + if frequency != "N/A": + frequency = self.helpers.unit_format(self.logger, frequency, "Mhz") + + if voltage != "N/A": + voltage = self.helpers.unit_format(self.logger, voltage, "mV") + + voltage_point_dict[f'point_{point}_frequency'] = frequency + voltage_point_dict[f'point_{point}_voltage'] = voltage values_dict['voltage_curve'] = voltage_point_dict except amdsmi_exception.AmdSmiLibraryException as e: diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index d80f3260ca..0a718aed66 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -1255,7 +1255,7 @@ typedef struct { typedef struct { amdsmi_range_t curr_sclk_range; //!< The current SCLK frequency range amdsmi_range_t curr_mclk_range; //!< The current MCLK frequency range; - //!< (upper bound only) + //!< (upper bound only) amdsmi_range_t sclk_freq_limits; //!< The range possible of SCLK values amdsmi_range_t mclk_freq_limits; //!< The range possible of MCLK values @@ -2967,8 +2967,9 @@ amdsmi_status_t amdsmi_get_clk_freq(amdsmi_processor_handle processor_handle, amdsmi_status_t amdsmi_reset_gpu(amdsmi_processor_handle processor_handle); /** - * @brief This function retrieves the voltage/frequency curve information. It is - * not supported on virtual machine guest + * @brief This function retrieves the overdrive GFX & MCLK information. If valid + * for the GPU it will also populate the voltage curve data. It is not supported + * on virtual machine guest * * @platform{gpu_bm_linux} * @@ -3054,14 +3055,14 @@ amdsmi_status_t amdsmi_get_gpu_metrics_info(amdsmi_processor_handle processor_ha * @param[inout] num_of_metrics a pointer to uint32_t to which the number of * metrics is allocated for pm_metrics array as input, and the number of metrics retreived * as output. If this parameter is NULL, this function will return - * ::AMDSMI_STATUS_INVALID_ARGS if the function is supported with the provided, + * ::AMDSMI_STATUS_INVAL if the function is supported with the provided, * arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the * provided arguments. * * @retval ::AMDSMI_STATUS_SUCCESS call was successful * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function with the given arguments - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * */ amdsmi_status_t amdsmi_get_gpu_pm_metrics_info( @@ -3092,14 +3093,14 @@ amdsmi_status_t amdsmi_get_gpu_pm_metrics_info( * @param[inout] num_of_metrics a pointer to uint32_t to which the number of * metrics is allocated for reg_metrics array as input, and the number of metrics retreived * as output. If this parameter is NULL, this function will return - * ::AMDSMI_STATUS_INVALID_ARGS if the function is supported with the provided, + * ::AMDSMI_STATUS_INVAL if the function is supported with the provided, * arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the * provided arguments. * * @retval ::AMDSMI_STATUS_SUCCESS call was successful * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function with the given arguments - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * */ amdsmi_status_t amdsmi_get_gpu_reg_table_info( @@ -3504,7 +3505,7 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process * @platform{gpu_bm_linux} @platform{guest_1vf} * * @details Given a processor handle @p processor_handle, and a sclean flag @p sclean, - * this function will clear the SRAM data of this processor. This can be called between + * this function will clear the SRAM data of this processor. This can be called between * user logins to prevent information leak. * * @note This function requires root access @@ -4222,7 +4223,7 @@ amdsmi_is_P2P_accessible(amdsmi_processor_handle processor_handle_src, * , suggested length is 4 or greater. * * @retval ::AMDSMI_STATUS_SUCCESS call was successful - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * @retval ::AMDSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function @@ -4252,7 +4253,7 @@ amdsmi_get_gpu_compute_partition(amdsmi_processor_handle processor_handle, * * @retval ::AMDSMI_STATUS_SUCCESS call was successful * @retval ::AMDSMI_STATUS_PERMISSION function requires root access - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * @retval ::AMDSMI_STATUS_SETTING_UNAVAILABLE the provided setting is * unavailable for current device * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not @@ -4312,7 +4313,7 @@ amdsmi_status_t amdsmi_reset_gpu_compute_partition(amdsmi_processor_handle proce * suggested length is 5 or greater. * * @retval ::AMDSMI_STATUS_SUCCESS call was successful - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * @retval ::AMDSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function @@ -4341,7 +4342,7 @@ amdsmi_get_gpu_memory_partition(amdsmi_processor_handle processor_handle, * * @retval ::AMDSMI_STATUS_SUCCESS call was successful * @retval ::AMDSMI_STATUS_PERMISSION function requires root access - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not * support this function * @retval ::AMDSMI_STATUS_AMDGPU_RESTART_ERR could not successfully restart diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index 1d1d9cd5c6..c54f711339 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -1224,7 +1224,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_pci_bandwidth(device, 0) + amdsmi_set_gpu_pci_bandwidth(device, 0) except AmdSmiException as e: print(e) ``` @@ -1620,7 +1620,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_od_clk_info( + amdsmi_set_gpu_od_clk_info( device, AmdSmiFreqInd.AMDSMI_FREQ_IND_MAX, 1000, @@ -1691,7 +1691,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_od_volt_info(device, 1, 1000, 980) + amdsmi_set_gpu_od_volt_info(device, 1, 1000, 980) except AmdSmiException as e: print(e) ``` @@ -2168,15 +2168,16 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_clk_freq(device, AmdSmiClkType.SYS) + amdsmi_get_clk_freq(device, AmdSmiClkType.SYS) except AmdSmiException as e: print(e) ``` ### amdsmi_get_gpu_od_volt_info -Description: This function retrieves the voltage/frequency curve information -It is not supported on virtual machine guest +Description: This function retrieves the voltage/frequency curve information. +If the num_regions is 0 then the voltage curve is not supported. +It is not supported on virtual machine guest. Input parameters: @@ -2190,8 +2191,8 @@ Field | Description `curr_mclk_range` |
Subfield Description
`lower_bound`lower bound mclk range
`upper_bound`upper bound mclk range
`sclk_freq_limits` |
Subfield Description
`lower_bound`lower bound sclk range limt
`upper_bound`upper bound sclk range limit
`mclk_freq_limits` |
Subfield Description
`lower_bound`lower bound mclk range limit
`upper_bound`upper bound mclk range limit
-`curve.vc_points` | The number of supported frequencies -`num_regions` | The current frequency index +`curve.vc_points` | List of voltage curve points +`num_regions` | The number of voltage curve regions Exceptions that can be thrown by `amdsmi_get_gpu_od_volt_info` function: @@ -2208,7 +2209,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_od_volt_info(dev) + amdsmi_get_gpu_od_volt_info(dev) except AmdSmiException as e: print(e) ``` @@ -2300,7 +2301,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_metrics_info(dev) + amdsmi_get_gpu_metrics_info(dev) except AmdSmiException as e: print(e) ``` @@ -2337,7 +2338,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_od_volt_curve_regions(device, 3) + amdsmi_get_gpu_od_volt_curve_regions(device, 3) except AmdSmiException as e: print(e) ``` @@ -2375,7 +2376,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_get_gpu_power_profile_presets(device, 0) + amdsmi_get_gpu_power_profile_presets(device, 0) except AmdSmiException as e: print(e) ``` @@ -2604,7 +2605,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_perf_level(device, AmdSmiDevPerfLevel.STABLE_PEAK) + amdsmi_set_gpu_perf_level(device, AmdSmiDevPerfLevel.STABLE_PEAK) except AmdSmiException as e: print(e) ``` @@ -2907,7 +2908,7 @@ try: print("No GPUs on machine") else: for device in devices: - amdsmi_set_gpu_overdrive_level(device, 0) + amdsmi_set_gpu_overdrive_level(device, 0) except AmdSmiException as e: print(e) ``` diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h index 8d2c3668e0..9be5ee1e3b 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h @@ -3451,8 +3451,7 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, uint32_t pisolate); /** - * @brief Clear the GPU SRAM data - * + * @brief Clear the GPU SRAM data * * @details Given a device index @p dv_ind, this function will clear the * GPU SRAM data of this device. This can be called between user logins to prevent information leak. diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h index 67d9d8b8d8..75ba165a45 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h @@ -45,14 +45,17 @@ #include -#include +#include #include -#include -#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include #include "rocm_smi/rocm_smi_device.h" @@ -125,13 +128,33 @@ std::string print_rsmi_od_volt_freq_regions(uint32_t num_regions, bool is_sudo_user(); rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind, std::string *gfx_version); + +std::string leftTrim(const std::string &s); +std::string rightTrim(const std::string &s); +std::string trim(const std::string &s); +std::string removeNewLines(const std::string &s); + +std::string removeString(const std::string origStr, + const std::string &removeMe); template - std::string print_int_as_hex(T i, bool showHexNotation = true) { + std::string print_int_as_hex(T i, bool showHexNotation = true, + int overloadBitSize = 0) { std::stringstream ss; if (showHexNotation) { - ss << "0x" << std::setfill('0') << std::setw(sizeof(T) * 2) << std::hex; + if (overloadBitSize == 0) { + ss << "0x" << std::hex << std::setw(sizeof(T) * 2) << std::setfill('0'); + } else { + // 8 bits per 1 byte + int byteSize = (overloadBitSize / 8) * 2; + ss << "0x" << std::hex << std::setw(byteSize) << std::setfill('0'); + } } else { - ss << std::setfill('0') << std::setw(sizeof(T) * 2) << std::hex; + if (overloadBitSize == 0) { + ss << std::hex << std::setw(sizeof(T) * 2) << std::setfill('0'); + } else { + int byteSize = (overloadBitSize / 8) * 2; + ss << std::hex << std::setw(byteSize) << std::setfill('0'); + } } if (std::is_same::value) { @@ -162,7 +185,8 @@ std::string print_unsigned_hex_and_int(T i, std::string heading="") { } ss << "Hex (MSB): " << print_int_as_hex(i) << ", " << "Unsigned int: " << print_unsigned_int(i) << ", " - << "Byte Size: " << sizeof(T); + << "Byte Size: " << sizeof(T) << ", " + << "Bits: " << sizeof(T) * 8; // 8 bits per 1 byte return ss.str(); } @@ -283,8 +307,298 @@ class ScopedAcquire { // In VM environment, the /proc/cpuinfo set hypervisor flag by default bool is_vm_guest(); -// trim a string -std::string trim(const std::string &s); + +// +enum class TagSplitterPositional_t +{ + kFIRST, + kBETWEEN, + kLAST, + kNONE, +}; + +template +class TagTextContents_t +{ + public: + using TextLines_t = std::vector; + using PrimaryList_t = std::vector; + using SecondaryList_t = std::vector; + using PrimaryKeyTbl_t = std::map; + using SecondaryKeyTbl_t = std::map; + using StructuredKeysTbl_t = std::map>; + + // + TagTextContents_t() = default; + TagTextContents_t(const TagTextContents_t&) = delete; + TagTextContents_t(TagTextContents_t&&) = delete; + TagTextContents_t& operator=(const TagTextContents_t&) = delete; + TagTextContents_t& operator=(TagTextContents_t&&) = delete; + + explicit TagTextContents_t(const TextLines_t& text_content) + : m_text_content(text_content) {} + + TagTextContents_t& set_text_content(const TextLines_t& text_content) + { + m_text_content = text_content; + } + + TagTextContents_t& set_title_terminator(const std::string& title_mark, + TagSplitterPositional_t title_mark_position) { + m_title_mark = title_mark; + m_title_mark_position = title_mark_position; + + return *this; + } + + TagTextContents_t& set_key_data_splitter(const std::string& line_splitter_mark, + TagSplitterPositional_t line_mark_position) { + m_line_splitter_mark = line_splitter_mark; + m_line_mark_position = line_mark_position; + + return *this; + } + + TagTextContents_t& structure_content() { + // Sanitizes the content. + if (!m_text_content.empty()) { + std::for_each(m_text_content.begin(), m_text_content.end(), trim); + section_title_lookup(); + section_data_lookup(); + } + + return *this; + } + + decltype(auto) get_title_size() { + return m_primary.size(); + } + + decltype(auto) get_structured_subkeys_size(const PrimaryKeyType& prim_key) { + return m_structured[prim_key].size(); + } + + decltype(auto) contains_title_key(const PrimaryKeyType& key) { + return (m_primary.find(key) != m_primary.end()); + } + + decltype(auto) contains_structured_key(const PrimaryKeyType& prim_key, + const SecondaryKeyType& sec_key) { + if (auto first_key_itr = m_structured.find(prim_key); + first_key_itr != m_structured.end()) { + if (auto sec_key_itr = first_key_itr->second.find(sec_key); + sec_key_itr != first_key_itr->second.end()) { + return true; + } + } + + return false; + } + + decltype(auto) get_structured_value_by_keys(const PrimaryKeyType& prim_key, + const SecondaryKeyType& sec_key, + bool is_value_id = true) { + if (auto first_key_itr = m_structured.find(prim_key); + first_key_itr != m_structured.end()) { + if (auto sec_key_itr = first_key_itr->second.find(sec_key); + sec_key_itr != first_key_itr->second.end()) { + SecondaryDataType key_value{}; + if (is_value_id) { + key_value = SecondaryDataType(sec_key_itr->first) + " "; + } + key_value += sec_key_itr->second; + return key_value; + } + } + + return SecondaryDataType{}; + } + + decltype(auto) get_structured_data_subkey_by_position(const PrimaryKeyType& prim_key, + uint32_t key_position) { + auto key_counter = uint32_t(0); + SecondaryKeyType data_key{}; + if (key_position < (get_structured_subkeys_size(prim_key))) { + for (const auto& [sec_key, sec_value] : m_structured[prim_key]) { + if (key_counter == key_position) { + data_key = static_cast(sec_key); + return data_key; + } + ++key_counter; + } + } + + return data_key; + } + + decltype(auto) get_structured_data_subkey_first(const PrimaryKeyType& prim_key) { + return (get_structured_value_by_keys(prim_key, + get_structured_data_subkey_by_position(prim_key, 0))); + } + + decltype(auto) get_structured_data_subkey_last(const PrimaryKeyType& prim_key) { + return (get_structured_value_by_keys(prim_key, get_structured_data_subkey_by_position(prim_key, + (get_structured_subkeys_size(prim_key) - 1)))); + } + + void reset() { + m_text_content.clear(); + m_primary.clear(); + m_structured.clear(); + m_title_mark.clear(); + m_line_splitter_mark.clear(); + m_title_mark_position = TagSplitterPositional_t::kNONE; + m_line_mark_position = TagSplitterPositional_t::kNONE; + } + + decltype(auto) dump_structured_content() { + std::ostringstream ostrstream; + ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======" << "\n"; + ostrstream << "** Primary Table **" << "\n"; + for (const auto& [key, values] : m_primary) { + ostrstream << "key: " << key << " values: " << values.size() << "\n"; + for (const auto& value : values) { + ostrstream << "\t value: " << value << "\n"; + } + } + + ostrstream << "\n ** Structured Table **" << "\n"; + for (const auto& [prim_key, prim_values] : m_structured) { + ostrstream << "key: " << prim_key << "\n"; + for (const auto& [sec_key, sec_value] : prim_values) { + ostrstream << "\t key: " << sec_key << " -> " << sec_value << "\n"; + } + } + ostrstream << "\n\n"; + + return ostrstream.str(); + } + + + private: + TextLines_t m_text_content; + PrimaryKeyTbl_t m_primary; + StructuredKeysTbl_t m_structured; + std::string m_title_mark; + std::string m_line_splitter_mark; + TagSplitterPositional_t m_title_mark_position; + TagSplitterPositional_t m_line_mark_position; + + // + // Note: Organizes table with Title as a Key, and a list of values. + // + decltype(auto) section_title_lookup() { + if (m_title_mark.empty() || + m_title_mark_position == TagSplitterPositional_t::kNONE) { + return; + } + + // + // Note: + // - top_title_line: Left pointer for the sliding window + // - bottom_title_line: Right pointer for the sliding window + // + auto top_title_line = uint32_t(std::numeric_limits::max()); + auto bottom_title_line = uint32_t(std::numeric_limits::max()); + auto line_counter = uint32_t(0); + + // + // Note: This whole interval/window where the section/title starts, and where it ends. + // + auto update_primary_tbl = [&](const uint32_t& from_line, const uint32_t& to_line) { + auto key = static_cast(m_text_content[from_line]); + for (auto line_num(from_line + 1); line_num < to_line; ++line_num) { + if ((line_num < m_text_content.size()) && !m_text_content[line_num].empty()) { + m_primary[key].push_back(m_text_content[line_num]); + } + } + }; + + auto adjust_sliding_window = [&](const uint32_t& title_line) { + // First time top_title_line gets adjusted. + if (top_title_line == uint32_t(std::numeric_limits::max())) { + top_title_line = title_line; + bottom_title_line = top_title_line; + return; + } + if (title_line > bottom_title_line) { + bottom_title_line = title_line; + update_primary_tbl(top_title_line, bottom_title_line); + top_title_line = bottom_title_line; + } + }; + + for (const auto& line : m_text_content) { + auto was_title_found{false}; + switch (m_title_mark_position) { + case TagSplitterPositional_t::kFIRST: + // Section/Title Mark was found at the first position + if (line.find_first_of(m_title_mark.c_str()) == 0) { + was_title_found = true; + } + break; + + case TagSplitterPositional_t::kLAST: + // Section/Title Mark was found at the last position + if ((line.find_last_of(m_title_mark.c_str()) + 1) == line.size()) { + was_title_found = true; + } + break; + + default: + break; + } + + if (was_title_found) { + adjust_sliding_window(line_counter); + } + ++line_counter; + } + + // Any remaining elements? If so, the data belongs to the last found section title + if (line_counter > bottom_title_line) { + update_primary_tbl(bottom_title_line, line_counter); + } + } + + decltype(auto) section_data_lookup() { + if (m_line_splitter_mark.empty() || + m_line_mark_position == TagSplitterPositional_t::kNONE) { + return; + } + + // + // Note: Organizes table with Title as a Key, a Key/ID for values and values. + // It takes into consideration the initial constraints were all good and + // that the primary table has been populated. + auto sec_key = std::string(); + auto sec_data = std::string(); + auto auto_key = uint32_t(0); + for (const auto& [prim_key, prim_values] : m_primary) { + for (const auto& value : prim_values) { + if (auto mark_pos = value.find_first_of(m_line_splitter_mark.c_str()); + mark_pos != std::string::npos) { + sec_key = trim(value.substr(0, mark_pos + 1)); + sec_data = trim(value.substr((mark_pos + 1), value.size())); + } + // In case there is no 'key' based on the data token marker, generate one. + else { + sec_key = std::to_string(auto_key) + m_line_splitter_mark; + sec_data = trim(value.substr(0, value.size())); + ++auto_key; + } + if (!sec_key.empty()) { + m_structured[prim_key].insert(std::make_pair(sec_key, sec_data)); + } + } + } + } +}; + +using TextFileTagContents_t = TagTextContents_t; + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi.cc b/projects/amdsmi/rocm_smi/src/rocm_smi.cc index dd8e903328..b545fa1973 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi.cc @@ -1415,17 +1415,6 @@ For the new format, GFXCLK field will show min and max values(0/1). If the curre frequency in neither min/max but lies within the range, this is indicated by an additional value followed by * at index 1 and max value at index 2. */ -constexpr uint32_t kOD_SCLK_label_array_index = 0; -constexpr uint32_t kOD_MCLK_label_array_index = - kOD_SCLK_label_array_index + 3; -constexpr uint32_t kOD_VDDC_CURVE_label_array_index = - kOD_MCLK_label_array_index + 2; -constexpr uint32_t kOD_OD_RANGE_label_array_index = - kOD_VDDC_CURVE_label_array_index + 4; -constexpr uint32_t kOD_VDDC_CURVE_start_index = - kOD_OD_RANGE_label_array_index + 3; -// constexpr uint32_t kOD_VDDC_CURVE_num_lines = -// kOD_VDDC_CURVE_start_index + 4; constexpr uint32_t kMIN_VALID_LINES = 2; static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, @@ -1450,62 +1439,95 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, return RSMI_STATUS_NOT_YET_IMPLEMENTED; } - assert(val_vec[kOD_SCLK_label_array_index] == "OD_SCLK:" || - val_vec[kOD_SCLK_label_array_index] == "GFXCLK:"); - if ((val_vec[kOD_SCLK_label_array_index] != "OD_SCLK:") && - (val_vec[kOD_SCLK_label_array_index] != "GFXCLK:")) { - return RSMI_STATUS_UNEXPECTED_DATA; + // Tags expected in this file + const std::string kTAG_OD_SCLK{"OD_SCLK:"}; + const std::string KTAG_OD_MCLK{"OD_MCLK:"}; + const std::string kTAG_GFXCLK{"GFXCLK:"}; + const std::string KTAG_MCLK{"MCLK:"}; + const std::string KTAG_SCLK{"SCLK:"}; + const std::string KTAG_OD_RANGE{"OD_RANGE:"}; + const std::string KTAG_OD_VDDGFX_OFFSET{"OD_VDDGFX_OFFSET:"}; + const std::string KTAG_FIRST_FREQ_IDX{"0:"}; + + amd::smi::TextFileTagContents_t txt_power_dev_od_voltage(val_vec); + txt_power_dev_od_voltage + .set_title_terminator(":", amd::smi::TagSplitterPositional_t::kLAST) + .set_key_data_splitter(":", amd::smi::TagSplitterPositional_t::kBETWEEN) + .structure_content(); + + // + // Note: We must have minimum of 'GFXCLK:' && 'MCLK:' OR: + // 'OD_SCLK:' && 'OD_MCLK:' tags. + if (txt_power_dev_od_voltage.get_title_size() < kMIN_VALID_LINES) { + return rsmi_status_t::RSMI_STATUS_NO_DATA; } - - // find last_item but skip empty lines - int last_item = val_vec.size()-1; - while (val_vec[last_item].empty() || val_vec[last_item][0] == 0) - last_item--; - - p->curr_sclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 1); - p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 2); - - if (val_vec.size() < (kOD_MCLK_label_array_index + 1)) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } - // The condition below checks if it is the old style or new style format. - if (val_vec[kOD_MCLK_label_array_index] == "OD_MCLK:") { - p->curr_mclk_range.lower_bound = 0; - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 1); - } else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") { - p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 1); - // the upper memory frequency is the last - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, last_item); - return RSMI_STATUS_SUCCESS; - } else { - if (val_vec.size() < (kOD_MCLK_label_array_index + 3)) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } - if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") { - p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 3); - p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 2); - // the upper memory frequency is the last - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, last_item); - return RSMI_STATUS_SUCCESS; - } - return RSMI_STATUS_NOT_YET_IMPLEMENTED; + // Note: For debug builds/purposes only. + assert(txt_power_dev_od_voltage.contains_title_key(kTAG_GFXCLK) || + txt_power_dev_od_voltage.contains_title_key(kTAG_OD_SCLK)); + // Note: For release builds/purposes. + if (!txt_power_dev_od_voltage.contains_title_key(kTAG_GFXCLK) && + !txt_power_dev_od_voltage.contains_title_key(kTAG_OD_SCLK)) { + return rsmi_status_t::RSMI_STATUS_UNEXPECTED_DATA; } - if (val_vec.size() < kOD_VDDC_CURVE_label_array_index) { - return RSMI_STATUS_UNEXPECTED_SIZE; + // Note: Quick helpers for getting 1st and last elements found + auto build_lower_bound = [&](const std::string& prim_key) { + auto lower_bound_data = txt_power_dev_od_voltage.get_structured_data_subkey_first(prim_key); + return std::vector{lower_bound_data}; + }; + + auto build_upper_bound = [&](const std::string& prim_key) { + auto upper_bound_data = txt_power_dev_od_voltage.get_structured_data_subkey_last(prim_key); + return std::vector{upper_bound_data}; + }; + + // Validates 'OD_SCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(kTAG_OD_SCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_sclk_range.lower_bound = freq_string_to_int(build_lower_bound(kTAG_OD_SCLK), nullptr, nullptr, 0); + p->curr_sclk_range.upper_bound = freq_string_to_int(build_upper_bound(kTAG_OD_SCLK), nullptr, nullptr, 0); + + // Validates 'OD_MCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_MCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_mclk_range.lower_bound = freq_string_to_int(build_lower_bound(KTAG_OD_MCLK), nullptr, nullptr, 0); + p->curr_mclk_range.upper_bound = freq_string_to_int(build_upper_bound(KTAG_OD_MCLK), nullptr, nullptr, 0); + } + + // Validates 'OD_RANGE' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_RANGE, + KTAG_SCLK)) { + od_value_pair_str_to_range(txt_power_dev_od_voltage + .get_structured_value_by_keys(KTAG_OD_RANGE, KTAG_SCLK), + &p->sclk_freq_limits); + } + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_RANGE, + KTAG_MCLK)) { + od_value_pair_str_to_range(txt_power_dev_od_voltage + .get_structured_value_by_keys(KTAG_OD_RANGE, KTAG_MCLK), + &p->mclk_freq_limits); + } + } + // Validates 'GFXCLK' is in the structure + else if (txt_power_dev_od_voltage.contains_structured_key(kTAG_GFXCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_sclk_range.lower_bound = freq_string_to_int(build_lower_bound(kTAG_GFXCLK), nullptr, nullptr, 0); + p->curr_sclk_range.upper_bound = freq_string_to_int(build_upper_bound(kTAG_GFXCLK), nullptr, nullptr, 0); + + // Validates 'MCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_MCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_mclk_range.lower_bound = freq_string_to_int(build_lower_bound(KTAG_MCLK), nullptr, nullptr, 0); + p->curr_mclk_range.upper_bound = freq_string_to_int(build_upper_bound(KTAG_MCLK), nullptr, nullptr, 0); + } + } + else { + return RSMI_STATUS_NOT_YET_IMPLEMENTED; } - p->num_regions = - static_cast((val_vec.size()) / 2); + // Note: No curve entries. + p->num_regions = 0; return RSMI_STATUS_SUCCESS; CATCH @@ -1674,28 +1696,34 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, } -static void get_vc_region(uint32_t start_ind, - std::vector *val_vec, rsmi_freq_volt_region_t *p) { +static void get_vc_region(const std::vector& val_vec, rsmi_freq_volt_region_t& p) +{ std::ostringstream ss; ss << __PRETTY_FUNCTION__ << " | ======= start ======="; LOG_TRACE(ss); - assert(p != nullptr); - assert(val_vec != nullptr); - THROW_IF_NULLPTR_DEREF(p) - THROW_IF_NULLPTR_DEREF(val_vec) - // There must be at least 1 region to read in - assert(val_vec->size() >= kOD_OD_RANGE_label_array_index + 2); - assert((*val_vec)[kOD_OD_RANGE_label_array_index] == "OD_RANGE:"); - if ((val_vec->size() < kOD_OD_RANGE_label_array_index + 2) || - ((*val_vec)[kOD_OD_RANGE_label_array_index] != "OD_RANGE:") ) { - ss << __PRETTY_FUNCTION__ << " | ======= end ======= | returning " - << getRSMIStatusString(RSMI_STATUS_UNEXPECTED_DATA); - LOG_TRACE(ss); - throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__); + // + amd::smi::TextFileTagContents_t txt_power_dev_od_voltage(val_vec); + txt_power_dev_od_voltage + .set_title_terminator(":", amd::smi::TagSplitterPositional_t::kLAST) + .set_key_data_splitter(":", amd::smi::TagSplitterPositional_t::kBETWEEN) + .structure_content(); + + const std::string KTAG_OD_RANGE{"OD_RANGE:"}; + const std::string KTAG_MCLK{"MCLK:"}; + const std::string KTAG_SCLK{"SCLK:"}; + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_RANGE, + KTAG_SCLK)) { + od_value_pair_str_to_range(txt_power_dev_od_voltage + .get_structured_value_by_keys(KTAG_OD_RANGE, KTAG_SCLK), + &p.freq_range); + } + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_RANGE, + KTAG_MCLK)) { + od_value_pair_str_to_range(txt_power_dev_od_voltage + .get_structured_value_by_keys(KTAG_OD_RANGE, KTAG_MCLK), + &p.volt_range); } - od_value_pair_str_to_range((*val_vec)[start_ind], &p->freq_range); - od_value_pair_str_to_range((*val_vec)[start_ind + 1], &p->volt_range); } /* @@ -1729,23 +1757,24 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind, // This is a work-around to handle systems where kDevPowerODVoltage is not // fully supported yet. - if (val_vec.size() < 2) { + if (val_vec.size() < kMIN_VALID_LINES) { ss << __PRETTY_FUNCTION__ - << " | Issue: val_vec.size() < 2" << "; returning " + << " | Issue: val_vec.size() < " << kMIN_VALID_LINES << "; returning " << getRSMIStatusString(RSMI_STATUS_NOT_YET_IMPLEMENTED); LOG_ERROR(ss); return RSMI_STATUS_NOT_YET_IMPLEMENTED; } uint32_t val_vec_size = static_cast(val_vec.size()); - assert((val_vec_size - kOD_VDDC_CURVE_start_index) > 0); - ss << __PRETTY_FUNCTION__ << " | val_vec_size = " << std::dec - << val_vec_size - << " | kOD_VDDC_CURVE_start_index = " << kOD_VDDC_CURVE_start_index; + << val_vec_size; LOG_DEBUG(ss); - *num_regions = std::min((val_vec_size) / 2, *num_regions); + + // Note: No curve entries. + *num_regions = 0; + // Get OD ranges. + get_vc_region(val_vec, *p); return RSMI_STATUS_SUCCESS; CATCH diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc index 61ec4243dc..122584f9bd 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc @@ -1140,7 +1140,7 @@ std::string print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv) { &odv->mclk_freq_limits); ss << "\t**Current Freq/Volt. curve: " << "\n"; - ss << pt_vddc_curve(&odv->curve); + ss << "\t\t N/A" << "\n"; ss << "\t**Number of Freq./Volt. regions: " << odv->num_regions << "\n\n"; return ss.str(); @@ -1224,5 +1224,6 @@ std::queue getAllDeviceGfxVers() { return deviceGfxVersions; } + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude b/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude index 5a49f0dc0a..8f955bae5f 100644 --- a/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude +++ b/projects/amdsmi/tests/amd_smi_test/amdsmitst.exclude @@ -64,7 +64,7 @@ $BLACKLIST_ALL_ASICS\ # /sys/class/kfd/kfd/topology/nodes/*/properties FILTER[90400]=\ $BLACKLIST_ALL_ASICS\ -"amdsmitstReadOnly.TestVoltCurvRead" +# "amdsmitstReadOnly.TestVoltCurvRead" FILTER[90401]=${FILTER[90400]} FILTER[90402]=${FILTER[90400]} diff --git a/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc b/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc index 48bbe82934..e5578619f1 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc @@ -183,10 +183,10 @@ void TestMutualExclusion::Run(void) { int64_t dmy_i64 = 0; char dmy_str[10]; amdsmi_dev_perf_level_t dmy_perf_lvl; - amdsmi_frequencies_t dmy_freqs; - amdsmi_od_volt_freq_data_t dmy_od_volt; - amdsmi_freq_volt_region_t dmy_vlt_reg; - amdsmi_error_count_t dmy_err_cnt; + amdsmi_frequencies_t dmy_freqs{}; + amdsmi_od_volt_freq_data_t dmy_od_volt{}; + amdsmi_freq_volt_region_t dmy_vlt_reg{}; + amdsmi_error_count_t dmy_err_cnt{}; amdsmi_ras_err_state_t dmy_ras_err_st; // This can be replaced with ASSERT_EQ() once env. stabilizes diff --git a/projects/amdsmi/tests/amd_smi_test/functional/perf_determinism.cc b/projects/amdsmi/tests/amd_smi_test/functional/perf_determinism.cc index 7a410ba71d..d2731324ac 100644 --- a/projects/amdsmi/tests/amd_smi_test/functional/perf_determinism.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/perf_determinism.cc @@ -92,9 +92,9 @@ void TestPerfDeterminism::Close() { void TestPerfDeterminism::Run(void) { amdsmi_status_t err; amdsmi_dev_perf_level_t pfl; - amdsmi_od_volt_freq_data_t odv; + amdsmi_od_volt_freq_data_t odv{}; amdsmi_status_t ret; - uint64_t clkvalue; + uint64_t clkvalue(0); TestBase::Run(); if (setup_failed_) { std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl; diff --git a/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc index 4c1a758fc9..3d061698b2 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc @@ -146,7 +146,7 @@ static void print_amdsmi_od_volt_freq_regions(uint32_t num_regions, void TestVoltCurvRead::Run(void) { amdsmi_status_t err; - amdsmi_od_volt_freq_data_t odv; + amdsmi_od_volt_freq_data_t odv{}; TestBase::Run(); if (setup_failed_) { @@ -172,7 +172,6 @@ void TestVoltCurvRead::Run(void) { ASSERT_EQ(err, AMDSMI_STATUS_NOT_SUPPORTED); } } else { - CHK_ERR_ASRT(err) // Verify api support checking functionality is working err = amdsmi_get_gpu_od_volt_info(processor_handles_[i], nullptr); ASSERT_EQ(err, AMDSMI_STATUS_INVAL); @@ -182,15 +181,37 @@ void TestVoltCurvRead::Run(void) { std::cout << "\t**Frequency-voltage curve data:" << std::endl; print_amdsmi_od_volt_freq_data_t(&odv); - amdsmi_freq_volt_region_t *regions; + amdsmi_freq_volt_region_t *regions{}; uint32_t num_regions; regions = new amdsmi_freq_volt_region_t[odv.num_regions]; - ASSERT_TRUE(regions != nullptr); + ASSERT_NE(regions, nullptr); num_regions = odv.num_regions; - err = amdsmi_get_gpu_od_volt_curve_regions(processor_handles_[i], &num_regions, regions); - CHK_ERR_ASRT(err) - ASSERT_TRUE(num_regions == odv.num_regions); + err = amdsmi_get_gpu_od_volt_curve_regions(processor_handles_[i], + &num_regions, regions); + + IF_VERB(STANDARD) { + std::cout << "\t**amdsmi_get_gpu_od_volt_curve_regions(" + << "processor_handles_[i], &num_regions, regions): " + << err << "\n" + << "\t**Number of regions: " << std::dec << num_regions + << "\n"; + } + ASSERT_TRUE(err == AMDSMI_STATUS_SUCCESS + || err == AMDSMI_STATUS_NOT_SUPPORTED + || err == AMDSMI_STATUS_UNEXPECTED_DATA + || err == AMDSMI_STATUS_UNEXPECTED_SIZE + || err == AMDSMI_STATUS_INVAL); + if (err != AMDSMI_STATUS_SUCCESS) { + IF_VERB(STANDARD) { + std::cout << "\t**amdsmi_get_gpu_od_volt_curve_regions: " + "Not supported on this machine" << std::endl; + } + continue; + } + + ASSERT_EQ(err, AMDSMI_STATUS_SUCCESS); + ASSERT_EQ(num_regions, odv.num_regions); std::cout << "\t**Frequency-voltage curve regions:" << std::endl; print_amdsmi_od_volt_freq_regions(num_regions, regions);