From cc8eb1775976252e554feb6cc1690a7fe4328b8c Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Thu, 2 May 2024 01:23:18 -0500 Subject: [PATCH] SWDEV-458102 - Deprecated Voltage Curve API Signed-off-by: Maisam Arif Change-Id: I111c3ce26d2ab66d5e755432f4b8a9bfa631f805 [ROCm/amdsmi commit: 1423fb632e340359bd3e4d7e71035112199eb08e] --- projects/amdsmi/CHANGELOG.md | 31 +- projects/amdsmi/amdsmi_cli/README.md | 3 +- projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 38 +- projects/amdsmi/amdsmi_cli/amdsmi_parser.py | 2 - projects/amdsmi/include/amd_smi/amdsmi.h | 13 +- projects/amdsmi/py-interface/README.md | 4 +- .../rocm_smi/include/rocm_smi/rocm_smi.h | 6 +- .../include/rocm_smi/rocm_smi_utils.h | 328 +++++++++++++++++- projects/amdsmi/rocm_smi/src/rocm_smi.cc | 161 ++++----- .../amdsmi/rocm_smi/src/rocm_smi_utils.cc | 9 +- .../functional/mutual_exclusion.cc | 8 +- .../functional/volt_freq_curv_read.cc | 22 +- 12 files changed, 443 insertions(+), 182 deletions(-) diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index 970604dd4c..eae998bfdb 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -4,16 +4,22 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/]( ***All information listed below is for reference and subject to change.*** -## amd_smi_lib for ROCm 6.1.1 +## amd_smi_lib for ROCm 6.1.2 ### Added -- N/A +- **Updated Python Library return types for amdsmi_get_gpu_memory_reserved_pages & amdsmi_get_gpu_bad_page_info** +Previously calls were returning "No bad pages found." if no pages were found, now it only returns the list type and can be empty. ### Changed -- **Updated Python Library return types for amdsmi_get_gpu_memory_reserved_pages & amdsmi_get_gpu_bad_page_info** -Previously calls were returning "No bad pages found." if no pages were found, now it only returns the list type and can be empty. 
+- **Deprecated Volt Curve APIs** +The latest amdgpu driver has dropped support for getting and setting volt curve information. amdsmi_set_gpu_od_volt_info() & amdsmi_get_gpu_od_volt_curve_regions() have been deprecated, and amdsmi_get_gpu_od_volt_info() no longer populates voltage curve frequencies. + +- **Removed `amd-smi metric --voltage-curve` from CLI Tool** +Because the amdgpu driver dropped support for the voltage curve, the CLI option has been removed as well. + +### Optimizations - **Updated `amd-smi monitor --pcie` output** The source for pcie bandwidth monitor output was a legacy file we no longer support and was causing delays within the monitor command. The output is no longer using TX/RX but instantaneous bandwidth from gpu_metrics instead; updated output: @@ -24,6 +30,23 @@ GPU PCIE_BW 0 26 Mb/s ``` +### Fixed + +- **Fixed python interface call amdsmi_get_gpu_memory_reserved_pages & amdsmi_get_gpu_bad_page_info** +Previously, these Python interface calls resulted in a `ValueError: NULL pointer access` when bad pages were populated. This fixes the `bad-pages` CLI subcommand as well. + +### Known issues + +- None + +## amd_smi_lib for ROCm 6.1.1 + +### Added + +- N/A + +### Changed + - **Updated `amd-smi metric --ecc-blocks` output** The ecc blocks arguement was outputing blocks without counters available, updated the filtering show blocks that counters are available for: diff --git a/projects/amdsmi/amdsmi_cli/README.md b/projects/amdsmi/amdsmi_cli/README.md index 27e54e04c2..a152894415 100644 --- a/projects/amdsmi/amdsmi_cli/README.md +++ b/projects/amdsmi/amdsmi_cli/README.md @@ -269,7 +269,7 @@ Command Modifiers: ~$ amd-smi metric --help usage: amd-smi metric [-h] [-g GPU [GPU ...] | -U CPU [CPU ...]
| -O CORE [CORE ...]] [-w INTERVAL] [-W TIME] [-i ITERATIONS] [-m] [-u] [-p] [-c] [-t] - [-P] [-e] [-k] [-f] [-C] [-o] [-l] [-x] [-E] [--cpu-power-metrics] + [-P] [-e] [-k] [-f] [-o] [-l] [-x] [-E] [--cpu-power-metrics] [--cpu-prochot] [--cpu-freq-metrics] [--cpu-c0-res] [--cpu-lclk-dpm-level NBIOID] [--cpu-pwr-svi-telemtry-rails] [--cpu-io-bandwidth IO_BW LINKID_NAME] @@ -313,7 +313,6 @@ Metric arguments: -e, --ecc Total number of ECC errors -k, --ecc-blocks Number of ECC errors per block -f, --fan Current fan speed - -C, --voltage-curve Display voltage curve -o, --overdrive Current GPU clock overdrive level -l, --perf-level Current DPM performance level -x, --xgmi-err XGMI error information since last read diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index 72fe9f96f1..54539ddd66 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1108,7 +1108,7 @@ class AMDSMICommands(): def metric_gpu(self, args, multiple_devices=False, watching_output=False, gpu=None, usage=None, watch=None, watch_time=None, iterations=None, power=None, clock=None, temperature=None, ecc=None, ecc_blocks=None, pcie=None, - fan=None, voltage_curve=None, overdrive=None, perf_level=None, + fan=None, overdrive=None, perf_level=None, xgmi_err=None, energy=None, mem_usage=None, schedule=None, guard=None, guest_data=None, fb_usage=None, xgmi=None,): """Get Metric information for target gpu @@ -1129,7 +1129,6 @@ class AMDSMICommands(): ecc_blocks (bool, optional): Value override for args.ecc. Defaults to None. pcie (bool, optional): Value override for args.pcie. Defaults to None. fan (bool, optional): Value override for args.fan. Defaults to None. - voltage_curve (bool, optional): Value override for args.voltage_curve. Defaults to None. overdrive (bool, optional): Value override for args.overdrive. Defaults to None. perf_level (bool, optional): Value override for args.perf_level. 
Defaults to None. xgmi_err (bool, optional): Value override for args.xgmi_err. Defaults to None. @@ -1188,8 +1187,6 @@ class AMDSMICommands(): if self.helpers.is_baremetal() and self.helpers.is_linux(): if fan: args.fan = fan - if voltage_curve: - args.voltage_curve = voltage_curve if overdrive: args.overdrive = overdrive if perf_level: @@ -1198,8 +1195,8 @@ class AMDSMICommands(): args.xgmi_err = xgmi_err if energy: args.energy = energy - current_platform_args += ["fan", "voltage_curve", "overdrive", "perf_level", "xgmi_err", "energy"] - current_platform_values += [args.fan, args.voltage_curve, args.overdrive, args.perf_level, args.xgmi_err, args.energy] + current_platform_args += ["fan", "overdrive", "perf_level", "xgmi_err", "energy"] + current_platform_values += [args.fan, args.overdrive, args.perf_level, args.xgmi_err, args.energy] if self.helpers.is_hypervisor(): if schedule: @@ -1786,26 +1783,6 @@ class AMDSMICommands(): logging.debug("Failed to get fan rpms for gpu %s | %s", args.gpu, e.get_error_info()) values_dict["fan"] = fan_dict - if "voltage_curve" in current_platform_args: - if args.voltage_curve: - try: - od_volt = amdsmi_interface.amdsmi_get_gpu_od_volt_info(args.gpu) - - voltage_point_dict = {} - - for point in range(3): - if isinstance(od_volt, dict): - frequency = int(od_volt["curve.vc_points"][point].frequency / 1000000) - voltage = int(od_volt["curve.vc_points"][point].voltage) - else: - frequency = 0 - voltage = 0 - voltage_point_dict[f'voltage_point_{point}'] = f"{frequency} Mhz {voltage} mV" - - values_dict['voltage_curve'] = voltage_point_dict - except amdsmi_exception.AmdSmiLibraryException as e: - values_dict['voltage_curve'] = "N/A" - logging.debug("Failed to get voltage curve for gpu %s | %s", gpu_id, e.get_error_info()) if "overdrive" in current_platform_args: if args.overdrive: try: @@ -2321,7 +2298,7 @@ class AMDSMICommands(): def metric(self, args, multiple_devices=False, watching_output=False, gpu=None, usage=None, watch=None, 
watch_time=None, iterations=None, power=None, clock=None, temperature=None, ecc=None, ecc_blocks=None, pcie=None, - fan=None, voltage_curve=None, overdrive=None, perf_level=None, + fan=None, overdrive=None, perf_level=None, xgmi_err=None, energy=None, mem_usage=None, schedule=None, guard=None, guest_data=None, fb_usage=None, xgmi=None, cpu=None, cpu_power_metrics=None, cpu_prochot=None, cpu_freq_metrics=None, @@ -2350,7 +2327,6 @@ class AMDSMICommands(): ecc_blocks (bool, optional): Value override for args.ecc. Defaults to None. pcie (bool, optional): Value override for args.pcie. Defaults to None. fan (bool, optional): Value override for args.fan. Defaults to None. - voltage_curve (bool, optional): Value override for args.voltage_curve. Defaults to None. overdrive (bool, optional): Value override for args.overdrive. Defaults to None. perf_level (bool, optional): Value override for args.perf_level. Defaults to None. xgmi_err (bool, optional): Value override for args.xgmi_err. Defaults to None. 
@@ -2404,7 +2380,7 @@ class AMDSMICommands(): # Check if a GPU argument has been set gpu_args_enabled = False gpu_attributes = ["usage", "watch", "watch_time", "iterations", "power", "clock", - "temperature", "ecc", "ecc_blocks", "pcie", "fan", "voltage_curve", + "temperature", "ecc", "ecc_blocks", "pcie", "fan", "overdrive", "perf_level", "xgmi_err", "energy", "mem_usage", "schedule", "guard", "guest_data", "fb_usage", "xgmi"] for attr in gpu_attributes: @@ -2477,7 +2453,7 @@ class AMDSMICommands(): self.metric_gpu(args, multiple_devices, watching_output, gpu, usage, watch, watch_time, iterations, power, clock, temperature, ecc, ecc_blocks, pcie, - fan, voltage_curve, overdrive, perf_level, + fan, overdrive, perf_level, xgmi_err, energy, mem_usage, schedule, guard, guest_data, fb_usage, xgmi) elif self.helpers.is_amd_hsmp_initialized(): # Only CPU is initialized @@ -2512,7 +2488,7 @@ class AMDSMICommands(): self.metric_gpu(args, multiple_devices, watching_output, gpu, usage, watch, watch_time, iterations, power, clock, temperature, ecc, ecc_blocks, pcie, - fan, voltage_curve, overdrive, perf_level, + fan, overdrive, perf_level, xgmi_err, energy, mem_usage, schedule) diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 4b11188b03..af22db7137 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -699,7 +699,6 @@ class AMDSMIParser(argparse.ArgumentParser): # Help text for Arguments only on Linux Baremetal platforms fan_help = "Current fan speed" - vc_help = "Display voltage curve" overdrive_help = "Current GPU clock overdrive level" perf_level_help = "Current DPM performance level" xgmi_err_help = "XGMI error information since last read" @@ -770,7 +769,6 @@ class AMDSMIParser(argparse.ArgumentParser): # Optional Args for Linux Baremetal Systems if self.helpers.is_baremetal() and self.helpers.is_linux(): metric_parser.add_argument('-f', '--fan', 
action='store_true', required=False, help=fan_help) - metric_parser.add_argument('-C', '--voltage-curve', action='store_true', required=False, help=vc_help) metric_parser.add_argument('-o', '--overdrive', action='store_true', required=False, help=overdrive_help) metric_parser.add_argument('-l', '--perf-level', action='store_true', required=False, help=perf_level_help) metric_parser.add_argument('-x', '--xgmi-err', action='store_true', required=False, help=xgmi_err_help) diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index 2840fb5e62..6502cabc11 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -1253,12 +1253,13 @@ typedef struct { typedef struct { amdsmi_range_t curr_sclk_range; //!< The current SCLK frequency range amdsmi_range_t curr_mclk_range; //!< The current MCLK frequency range; - //!< (upper bound only) + //!< (upper bound only) amdsmi_range_t sclk_freq_limits; //!< The range possible of SCLK values amdsmi_range_t mclk_freq_limits; //!< The range possible of MCLK values /** * @brief The current voltage curve + * @deprecated Voltage curve support has been deprecated by the driver */ amdsmi_od_volt_curve_t curve; uint32_t num_regions; //!< The number of voltage curve regions @@ -2965,7 +2966,7 @@ amdsmi_status_t amdsmi_get_clk_freq(amdsmi_processor_handle processor_handle, amdsmi_status_t amdsmi_reset_gpu(amdsmi_processor_handle processor_handle); /** - * @brief This function retrieves the voltage/frequency curve information. It is + * @brief This function retrieves the overdrive GFX & MCLK information.
It is + * not supported on virtual machine guest * * @platform{gpu_bm_linux} @@ -3166,6 +3167,9 @@ amdsmi_status_t amdsmi_set_gpu_od_clk_info(amdsmi_processor_handle processor_han * * @platform{gpu_bm_linux} * + * @deprecated Voltage curve information is no longer supported by the + * amdgpu driver; this includes the ability to set voltage curve points + * * @details Given a processor handle @p processor_handle, a voltage point @p vpoint * and a voltage value @p voltvalue this function will set voltage curve point * @@ -3192,6 +3196,9 @@ amdsmi_status_t amdsmi_set_gpu_od_volt_info(amdsmi_processor_handle processor_ha * * @platform{gpu_bm_linux} * + * @deprecated Voltage curve information is no longer supported by the + * amdgpu driver; this includes the number of valid voltage regions + * * @details Given a processor handle @p processor_handle, a pointer to an unsigned integer * @p num_regions and a buffer of ::amdsmi_freq_volt_region_t structures, @p * buffer, this function will populate @p buffer with the current @@ -3502,7 +3509,7 @@ amdsmi_status_t amdsmi_set_gpu_process_isolation(amdsmi_processor_handle process * @platform{gpu_bm_linux} @platform{guest_1vf} * * @details Given a processor handle @p processor_handle, and a sclean flag @p sclean, - * this function will clear the SRAM data of this processor. This can be called between + * this function will clear the SRAM data of this processor. This can be called between * user logins to prevent information leak.
* * @note This function requires root access diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index 6061152f8b..7d1f414565 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -1591,7 +1591,7 @@ except AmdSmiException as e: ### amdsmi_set_gpu_od_clk_info -Description: This function sets the clock frequency information +Description: **deprecated** This function sets the clock frequency information It is not supported on virtual machine guest Input parameters: @@ -2306,7 +2306,7 @@ except AmdSmiException as e: ### amdsmi_get_gpu_od_volt_curve_regions -Description: This function will retrieve the current valid regions in the +Description: **deprecated** This function will retrieve the current valid regions in the frequency/voltage space. It is not supported on virtual machine guest Input parameters: diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h index 0fafa31c8f..8797cf1b5f 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi.h @@ -3058,6 +3058,8 @@ rsmi_status_t rsmi_dev_od_clk_info_set(uint32_t dv_ind, rsmi_freq_ind_t level, /** * @brief This function sets 1 of the 3 voltage curve points. * + * @deprecated This function is deprecated due to driver changes. + * * @details Given a device index @p dv_ind, a voltage point @p vpoint * and a voltage value @p voltvalue this function will set voltage curve point * @@ -3083,6 +3085,8 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, * @brief This function will retrieve the current valid regions in the * frequency/voltage space. * + * @deprecated This function is deprecated due to driver changes. 
+ * * @details Given a device index @p dv_ind, a pointer to an unsigned integer * @p num_regions and a buffer of ::rsmi_freq_volt_region_t structures, @p * buffer, this function will populate @p buffer with the current @@ -3448,7 +3452,7 @@ rsmi_status_t rsmi_dev_process_isolation_set(uint32_t dv_ind, uint32_t pisolate); /** - * @brief Clear the GPU SRAM data + * @brief Clear the GPU SRAM data * * * @details Given a device index @p dv_ind, this function will clear the diff --git a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h index 67d9d8b8d8..32e6bdeefc 100755 --- a/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h +++ b/projects/amdsmi/rocm_smi/include/rocm_smi/rocm_smi_utils.h @@ -45,14 +45,17 @@ #include -#include +#include #include -#include -#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include #include "rocm_smi/rocm_smi_device.h" @@ -125,13 +128,33 @@ std::string print_rsmi_od_volt_freq_regions(uint32_t num_regions, bool is_sudo_user(); rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind, std::string *gfx_version); + +std::string leftTrim(const std::string &s); +std::string rightTrim(const std::string &s); +std::string trim(const std::string &s); +std::string removeNewLines(const std::string &s); + +std::string removeString(const std::string origStr, + const std::string &removeMe); template - std::string print_int_as_hex(T i, bool showHexNotation = true) { + std::string print_int_as_hex(T i, bool showHexNotation = true, + int overloadBitSize = 0) { std::stringstream ss; if (showHexNotation) { - ss << "0x" << std::setfill('0') << std::setw(sizeof(T) * 2) << std::hex; + if (overloadBitSize == 0) { + ss << "0x" << std::hex << std::setw(sizeof(T) * 2) << std::setfill('0'); + } else { + // 8 bits per 1 byte + int byteSize = (overloadBitSize / 8) * 2; + ss << "0x" << std::hex << std::setw(byteSize) << 
std::setfill('0'); + } } else { - ss << std::setfill('0') << std::setw(sizeof(T) * 2) << std::hex; + if (overloadBitSize == 0) { + ss << std::hex << std::setw(sizeof(T) * 2) << std::setfill('0'); + } else { + int byteSize = (overloadBitSize / 8) * 2; + ss << std::hex << std::setw(byteSize) << std::setfill('0'); + } } if (std::is_same::value) { @@ -162,7 +185,8 @@ std::string print_unsigned_hex_and_int(T i, std::string heading="") { } ss << "Hex (MSB): " << print_int_as_hex(i) << ", " << "Unsigned int: " << print_unsigned_int(i) << ", " - << "Byte Size: " << sizeof(T); + << "Byte Size: " << sizeof(T) << ", " + << "Bits: " << sizeof(T) * 8; // 8 bits per 1 byte return ss.str(); } @@ -283,8 +307,292 @@ class ScopedAcquire { // In VM environment, the /proc/cpuinfo set hypervisor flag by default bool is_vm_guest(); -// trim a string -std::string trim(const std::string &s); + +// +enum class TagSplitterPositional_t +{ + kFIRST, + kBETWEEN, + kLAST, + kNONE, +}; + +template +class TagTextContents_t +{ + public: + using TextLines_t = std::vector; + using PrimaryList_t = std::vector; + using SecondaryList_t = std::vector; + using PrimaryKeyTbl_t = std::map; + using SecondaryKeyTbl_t = std::map; + using StructuredKeysTbl_t = std::map>; + + // + TagTextContents_t() = default; + TagTextContents_t(const TagTextContents_t&) = delete; + TagTextContents_t(TagTextContents_t&&) = delete; + TagTextContents_t& operator=(const TagTextContents_t&) = delete; + TagTextContents_t& operator=(TagTextContents_t&&) = delete; + + explicit TagTextContents_t(const TextLines_t& text_content) + : m_text_content(text_content) {} + + TagTextContents_t& set_text_content(const TextLines_t& text_content) + { + m_text_content = text_content; + // Return *this like the other setters; falling off the end of a + return *this; // reference-returning function is undefined behaviour. + } + + TagTextContents_t& set_title_terminator(const std::string& title_mark, + TagSplitterPositional_t title_mark_position) { + m_title_mark = title_mark; + m_title_mark_position = title_mark_position; + + return *this; + } + + TagTextContents_t&
set_key_data_splitter(const std::string& line_splitter_mark, + TagSplitterPositional_t line_mark_position) { + m_line_splitter_mark = line_splitter_mark; + m_line_mark_position = line_mark_position; + + return *this; + } + + TagTextContents_t& structure_content() { + // Sanitizes the content: trim() returns a new string, so store it back in place. + if (!m_text_content.empty()) { + for (auto& text_line : m_text_content) { text_line = trim(text_line); } + section_title_lookup(); + section_data_lookup(); + } + + return *this; + } + + decltype(auto) get_title_size() { + return m_primary.size(); + } + + decltype(auto) get_structured_subkeys_size(const PrimaryKeyType& prim_key) { + return m_structured[prim_key].size(); + } + + decltype(auto) contains_title_key(const PrimaryKeyType& key) { + return (m_primary.find(key) != m_primary.end()); + } + + decltype(auto) contains_structured_key(const PrimaryKeyType& prim_key, + const SecondaryKeyType& sec_key) { + if (auto first_key_itr = m_structured.find(prim_key); + first_key_itr != m_structured.end()) { + if (auto sec_key_itr = first_key_itr->second.find(sec_key); + sec_key_itr != first_key_itr->second.end()) { + return true; + } + } + + return false; + } + + decltype(auto) get_structured_value_by_keys(const PrimaryKeyType& prim_key, + const SecondaryKeyType& sec_key, + bool is_value_id = true) { + if (auto first_key_itr = m_structured.find(prim_key); + first_key_itr != m_structured.end()) { + if (auto sec_key_itr = first_key_itr->second.find(sec_key); + sec_key_itr != first_key_itr->second.end()) { + SecondaryDataType key_value{}; + if (is_value_id) { + key_value = SecondaryDataType(sec_key_itr->first) + " "; + } + key_value += sec_key_itr->second; + return key_value; + } + } + + return SecondaryDataType{}; + } + + decltype(auto) get_structured_data_subkey_by_position(const PrimaryKeyType& prim_key, + uint32_t key_position) { + auto key_counter = uint32_t(0); + SecondaryKeyType data_key{}; + if (key_position < (get_structured_subkeys_size(prim_key))) { + for (const auto& [sec_key,
sec_value] : m_structured[prim_key]) { + if (key_counter == key_position) { + data_key = static_cast(sec_key); + return data_key; + } + ++key_counter; + } + } + + return data_key; + } + + decltype(auto) get_structured_data_subkey_first(const PrimaryKeyType& prim_key) { + return (get_structured_value_by_keys(prim_key, + get_structured_data_subkey_by_position(prim_key, 0))); + } + + decltype(auto) get_structured_data_subkey_last(const PrimaryKeyType& prim_key) { + return (get_structured_value_by_keys(prim_key, get_structured_data_subkey_by_position(prim_key, + (get_structured_subkeys_size(prim_key) - 1)))); + } + + void reset() { + m_text_content.clear(); + m_primary.clear(); + m_structured.clear(); + m_title_mark.clear(); + m_line_splitter_mark.clear(); + m_title_mark_position = TagSplitterPositional_t::kNONE; + m_line_mark_position = TagSplitterPositional_t::kNONE; + } + + decltype(auto) dump_structured_content() { + std::ostringstream ostrstream; + ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======" << "\n"; + ostrstream << "** Primary Table **" << "\n"; + for (const auto& [key, values] : m_primary) { + ostrstream << "key: " << key << " values: " << values.size() << "\n"; + for (const auto& value : values) { + ostrstream << "\t value: " << value << "\n"; + } + } + + ostrstream << "\n ** Structured Table **" << "\n"; + for (const auto& [prim_key, prim_values] : m_structured) { + ostrstream << "key: " << prim_key << "\n"; + for (const auto& [sec_key, sec_value] : prim_values) { + ostrstream << "\t key: " << sec_key << " -> " << sec_value << "\n"; + } + } + ostrstream << "\n\n"; + + return ostrstream.str(); + } + + + private: + TextLines_t m_text_content; + PrimaryKeyTbl_t m_primary; + StructuredKeysTbl_t m_structured; + std::string m_title_mark; + std::string m_line_splitter_mark; + TagSplitterPositional_t m_title_mark_position; + TagSplitterPositional_t m_line_mark_position; + + // + // Note: Organizes table with Title as a Key, and a list of values. 
+ // + decltype(auto) section_title_lookup() { + if (m_title_mark.empty() || + m_title_mark_position == TagSplitterPositional_t::kNONE) { + return; + } + + // + // Note: + // - top_title_line: Left pointer for the sliding window + // - bottom_title_line: Right pointer for the sliding window + // + auto top_title_line = uint32_t(std::numeric_limits::max()); + auto bottom_title_line = uint32_t(std::numeric_limits::max()); + auto line_counter = uint32_t(0); + + // + // Note: This whole interval/window where the section/title starts, and where it ends. + // + auto update_primary_tbl = [&](const uint32_t& from_line, const uint32_t& to_line) { + auto key = static_cast(m_text_content[from_line]); + for (auto line_num(from_line + 1); line_num < to_line; ++line_num) { + if ((line_num < m_text_content.size()) && !m_text_content[line_num].empty()) { + m_primary[key].push_back(m_text_content[line_num]); + } + } + }; + + auto adjust_sliding_window = [&](const uint32_t& title_line) { + // First time top_title_line gets adjusted. 
+ if (top_title_line == uint32_t(std::numeric_limits::max())) { + top_title_line = title_line; + bottom_title_line = top_title_line; + return; + } + if (title_line > bottom_title_line) { + bottom_title_line = title_line; + update_primary_tbl(top_title_line, bottom_title_line); + top_title_line = bottom_title_line; + } + }; + + for (const auto& line : m_text_content) { + auto was_title_found{false}; + switch (m_title_mark_position) { + case TagSplitterPositional_t::kFIRST: + // Section/Title Mark was found at the first position + if (line.find_first_of(m_title_mark.c_str()) == 0) { + was_title_found = true; + } + break; + + case TagSplitterPositional_t::kLAST: + // Section/Title Mark was found at the last position (skip empty lines: npos + 1 wraps to 0 == size()) + if (!line.empty() && (line.find_last_of(m_title_mark.c_str()) + 1) == line.size()) { + was_title_found = true; + } + break; + + default: + break; + } + + if (was_title_found) { + adjust_sliding_window(line_counter); + } + ++line_counter; + } + + // Flush the last section; the upper bound is exclusive, so pass the full line count. + if (line_counter > bottom_title_line) { + update_primary_tbl(bottom_title_line, line_counter); + } + } + + decltype(auto) section_data_lookup() { + if (m_line_splitter_mark.empty() || + m_line_mark_position == TagSplitterPositional_t::kNONE) { + return; + } + + // + // Note: Organizes table with Title as a Key, a Key/ID for values and values. + // It takes into consideration the initial constraints were all good and + // that the primary table has been populated.
+ for (const auto& [prim_key, prim_values] : m_primary) { + for (const auto& value : prim_values) { + if (auto mark_pos = value.find_first_of(m_line_splitter_mark.c_str()); + mark_pos != std::string::npos) { + auto sec_key = trim(value.substr(0, mark_pos + 1)); + auto sec_data = trim(value.substr((mark_pos + 1), value.size())); + if (!sec_key.empty()) { + m_structured[prim_key].insert(std::make_pair(sec_key, sec_data)); + } + } + } + } + } + +}; + +using TextFileTagContents_t = TagTextContents_t; + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi.cc b/projects/amdsmi/rocm_smi/src/rocm_smi.cc index dd8e903328..aa5f30d9d1 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi.cc @@ -1415,17 +1415,6 @@ For the new format, GFXCLK field will show min and max values(0/1). If the curre frequency in neither min/max but lies within the range, this is indicated by an additional value followed by * at index 1 and max value at index 2. 
*/ -constexpr uint32_t kOD_SCLK_label_array_index = 0; -constexpr uint32_t kOD_MCLK_label_array_index = - kOD_SCLK_label_array_index + 3; -constexpr uint32_t kOD_VDDC_CURVE_label_array_index = - kOD_MCLK_label_array_index + 2; -constexpr uint32_t kOD_OD_RANGE_label_array_index = - kOD_VDDC_CURVE_label_array_index + 4; -constexpr uint32_t kOD_VDDC_CURVE_start_index = - kOD_OD_RANGE_label_array_index + 3; -// constexpr uint32_t kOD_VDDC_CURVE_num_lines = -// kOD_VDDC_CURVE_start_index + 4; constexpr uint32_t kMIN_VALID_LINES = 2; static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, @@ -1450,62 +1439,75 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, return RSMI_STATUS_NOT_YET_IMPLEMENTED; } - assert(val_vec[kOD_SCLK_label_array_index] == "OD_SCLK:" || - val_vec[kOD_SCLK_label_array_index] == "GFXCLK:"); - if ((val_vec[kOD_SCLK_label_array_index] != "OD_SCLK:") && - (val_vec[kOD_SCLK_label_array_index] != "GFXCLK:")) { - return RSMI_STATUS_UNEXPECTED_DATA; + // + const std::string kTAG_OD_SCLK{"OD_SCLK:"}; + const std::string kTAG_GFXCLK{"GFXCLK:"}; + const std::string KTAG_OD_MCLK{"OD_MCLK:"}; + const std::string KTAG_MCLK{"MCLK:"}; + const std::string KTAG_FIRST_FREQ_IDX{"0:"}; + amd::smi::TextFileTagContents_t txt_power_dev_od_voltage(val_vec); + txt_power_dev_od_voltage + .set_title_terminator(":", amd::smi::TagSplitterPositional_t::kLAST) + .set_key_data_splitter(":", amd::smi::TagSplitterPositional_t::kBETWEEN) + .structure_content(); + + // + // Note: We must have minimum of 'GFXCLK:' && 'MCLK:' OR: + // 'OD_SCLK:' && 'OD_MCLK:' tags. 
+ if (txt_power_dev_od_voltage.get_title_size() < kMIN_VALID_LINES) { + return rsmi_status_t::RSMI_STATUS_NO_DATA; } - - // find last_item but skip empty lines - int last_item = val_vec.size()-1; - while (val_vec[last_item].empty() || val_vec[last_item][0] == 0) - last_item--; - - p->curr_sclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 1); - p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 2); - - if (val_vec.size() < (kOD_MCLK_label_array_index + 1)) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } - // The condition below checks if it is the old style or new style format. - if (val_vec[kOD_MCLK_label_array_index] == "OD_MCLK:") { - p->curr_mclk_range.lower_bound = 0; - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 1); - } else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") { - p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 1); - // the upper memory frequency is the last - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, last_item); - return RSMI_STATUS_SUCCESS; - } else { - if (val_vec.size() < (kOD_MCLK_label_array_index + 3)) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } - if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") { - p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 3); - p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 2); - // the upper memory frequency is the last - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, last_item); - return RSMI_STATUS_SUCCESS; - } - return RSMI_STATUS_NOT_YET_IMPLEMENTED; + // Note: For debug builds/purposes only. 
+ assert(txt_power_dev_od_voltage.contains_title_key(kTAG_GFXCLK) || + txt_power_dev_od_voltage.contains_title_key(kTAG_OD_SCLK)); + // Note: For release builds/purposes. + if (!txt_power_dev_od_voltage.contains_title_key(kTAG_GFXCLK) && + !txt_power_dev_od_voltage.contains_title_key(kTAG_OD_SCLK)) { + return rsmi_status_t::RSMI_STATUS_UNEXPECTED_DATA; } - if (val_vec.size() < kOD_VDDC_CURVE_label_array_index) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } + // Note: Quick helpers for getting 1st and last elements found + auto build_lower_bound = [&](const std::string& prim_key) { + auto lower_bound_data = txt_power_dev_od_voltage.get_structured_data_subkey_first(prim_key); + return std::vector{lower_bound_data}; + }; - p->num_regions = - static_cast((val_vec.size()) / 2); + auto build_upper_bound = [&](const std::string& prim_key) { + auto upper_bound_data = txt_power_dev_od_voltage.get_structured_data_subkey_last(prim_key); + return std::vector{upper_bound_data}; + }; + + // Validates 'OD_SCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(kTAG_OD_SCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_sclk_range.lower_bound = freq_string_to_int(build_lower_bound(kTAG_OD_SCLK), nullptr, nullptr, 0); + p->curr_sclk_range.upper_bound = freq_string_to_int(build_upper_bound(kTAG_OD_SCLK), nullptr, nullptr, 0); + + // Validates 'OD_MCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_MCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_mclk_range.lower_bound = freq_string_to_int(build_lower_bound(KTAG_OD_MCLK), nullptr, nullptr, 0); + p->curr_mclk_range.upper_bound = freq_string_to_int(build_upper_bound(KTAG_OD_MCLK), nullptr, nullptr, 0); + } + } + // Validates 'GFXCLK' is in the structure + else if (txt_power_dev_od_voltage.contains_structured_key(kTAG_GFXCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_sclk_range.lower_bound = freq_string_to_int(build_lower_bound(kTAG_GFXCLK), nullptr, nullptr, 0); + p->curr_sclk_range.upper_bound = 
freq_string_to_int(build_upper_bound(kTAG_GFXCLK), nullptr, nullptr, 0); + + // Validates 'MCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_MCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_mclk_range.lower_bound = freq_string_to_int(build_lower_bound(KTAG_MCLK), nullptr, nullptr, 0); + p->curr_mclk_range.upper_bound = freq_string_to_int(build_upper_bound(KTAG_MCLK), nullptr, nullptr, 0); + } + } + else { + return RSMI_STATUS_NOT_YET_IMPLEMENTED; + } + p->num_regions = 0; return RSMI_STATUS_SUCCESS; CATCH @@ -1674,30 +1676,6 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, } -static void get_vc_region(uint32_t start_ind, - std::vector *val_vec, rsmi_freq_volt_region_t *p) { - std::ostringstream ss; - ss << __PRETTY_FUNCTION__ << " | ======= start ======="; - LOG_TRACE(ss); - assert(p != nullptr); - assert(val_vec != nullptr); - THROW_IF_NULLPTR_DEREF(p) - THROW_IF_NULLPTR_DEREF(val_vec) - - // There must be at least 1 region to read in - assert(val_vec->size() >= kOD_OD_RANGE_label_array_index + 2); - assert((*val_vec)[kOD_OD_RANGE_label_array_index] == "OD_RANGE:"); - if ((val_vec->size() < kOD_OD_RANGE_label_array_index + 2) || - ((*val_vec)[kOD_OD_RANGE_label_array_index] != "OD_RANGE:") ) { - ss << __PRETTY_FUNCTION__ << " | ======= end ======= | returning " - << getRSMIStatusString(RSMI_STATUS_UNEXPECTED_DATA); - LOG_TRACE(ss); - throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__); - } - od_value_pair_str_to_range((*val_vec)[start_ind], &p->freq_range); - od_value_pair_str_to_range((*val_vec)[start_ind + 1], &p->volt_range); -} - /* * num_regions [inout] on calling, the number of regions requested to be read * in. At completion, the number of regions actually read in @@ -1729,23 +1707,20 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind, // This is a work-around to handle systems where kDevPowerODVoltage is not // fully supported yet. 
- if (val_vec.size() < 2) { + if (val_vec.size() < kMIN_VALID_LINES) { ss << __PRETTY_FUNCTION__ - << " | Issue: val_vec.size() < 2" << "; returning " + << " | Issue: val_vec.size() < " << kMIN_VALID_LINES << "; returning " << getRSMIStatusString(RSMI_STATUS_NOT_YET_IMPLEMENTED); LOG_ERROR(ss); return RSMI_STATUS_NOT_YET_IMPLEMENTED; } uint32_t val_vec_size = static_cast(val_vec.size()); - assert((val_vec_size - kOD_VDDC_CURVE_start_index) > 0); - ss << __PRETTY_FUNCTION__ << " | val_vec_size = " << std::dec - << val_vec_size - << " | kOD_VDDC_CURVE_start_index = " << kOD_VDDC_CURVE_start_index; + << val_vec_size; LOG_DEBUG(ss); - *num_regions = std::min((val_vec_size) / 2, *num_regions); + *num_regions = 0; return RSMI_STATUS_SUCCESS; CATCH diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc index 61ec4243dc..45dd3fe40f 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_utils.cc @@ -1134,14 +1134,6 @@ std::string print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv) { ss << pt_rng_Mhz("\t**Current SCLK frequency range: ", &odv->curr_sclk_range); ss << pt_rng_Mhz("\t**Current MCLK frequency range: ", &odv->curr_mclk_range); - ss << pt_rng_Mhz("\t**Min/Max Possible SCLK frequency range: ", - &odv->sclk_freq_limits); - ss << pt_rng_Mhz("\t**Min/Max Possible MCLK frequency range: ", - &odv->mclk_freq_limits); - - ss << "\t**Current Freq/Volt. curve: " << "\n"; - ss << pt_vddc_curve(&odv->curve); - ss << "\t**Number of Freq./Volt. 
regions: " << odv->num_regions << "\n\n"; return ss.str(); } @@ -1224,5 +1216,6 @@ std::queue getAllDeviceGfxVers() { return deviceGfxVersions; } + } // namespace smi } // namespace amd diff --git a/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc b/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc index 48bbe82934..e5578619f1 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/mutual_exclusion.cc @@ -183,10 +183,10 @@ void TestMutualExclusion::Run(void) { int64_t dmy_i64 = 0; char dmy_str[10]; amdsmi_dev_perf_level_t dmy_perf_lvl; - amdsmi_frequencies_t dmy_freqs; - amdsmi_od_volt_freq_data_t dmy_od_volt; - amdsmi_freq_volt_region_t dmy_vlt_reg; - amdsmi_error_count_t dmy_err_cnt; + amdsmi_frequencies_t dmy_freqs{}; + amdsmi_od_volt_freq_data_t dmy_od_volt{}; + amdsmi_freq_volt_region_t dmy_vlt_reg{}; + amdsmi_error_count_t dmy_err_cnt{}; amdsmi_ras_err_state_t dmy_ras_err_st; // This can be replaced with ASSERT_EQ() once env. 
stabilizes diff --git a/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc index 4c1a758fc9..080d8e9a1a 100755 --- a/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/volt_freq_curv_read.cc @@ -146,7 +146,7 @@ static void print_amdsmi_od_volt_freq_regions(uint32_t num_regions, void TestVoltCurvRead::Run(void) { amdsmi_status_t err; - amdsmi_od_volt_freq_data_t odv; + amdsmi_od_volt_freq_data_t odv{}; TestBase::Run(); if (setup_failed_) { @@ -177,25 +177,5 @@ void TestVoltCurvRead::Run(void) { err = amdsmi_get_gpu_od_volt_info(processor_handles_[i], nullptr); ASSERT_EQ(err, AMDSMI_STATUS_INVAL); } - - if (err == AMDSMI_STATUS_SUCCESS) { - std::cout << "\t**Frequency-voltage curve data:" << std::endl; - print_amdsmi_od_volt_freq_data_t(&odv); - - amdsmi_freq_volt_region_t *regions; - uint32_t num_regions; - regions = new amdsmi_freq_volt_region_t[odv.num_regions]; - ASSERT_TRUE(regions != nullptr); - - num_regions = odv.num_regions; - err = amdsmi_get_gpu_od_volt_curve_regions(processor_handles_[i], &num_regions, regions); - CHK_ERR_ASRT(err) - ASSERT_TRUE(num_regions == odv.num_regions); - - std::cout << "\t**Frequency-voltage curve regions:" << std::endl; - print_amdsmi_od_volt_freq_regions(num_regions, regions); - - delete []regions; - } } }