diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 0ecfc20acc..d82a21d330 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -2869,6 +2869,8 @@ rsmi_status_t rsmi_dev_od_clk_info_set(uint32_t dv_ind, rsmi_freq_ind_t level, /** * @brief This function sets 1 of the 3 voltage curve points. * + * @deprecated This function is deprecated due to driver changes. + * * @details Given a device index @p dv_ind, a voltage point @p vpoint * and a voltage value @p voltvalue this function will set voltage curve point * @@ -2894,6 +2896,8 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, * @brief This function will retrieve the current valid regions in the * frequency/voltage space. * + * @deprecated This function is deprecated due to driver changes. + * * @details Given a device index @p dv_ind, a pointer to an unsigned integer * @p num_regions and a buffer of ::rsmi_freq_volt_region_t structures, @p * buffer, this function will populate @p buffer with the current diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h index 40f24eca4f..8196822ceb 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h @@ -45,14 +45,17 @@ #include -#include +#include #include -#include -#include #include -#include -#include +#include +#include #include +#include +#include +#include +#include +#include #include "rocm_smi/rocm_smi_device.h" @@ -123,6 +126,12 @@ std::string print_rsmi_od_volt_freq_regions(uint32_t num_regions, bool is_sudo_user(); rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind, std::string *gfx_version); + +std::string leftTrim(const std::string &s); +std::string rightTrim(const std::string &s); +std::string trim(const std::string &s); +std::string removeNewLines(const std::string &s); + std::string removeString(const std::string origStr, const std::string &removeMe); template @@ -296,6 +305,291 @@ class ScopedAcquire { // In VM environment, the /proc/cpuinfo set hypervisor flag by default bool is_vm_guest(); + +// +enum class TagSplitterPositional_t +{ + kFIRST, + kBETWEEN, + kLAST, + kNONE, +}; + +template +class TagTextContents_t +{ + public: + using TextLines_t = std::vector; + using PrimaryList_t = std::vector; + using SecondaryList_t = std::vector; + using PrimaryKeyTbl_t = std::map; + using SecondaryKeyTbl_t = std::map; + using StructuredKeysTbl_t = std::map>; + + // + TagTextContents_t() = default; + TagTextContents_t(const TagTextContents_t&) = delete; + TagTextContents_t(TagTextContents_t&&) = delete; + TagTextContents_t& operator=(const TagTextContents_t&) = delete; + TagTextContents_t& operator=(TagTextContents_t&&) = delete; + + explicit TagTextContents_t(const TextLines_t& text_content) + : m_text_content(text_content) {} + + TagTextContents_t& set_text_content(const TextLines_t& text_content) + { + m_text_content = text_content; + } + + TagTextContents_t& set_title_terminator(const std::string& title_mark, + TagSplitterPositional_t title_mark_position) { + m_title_mark = title_mark; + m_title_mark_position = title_mark_position; + + return *this; + } + + TagTextContents_t& set_key_data_splitter(const std::string& line_splitter_mark, + TagSplitterPositional_t line_mark_position) { + m_line_splitter_mark = line_splitter_mark; + m_line_mark_position = line_mark_position; + + return *this; + } + + TagTextContents_t& structure_content() { + // Sanitizes the content. + if (!m_text_content.empty()) { + std::for_each(m_text_content.begin(), m_text_content.end(), trim); + section_title_lookup(); + section_data_lookup(); + } + + return *this; + } + + decltype(auto) get_title_size() { + return m_primary.size(); + } + + decltype(auto) get_structured_subkeys_size(const PrimaryKeyType& prim_key) { + return m_structured[prim_key].size(); + } + + decltype(auto) contains_title_key(const PrimaryKeyType& key) { + return (m_primary.find(key) != m_primary.end()); + } + + decltype(auto) contains_structured_key(const PrimaryKeyType& prim_key, + const SecondaryKeyType& sec_key) { + if (auto first_key_itr = m_structured.find(prim_key); + first_key_itr != m_structured.end()) { + if (auto sec_key_itr = first_key_itr->second.find(sec_key); + sec_key_itr != first_key_itr->second.end()) { + return true; + } + } + + return false; + } + + decltype(auto) get_structured_value_by_keys(const PrimaryKeyType& prim_key, + const SecondaryKeyType& sec_key, + bool is_value_id = true) { + if (auto first_key_itr = m_structured.find(prim_key); + first_key_itr != m_structured.end()) { + if (auto sec_key_itr = first_key_itr->second.find(sec_key); + sec_key_itr != first_key_itr->second.end()) { + SecondaryDataType key_value{}; + if (is_value_id) { + key_value = SecondaryDataType(sec_key_itr->first) + " "; + } + key_value += sec_key_itr->second; + return key_value; + } + } + + return SecondaryDataType{}; + } + + decltype(auto) get_structured_data_subkey_by_position(const PrimaryKeyType& prim_key, + uint32_t key_position) { + auto key_counter = uint32_t(0); + SecondaryKeyType data_key{}; + if (key_position < (get_structured_subkeys_size(prim_key))) { + for (const auto& [sec_key, sec_value] : m_structured[prim_key]) { + if (key_counter == key_position) { + data_key = static_cast(sec_key); + return data_key; + } + ++key_counter; + } + } + + return data_key; + } + + decltype(auto) get_structured_data_subkey_first(const PrimaryKeyType& prim_key) { + return (get_structured_value_by_keys(prim_key, + get_structured_data_subkey_by_position(prim_key, 0))); + } + + decltype(auto) get_structured_data_subkey_last(const PrimaryKeyType& prim_key) { + return (get_structured_value_by_keys(prim_key, get_structured_data_subkey_by_position(prim_key, + (get_structured_subkeys_size(prim_key) - 1)))); + } + + void reset() { + m_text_content.clear(); + m_primary.clear(); + m_structured.clear(); + m_title_mark.clear(); + m_line_splitter_mark.clear(); + m_title_mark_position = TagSplitterPositional_t::kNONE; + m_line_mark_position = TagSplitterPositional_t::kNONE; + } + + decltype(auto) dump_structured_content() { + std::ostringstream ostrstream; + ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======" << "\n"; + ostrstream << "** Primary Table **" << "\n"; + for (const auto& [key, values] : m_primary) { + ostrstream << "key: " << key << " values: " << values.size() << "\n"; + for (const auto& value : values) { + ostrstream << "\t value: " << value << "\n"; + } + } + + ostrstream << "\n ** Structured Table **" << "\n"; + for (const auto& [prim_key, prim_values] : m_structured) { + ostrstream << "key: " << prim_key << "\n"; + for (const auto& [sec_key, sec_value] : prim_values) { + ostrstream << "\t key: " << sec_key << " -> " << sec_value << "\n"; + } + } + ostrstream << "\n\n"; + + return ostrstream.str(); + } + + + private: + TextLines_t m_text_content; + PrimaryKeyTbl_t m_primary; + StructuredKeysTbl_t m_structured; + std::string m_title_mark; + std::string m_line_splitter_mark; + TagSplitterPositional_t m_title_mark_position; + TagSplitterPositional_t m_line_mark_position; + + // + // Note: Organizes table with Title as a Key, and a list of values. + // + decltype(auto) section_title_lookup() { + if (m_title_mark.empty() || + m_title_mark_position == TagSplitterPositional_t::kNONE) { + return; + } + + // + // Note: + // - top_title_line: Left pointer for the sliding window + // - bottom_title_line: Right pointer for the sliding window + // + auto top_title_line = uint32_t(std::numeric_limits::max()); + auto bottom_title_line = uint32_t(std::numeric_limits::max()); + auto line_counter = uint32_t(0); + + // + // Note: This whole interval/window where the section/title starts, and where it ends. + // + auto update_primary_tbl = [&](const uint32_t& from_line, const uint32_t& to_line) { + auto key = static_cast(m_text_content[from_line]); + for (auto line_num(from_line + 1); line_num < to_line; ++line_num) { + if ((line_num < m_text_content.size()) && !m_text_content[line_num].empty()) { + m_primary[key].push_back(m_text_content[line_num]); + } + } + }; + + auto adjust_sliding_window = [&](const uint32_t& title_line) { + // First time top_title_line gets adjusted. + if (top_title_line == uint32_t(std::numeric_limits::max())) { + top_title_line = title_line; + bottom_title_line = top_title_line; + return; + } + if (title_line > bottom_title_line) { + bottom_title_line = title_line; + update_primary_tbl(top_title_line, bottom_title_line); + top_title_line = bottom_title_line; + } + }; + + for (const auto& line : m_text_content) { + auto was_title_found{false}; + switch (m_title_mark_position) { + case TagSplitterPositional_t::kFIRST: + // Section/Title Mark was found at the first position + if (line.find_first_of(m_title_mark.c_str()) == 0) { + was_title_found = true; + } + break; + + case TagSplitterPositional_t::kLAST: + // Section/Title Mark was found at the last position + if ((line.find_last_of(m_title_mark.c_str()) + 1) == line.size()) { + was_title_found = true; + } + break; + + default: + break; + } + + if (was_title_found) { + adjust_sliding_window(line_counter); + } + ++line_counter; + } + + // Any remaining elements? + if (line_counter > bottom_title_line) { + update_primary_tbl(bottom_title_line, (line_counter - 1)); + } + } + + decltype(auto) section_data_lookup() { + if (m_line_splitter_mark.empty() || + m_line_mark_position == TagSplitterPositional_t::kNONE) { + return; + } + + // + // Note: Organizes table with Title as a Key, a Key/ID for values and values. + // It takes into consideration the initial constraints were all good and + // that the primary table has been populated. + for (const auto& [prim_key, prim_values] : m_primary) { + for (const auto& value : prim_values) { + if (auto mark_pos = value.find_first_of(m_line_splitter_mark.c_str()); + mark_pos != std::string::npos) { + auto sec_key = trim(value.substr(0, mark_pos + 1)); + auto sec_data = trim(value.substr((mark_pos + 1), value.size())); + if (!sec_key.empty()) { + m_structured[prim_key].insert(std::make_pair(sec_key, sec_data)); + } + } + } + } + } + +}; + +using TextFileTagContents_t = TagTextContents_t; + + } // namespace smi } // namespace amd diff --git a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py index aed3292de6..334fb9be29 100755 --- a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py +++ b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py @@ -157,7 +157,7 @@ def formatMatrixToJSON(deviceList, matrix, metricName): :param deviceList: List of DRM devices (can be a single-item list) :param metricName: Title of the item to print to the log :param matrix: symmetric matrix full of values of every permutation of DRM devices. - + Matrix example: .. math:: @@ -554,9 +554,9 @@ def getPidList(): def getPower(device): """ Return dictionary of power responses. Response power dictionary: - + .. code-block:: python - + { 'power': string wattage response or 'N/A' (for not RSMI_STATUS_SUCCESS), 'power_type': power type string - 'Current Socket' or 'Average', @@ -566,7 +566,7 @@ def getPower(device): :param device: DRM device identifier """ - + power = c_int64(0) power_type = rsmi_power_type_t() power_ret_dict = { @@ -668,7 +668,7 @@ def getPowerLabel(deviceList): return powerLabel device=deviceList[0] power_dict = getPower(device) - if (power_dict['ret'] == rsmi_status_t.RSMI_STATUS_SUCCESS and + if (power_dict['ret'] == rsmi_status_t.RSMI_STATUS_SUCCESS and power_dict['power_type'] == 'CURRENT SOCKET'): powerLabel = rsmi_power_label.CURRENT_SOCKET_POWER return powerLabel @@ -1251,7 +1251,7 @@ def setClockExtremum(deviceList, level, clkType, clkValue, autoRespond): if level == "max": point = 1 try: - int(clkValue) + int(clkValue) except ValueError: printErrLog(None, 'Unable to set %s' % (clkValue)) logging.error('%s is not an integer', clkValue) @@ -1270,34 +1270,6 @@ def setClockExtremum(deviceList, level, clkType, clkValue, autoRespond): printLog(device, 'Setting %s %s clock is not supported for this device.' % (level, clkType), None) -def setVoltageCurve(deviceList, point, clk, volt, autoRespond): - """ Set voltage curve for a point in the PowerPlay table for a list of devices. - - :param deviceList: List of DRM devices (can be a single-item list) - :param point: Point on the voltage curve to modify - :param clk: Clock speed specified for this curve point - :param volt: Voltage specified for this curve point - :param autoRespond: Response to automatically provide for all prompts - """ - global RETCODE - value = '%s %s %s' % (point, clk, volt) - try: - any(int(item) for item in value.split()) - except ValueError: - printErrLog(None, 'Unable to set Voltage curve') - printErrLog(None, 'Non-integer characters are present in %s' %value) - RETCODE = 1 - return - confirmOutOfSpecWarning(autoRespond) - for device in deviceList: - ret = rocmsmi.rsmi_dev_od_volt_info_set(device, int(point), int(clk), int(volt)) - if rsmi_ret_ok(ret, device, 'set_voltage_curve'): - printLog(device, 'Successfully set voltage point %s to %s(MHz) %s(mV)' % (point, clk, volt), None) - else: - printErrLog(device, 'Unable to set voltage point %s to %s(MHz) %s(mV)' % (point, clk, volt)) - RETCODE = 1 - - def setPowerPlayTableLevel(deviceList, clkType, point, clk, volt, autoRespond): """ Set clock frequency and voltage for a level in the PowerPlay table for a list of devices. @@ -1972,7 +1944,7 @@ def showAllConcise(deviceList): temp_val += degree_sign + 'C' power_dict = getPower(device) powerVal = 'N/A' - if (power_dict['ret'] == rsmi_status_t.RSMI_STATUS_SUCCESS and + if (power_dict['ret'] == rsmi_status_t.RSMI_STATUS_SUCCESS and power_dict['power_type'] != 'INVALID_POWER_TYPE'): if power_dict['power'] != 0: powerVal = power_dict['power'] + power_dict['unit'] @@ -2001,7 +1973,7 @@ def showAllConcise(deviceList): values['card%s' % (str(device))] = [device, getNodeId(device), str(getDRMDeviceId(device)) + ", ", str(getGUID(device)), - temp_val, powerVal, + temp_val, powerVal, combined_partition_data, sclk, mclk, fan, str(perf).lower(), str(pwrCap), @@ -2371,7 +2343,7 @@ def getCoarseGrainUtil(device, typeName=None): for ut_counter in utilization_counters: printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val) - + :param device: DRM device identifier :param typeName: 'GFX Activity', 'Memory Activity' """ @@ -2695,10 +2667,10 @@ def showPower(deviceList): for device in deviceList: power_dict = getPower(device) power = 'N/A' - if (power_dict['ret'] == rsmi_status_t.RSMI_STATUS_SUCCESS and + if (power_dict['ret'] == rsmi_status_t.RSMI_STATUS_SUCCESS and power_dict['power_type'] != 'INVALID_POWER_TYPE'): power = power_dict['power'] - printLog(device, power_dict['power_type'].title() + ' Graphics Package Power (' + printLog(device, power_dict['power_type'].title() + ' Graphics Package Power (' + power_dict['unit'] + ')', power) elif checkIfSecondaryDie(device): @@ -2711,49 +2683,6 @@ def showPower(deviceList): printLogSpacer() -def showPowerPlayTable(deviceList): - """ Display current GPU Memory clock frequencies and voltages for a list of devices - - :param deviceList: List of DRM devices (can be a single-item list) - """ - global PRINT_JSON - if PRINT_JSON: - return - printLogSpacer(' GPU Memory clock frequencies and voltages ') - odvf = rsmi_od_volt_freq_data_t() - for device in deviceList: - ret = rocmsmi.rsmi_dev_od_volt_info_get(device, byref(odvf)) - if rsmi_ret_ok(ret, device, 'get_od_volt'): - # TODO: Make this more dynamic and less hard-coded if possible - printLog(device, 'OD_SCLK:', None) - printLog(device, '0: %sMhz' % (int(odvf.curr_sclk_range.lower_bound / 1000000)), None) - printLog(device, '1: %sMhz' % (int(odvf.curr_sclk_range.upper_bound / 1000000)), None) - printLog(device, 'OD_MCLK:', None) - printLog(device, '1: %sMhz' % (int(odvf.curr_mclk_range.upper_bound / 1000000)), None) - if odvf.num_regions > 0: - printLog(device, 'OD_VDDC_CURVE:', None) - for position in range(3): - printLog(device, '%d: %sMhz %smV' % ( - position, int(list(odvf.curve.vc_points)[position].frequency / 1000000), - int(list(odvf.curve.vc_points)[position].voltage)), None) - if odvf.sclk_freq_limits.lower_bound > 0 or odvf.sclk_freq_limits.upper_bound > 0 \ - or odvf.mclk_freq_limits.lower_bound >0 or odvf.mclk_freq_limits.upper_bound > 0: - printLog(device, 'OD_RANGE:', None) - if odvf.sclk_freq_limits.lower_bound > 0 or odvf.sclk_freq_limits.upper_bound > 0: - printLog(device, 'SCLK: %sMhz %sMhz' % ( - int(odvf.sclk_freq_limits.lower_bound / 1000000), int(odvf.sclk_freq_limits.upper_bound / 1000000)), None) - if odvf.mclk_freq_limits.lower_bound >0 or odvf.mclk_freq_limits.upper_bound > 0: - printLog(device, 'MCLK: %sMhz %sMhz' % ( - int(odvf.mclk_freq_limits.lower_bound / 1000000), int(odvf.mclk_freq_limits.upper_bound / 1000000)), None) - if odvf.num_regions > 0: - for position in range(3): - printLog(device, 'VDDC_CURVE_SCLK[%d]: %sMhz' % ( - position, int(list(odvf.curve.vc_points)[position].frequency / 1000000)), None) - printLog(device, 'VDDC_CURVE_VOLT[%d]: %smV' % ( - position, int(list(odvf.curve.vc_points)[position].voltage)), None) - printLogSpacer() - - def showProduct(deviceList): """ Show the requested product information for a list of devices @@ -2825,7 +2754,7 @@ def showRange(deviceList, rangeType): :param rangeType: [sclk|voltage] Type of range to return """ global RETCODE - if rangeType not in {'sclk', 'mclk', 'voltage'}: + if rangeType not in {'sclk', 'mclk'}: printLog(None, 'Invalid range identifier %s' % (rangeType), None) RETCODE = 1 return @@ -2840,21 +2769,6 @@ def showRange(deviceList, rangeType): if rangeType == 'mclk': printLog(device, 'Valid mclk range: %sMhz - %sMhz' % ( int(odvf.curr_mclk_range.lower_bound / 1000000), int(odvf.curr_mclk_range.upper_bound / 1000000)), None) - if rangeType == 'voltage': - if odvf.num_regions == 0: - printErrLog(device, 'Voltage curve regions unsupported.') - continue - num_regions = c_uint32(odvf.num_regions) - regions = (rsmi_freq_volt_region_t * odvf.num_regions)() - ret = rocmsmi.rsmi_dev_od_volt_curve_regions_get(device, byref(num_regions), byref(regions)) - if rsmi_ret_ok(ret, device, 'volt'): - for i in range(num_regions.value): - printLog(device, - 'Region %d: Valid voltage range: %smV - %smV' % (i, regions[i].volt_range.lower_bound, - regions[i].volt_range.upper_bound), - None) - else: - printLog(device, 'Unable to display %s range' % (rangeType), None) printLogSpacer() @@ -3172,25 +3086,6 @@ def showVoltage(deviceList): printLogSpacer() -def showVoltageCurve(deviceList): - """ Show the voltage curve points for the specified devices - - :param deviceList: List of DRM devices (can be a single-item list) - """ - printLogSpacer(' Voltage Curve Points ') - odvf = rsmi_od_volt_freq_data_t() - for device in deviceList: - ret = rocmsmi.rsmi_dev_od_volt_info_get(device, byref(odvf)) - if rsmi_ret_ok(ret, device, 'get_od_volt_info', silent=False) and odvf.num_regions > 0: - for position in range(3): - printLog(device, 'Voltage point %d: %sMhz %smV' % ( - position, int(list(odvf.curve.vc_points)[position].frequency / 1000000), - int(list(odvf.curve.vc_points)[position].voltage)), None) - else: - printErrLog(device, 'Voltage curve Points unsupported.') - printLogSpacer() - - def showXgmiErr(deviceList): """ Display the XGMI Error status @@ -3844,7 +3739,6 @@ if __name__ == '__main__': groupDisplayTop.add_argument('--showproductname', help='Show product details', action='store_true') groupDisplayTop.add_argument('--showserial', help='Show GPU\'s Serial Number', action='store_true') groupDisplayTop.add_argument('--showuniqueid', help='Show GPU\'s Unique ID', action='store_true') - groupDisplayTop.add_argument('--showvoltagerange', help='Show voltage range', action='store_true') groupDisplayTop.add_argument('--showbus', help='Show PCI bus number', action='store_true') groupDisplayPages.add_argument('--showpagesinfo', help='Show retired, pending and unreservable pages', action='store_true') @@ -3869,8 +3763,6 @@ if __name__ == '__main__': groupDisplay.add_argument('-o', '--showoverdrive', help='Show current GPU Clock OverDrive level', action='store_true') groupDisplay.add_argument('-p', '--showperflevel', help='Show current DPM Performance Level', action='store_true') - groupDisplay.add_argument('-S', '--showclkvolt', help='Show supported GPU and Memory Clocks and Voltages', - action='store_true') groupDisplay.add_argument('-s', '--showclkfrq', help='Show supported GPU and Memory Clock', action='store_true') groupDisplay.add_argument('--showmeminfo', help='Show Memory usage information for given block(s) TYPE', metavar='TYPE', type=str, nargs='+') @@ -3882,7 +3774,6 @@ if __name__ == '__main__': groupDisplay.add_argument('--showrasinfo', help='Show RAS enablement information and error counts for the specified block(s) (all if no arg given)', nargs='*') - groupDisplay.add_argument('--showvc', help='Show voltage curve', action='store_true') groupDisplay.add_argument('--showxgmierr', help='Show XGMI error information since last read', action='store_true') groupDisplay.add_argument('--showtopo', help='Show hardware topology information', action='store_true') groupDisplay.add_argument('--showtopoaccess', help='Shows the link accessibility between GPUs ', action='store_true') @@ -3922,8 +3813,6 @@ if __name__ == '__main__': groupAction.add_argument('--setmlevel', help='Change GPU Memory clock frequency (MHz) and Voltage for (mV) a specific Level', metavar=('MCLKLEVEL', 'MCLK', 'MVOLT'), nargs=3) - groupAction.add_argument('--setvc', help='Change SCLK Voltage Curve (MHz mV) for a specific point', - metavar=('POINT', 'SCLK', 'SVOLT'), nargs=3) groupAction.add_argument('--setsrange', help='Set min and max SCLK speed', metavar=('SCLKMIN', 'SCLKMAX'), nargs=2) groupAction.add_argument('--setextremum', help='Set min/max of SCLK/MCLK speed', metavar=('min|max', "sclk|mclk", 'CLK'), nargs=3) groupAction.add_argument('--setmrange', help='Set min and max MCLK speed', metavar=('MCLKMIN', 'MCLKMAX'), nargs=2) @@ -3993,7 +3882,7 @@ if __name__ == '__main__': or args.resetclocks or args.setprofile or args.resetprofile or args.setoverdrive or args.setmemoverdrive \ or args.setpoweroverdrive or args.resetpoweroverdrive or args.rasenable or args.rasdisable or \ args.rasinject or args.gpureset or args.setperfdeterminism or args.setslevel or args.setmlevel or \ - args.setvc or args.setsrange or args.setextremum or args.setmrange or args.setclock or \ + args.setsrange or args.setextremum or args.setmrange or args.setclock or \ args.setcomputepartition or args.setmemorypartition or args.resetcomputepartition or args.resetmemorypartition: relaunchAsSudo() @@ -4040,7 +3929,6 @@ if __name__ == '__main__': args.showproductname = True args.showserial = True args.showuniqueid = True - args.showvoltagerange = True args.showbus = True args.showpagesinfo = True args.showfan = True @@ -4058,14 +3946,12 @@ if __name__ == '__main__': args.showpids = "summary" args.showpidgpus = [] args.showreplaycount = True - args.showvc = True args.showcomputepartition = True args.showmemorypartition = True if not PRINT_JSON: args.showprofile = True args.showclkfrq = True - args.showclkvolt = True # Don't do reset in combination with any other command if args.gpureset: @@ -4136,8 +4022,6 @@ if __name__ == '__main__': showPids(args.showpids) if args.showpidgpus or str(args.showpidgpus) == '[]': showGpusByPid(args.showpidgpus) - if args.showclkvolt: - showPowerPlayTable(deviceList) if args.showvoltage: showVoltage(deviceList) if args.showbus: @@ -4181,10 +4065,6 @@ if __name__ == '__main__': showRange(deviceList, 'sclk') if args.showmclkrange: showRange(deviceList, 'mclk') - if args.showvoltagerange: - showRange(deviceList, 'voltage') - if args.showvc: - showVoltageCurve(deviceList) if args.showenergycounter: showEnergy(deviceList) if args.showcomputepartition: @@ -4221,8 +4101,6 @@ if __name__ == '__main__': resetPowerOverDrive(deviceList, args.autorespond) if args.setprofile: setProfile(deviceList, args.setprofile) - if args.setvc: - setVoltageCurve(deviceList, args.setvc[0], args.setvc[1], args.setvc[2], args.autorespond) if args.setextremum: setClockExtremum(deviceList, args.setextremum[0], args.setextremum[1], args.setextremum[2], args.autorespond) if args.setsrange: diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index fb38823d13..1b96059ccd 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -1262,18 +1262,6 @@ For the new format, GFXCLK field will show min and max values(0/1). If the curre frequency in neither min/max but lies within the range, this is indicated by an additional value followed by * at index 1 and max value at index 2. */ -constexpr uint32_t kOD_SCLK_label_array_index = 0; -constexpr uint32_t kOD_MCLK_label_array_index = - kOD_SCLK_label_array_index + 3; - -constexpr uint32_t kOD_VDDC_CURVE_label_array_index = - kOD_MCLK_label_array_index + 2; -constexpr uint32_t kOD_OD_RANGE_label_array_index = - kOD_VDDC_CURVE_label_array_index + 4; -constexpr uint32_t kOD_VDDC_CURVE_start_index = - kOD_OD_RANGE_label_array_index + 3; -// constexpr uint32_t kOD_VDDC_CURVE_num_lines = -// kOD_VDDC_CURVE_start_index + 4; constexpr uint32_t kMIN_VALID_LINES = 2; static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, @@ -1298,62 +1286,75 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, return RSMI_STATUS_NOT_YET_IMPLEMENTED; } - assert(val_vec[kOD_SCLK_label_array_index] == "OD_SCLK:" || - val_vec[kOD_SCLK_label_array_index] == "GFXCLK:"); - if ((val_vec[kOD_SCLK_label_array_index] != "OD_SCLK:") && - (val_vec[kOD_SCLK_label_array_index] != "GFXCLK:")) { - return RSMI_STATUS_UNEXPECTED_DATA; + // + const std::string kTAG_OD_SCLK{"OD_SCLK:"}; + const std::string kTAG_GFXCLK{"GFXCLK:"}; + const std::string KTAG_OD_MCLK{"OD_MCLK:"}; + const std::string KTAG_MCLK{"MCLK:"}; + const std::string KTAG_FIRST_FREQ_IDX{"0:"}; + amd::smi::TextFileTagContents_t txt_power_dev_od_voltage(val_vec); + txt_power_dev_od_voltage + .set_title_terminator(":", amd::smi::TagSplitterPositional_t::kLAST) + .set_key_data_splitter(":", amd::smi::TagSplitterPositional_t::kBETWEEN) + .structure_content(); + + // + // Note: We must have minimum of 'GFXCLK:' && 'MCLK:' OR: + // 'OD_SCLK:' && 'OD_MCLK:' tags. + if (txt_power_dev_od_voltage.get_title_size() < kMIN_VALID_LINES) { + return rsmi_status_t::RSMI_STATUS_NO_DATA; } - - // find last_item but skip empty lines - int last_item = val_vec.size()-1; - while (val_vec[last_item].empty() || val_vec[last_item][0] == 0) - last_item--; - - p->curr_sclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 1); - p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 2); - - if (val_vec.size() < (kOD_MCLK_label_array_index + 1)) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } - // The condition below checks if it is the old style or new style format. - if (val_vec[kOD_MCLK_label_array_index] == "OD_MCLK:") { - p->curr_mclk_range.lower_bound = 0; - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 1); - } else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") { - p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 1); - // the upper memory frequency is the last - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, last_item); - return RSMI_STATUS_SUCCESS; - } else { - if (val_vec.size() < (kOD_MCLK_label_array_index + 3)) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } - if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") { - p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_SCLK_label_array_index + 3); - p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 2); - // the upper memory frequency is the last - p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, last_item); - return RSMI_STATUS_SUCCESS; - } - return RSMI_STATUS_NOT_YET_IMPLEMENTED; + // Note: For debug builds/purposes only. + assert(txt_power_dev_od_voltage.contains_title_key(kTAG_GFXCLK) || + txt_power_dev_od_voltage.contains_title_key(kTAG_OD_SCLK)); + // Note: For release builds/purposes. + if (!txt_power_dev_od_voltage.contains_title_key(kTAG_GFXCLK) && + !txt_power_dev_od_voltage.contains_title_key(kTAG_OD_SCLK)) { + return rsmi_status_t::RSMI_STATUS_UNEXPECTED_DATA; } - if (val_vec.size() < kOD_VDDC_CURVE_label_array_index) { - return RSMI_STATUS_UNEXPECTED_SIZE; - } + // Note: Quick helpers for getting 1st and last elements found + auto build_lower_bound = [&](const std::string& prim_key) { + auto lower_bound_data = txt_power_dev_od_voltage.get_structured_data_subkey_first(prim_key); + return std::vector{lower_bound_data}; + }; - p->num_regions = - static_cast((val_vec.size()) / 2); + auto build_upper_bound = [&](const std::string& prim_key) { + auto upper_bound_data = txt_power_dev_od_voltage.get_structured_data_subkey_last(prim_key); + return std::vector{upper_bound_data}; + }; + + // Validates 'OD_SCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(kTAG_OD_SCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_sclk_range.lower_bound = freq_string_to_int(build_lower_bound(kTAG_OD_SCLK), nullptr, nullptr, 0); + p->curr_sclk_range.upper_bound = freq_string_to_int(build_upper_bound(kTAG_OD_SCLK), nullptr, nullptr, 0); + + // Validates 'OD_MCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_OD_MCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_mclk_range.lower_bound = freq_string_to_int(build_lower_bound(KTAG_OD_MCLK), nullptr, nullptr, 0); + p->curr_mclk_range.upper_bound = freq_string_to_int(build_upper_bound(KTAG_OD_MCLK), nullptr, nullptr, 0); + } + } + // Validates 'GFXCLK' is in the structure + else if (txt_power_dev_od_voltage.contains_structured_key(kTAG_GFXCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_sclk_range.lower_bound = freq_string_to_int(build_lower_bound(kTAG_GFXCLK), nullptr, nullptr, 0); + p->curr_sclk_range.upper_bound = freq_string_to_int(build_upper_bound(kTAG_GFXCLK), nullptr, nullptr, 0); + + // Validates 'MCLK' is in the structure + if (txt_power_dev_od_voltage.contains_structured_key(KTAG_MCLK, + KTAG_FIRST_FREQ_IDX)) { + p->curr_mclk_range.lower_bound = freq_string_to_int(build_lower_bound(KTAG_MCLK), nullptr, nullptr, 0); + p->curr_mclk_range.upper_bound = freq_string_to_int(build_upper_bound(KTAG_MCLK), nullptr, nullptr, 0); + } + } + else { + return RSMI_STATUS_NOT_YET_IMPLEMENTED; + } + p->num_regions = 0; return RSMI_STATUS_SUCCESS; CATCH @@ -1561,30 +1562,6 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, } -static void get_vc_region(uint32_t start_ind, - std::vector *val_vec, rsmi_freq_volt_region_t *p) { - std::ostringstream ss; - ss << __PRETTY_FUNCTION__ << " | ======= start ======="; - LOG_TRACE(ss); - assert(p != nullptr); - assert(val_vec != nullptr); - THROW_IF_NULLPTR_DEREF(p) - THROW_IF_NULLPTR_DEREF(val_vec) - - // There must be at least 1 region to read in - assert(val_vec->size() >= kOD_OD_RANGE_label_array_index + 2); - assert((*val_vec)[kOD_OD_RANGE_label_array_index] == "OD_RANGE:"); - if ((val_vec->size() < kOD_OD_RANGE_label_array_index + 2) || - ((*val_vec)[kOD_OD_RANGE_label_array_index] != "OD_RANGE:") ) { - ss << __PRETTY_FUNCTION__ << " | ======= end ======= | returning " - << getRSMIStatusString(RSMI_STATUS_UNEXPECTED_DATA); - LOG_TRACE(ss); - throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__); - } - od_value_pair_str_to_range((*val_vec)[start_ind], &p->freq_range); - od_value_pair_str_to_range((*val_vec)[start_ind + 1], &p->volt_range); -} - /* * num_regions [inout] on calling, the number of regions requested to be read * in. At completion, the number of regions actually read in @@ -1616,23 +1593,20 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind, // This is a work-around to handle systems where kDevPowerODVoltage is not // fully supported yet. - if (val_vec.size() < 2) { + if (val_vec.size() < kMIN_VALID_LINES) { ss << __PRETTY_FUNCTION__ - << " | Issue: val_vec.size() < 2" << "; returning " + << " | Issue: val_vec.size() < " << kMIN_VALID_LINES << "; returning " << getRSMIStatusString(RSMI_STATUS_NOT_YET_IMPLEMENTED); LOG_ERROR(ss); return RSMI_STATUS_NOT_YET_IMPLEMENTED; } uint32_t val_vec_size = static_cast(val_vec.size()); - assert((val_vec_size - kOD_VDDC_CURVE_start_index) > 0); - ss << __PRETTY_FUNCTION__ << " | val_vec_size = " << std::dec - << val_vec_size - << " | kOD_VDDC_CURVE_start_index = " << kOD_VDDC_CURVE_start_index; + << val_vec_size; LOG_DEBUG(ss); - *num_regions = std::min((val_vec_size) / 2, *num_regions); + *num_regions = 0; return RSMI_STATUS_SUCCESS; CATCH diff --git a/projects/rocm-smi-lib/src/rocm_smi_utils.cc b/projects/rocm-smi-lib/src/rocm_smi_utils.cc index 6eeffe2863..8f13606e87 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_utils.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_utils.cc @@ -1098,14 +1098,6 @@ std::string print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv) { ss << pt_rng_Mhz("\t**Current SCLK frequency range: ", &odv->curr_sclk_range); ss << pt_rng_Mhz("\t**Current MCLK frequency range: ", &odv->curr_mclk_range); - ss << pt_rng_Mhz("\t**Min/Max Possible SCLK frequency range: ", - &odv->sclk_freq_limits); - ss << pt_rng_Mhz("\t**Min/Max Possible MCLK frequency range: ", - &odv->mclk_freq_limits); - - ss << "\t**Current Freq/Volt. curve: " << "\n"; - ss << pt_vddc_curve(&odv->curve); - ss << "\t**Number of Freq./Volt. regions: " << odv->num_regions << "\n\n"; return ss.str(); } @@ -1203,5 +1195,6 @@ std::queue getAllDeviceGfxVers() { return deviceGfxVersions; } + } // namespace smi } // namespace amd diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc index 026182eba0..348b169d97 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/mutual_exclusion.cc @@ -186,10 +186,10 @@ void TestMutualExclusion::Run(void) { int64_t dmy_i64 = 0; char dmy_str[10]; rsmi_dev_perf_level_t dmy_perf_lvl; - rsmi_frequencies_t dmy_freqs; - rsmi_od_volt_freq_data_t dmy_od_volt; - rsmi_freq_volt_region_t dmy_vlt_reg; - rsmi_error_count_t dmy_err_cnt; + rsmi_frequencies_t dmy_freqs{}; + rsmi_od_volt_freq_data_t dmy_od_volt{}; + rsmi_freq_volt_region_t dmy_vlt_reg{}; + rsmi_error_count_t dmy_err_cnt{}; rsmi_ras_err_state_t dmy_ras_err_st; // This can be replaced with ASSERT_EQ() once env. stabilizes diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/volt_freq_curv_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/volt_freq_curv_read.cc index 50b6ac057c..360ad7e8b9 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/volt_freq_curv_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/volt_freq_curv_read.cc @@ -87,7 +87,7 @@ void TestVoltCurvRead::Close() { void TestVoltCurvRead::Run(void) { rsmi_status_t err, ret; - rsmi_od_volt_freq_data_t odv; + rsmi_od_volt_freq_data_t odv{}; rsmi_dev_perf_level_t pfl; TestBase::Run(); @@ -134,9 +134,6 @@ void TestVoltCurvRead::Run(void) { IF_VERB(STANDARD) { std::cout << "\t**rsmi_dev_od_volt_info_get(i, nullptr): " << amd::smi::getRSMIStatusString(err, false) << "\n"; - // << "\n" - // << amd::smi::print_rsmi_od_volt_freq_data_t(&odv) - // << "\n"; } ASSERT_TRUE(err == RSMI_STATUS_INVALID_ARGS); err = rsmi_dev_od_volt_info_get(i, &odv); @@ -147,43 +144,5 @@ void TestVoltCurvRead::Run(void) { << "\t**odv.num_regions = " << std::dec << odv.num_regions << "\n"; } - if (err == RSMI_STATUS_SUCCESS) { - std::cout << "\t**Frequency-voltage curve data:" << "\n"; - std::cout << amd::smi::print_rsmi_od_volt_freq_data_t(&odv); - - rsmi_freq_volt_region_t *regions; - uint32_t num_regions; - regions = new rsmi_freq_volt_region_t[odv.num_regions]; - ASSERT_TRUE(regions != nullptr); - - num_regions = odv.num_regions; - err = rsmi_dev_od_volt_curve_regions_get(i, &num_regions, regions); - IF_VERB(STANDARD) { - std::cout << "\t**rsmi_dev_od_volt_curve_regions_get(" - << "i, &num_regions, regions): " - << amd::smi::getRSMIStatusString(err, false) << "\n" - << "\t**Number of regions: " << std::dec << num_regions - << "\n"; - } - ASSERT_TRUE(err == RSMI_STATUS_SUCCESS - || err == RSMI_STATUS_NOT_SUPPORTED - || err == RSMI_STATUS_UNEXPECTED_DATA - || err == RSMI_STATUS_UNEXPECTED_SIZE); - if (err != RSMI_STATUS_SUCCESS) { - IF_VERB(STANDARD) { - std::cout << "\t**rsmi_dev_od_volt_curve_regions_get: " - "Not supported on this machine" << std::endl; - } - continue; - } - CHK_ERR_ASRT(err) - ASSERT_TRUE(num_regions == odv.num_regions); - - std::cout << "\t**Frequency-voltage curve regions:" << std::endl; - std::cout << amd::smi::print_rsmi_od_volt_freq_regions(num_regions, - regions); - - delete []regions; - } } }