[SWDEV-484382] Fix VCLK/DCLK outputs for monitor, static, metric
Units were off, and VCLK/DCLK outputs were not being reported
properly through amdsmi_get_clk_freq().
We now match the units sent back through rsmi_dev_gpu_clk_freq_get (MHz).
The CLI now shows a maximum of 2 VCLK/DCLKs, and shows N/A if there
is no current_freq listed.
Change-Id: I8a7b66cbb5263e8d396f8568c104e1ce3512923d
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/amdsmi commit: 3226a1d0ea]
Этот коммит содержится в:
коммит произвёл
Arif, Maisam
родитель
9d5eada975
Коммит
bf4bbef085
@@ -846,9 +846,14 @@ class AMDSMICommands():
|
||||
logging.debug("Failed to get cache info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
static_dict['cache_info'] = cache_info_list
|
||||
if 'clock' in current_platform_args:
|
||||
if isinstance(args.clock, bool) and args.clock == True:
|
||||
|
||||
# default to printing all clocks, if in current_platform_args; otherwise print specific clocks
|
||||
if ((args.clock == True or isinstance(args.clock, list))
|
||||
and 'clock' in current_platform_args):
|
||||
original_clock_args = args.clock #save original args.clock value, so we can reset for multiple devices
|
||||
if isinstance(args.clock, bool):
|
||||
args.clock = ['sys', 'mem', 'df', 'soc', 'dcef', 'vclk0', 'vclk1', 'dclk0', 'dclk1']
|
||||
|
||||
if isinstance(args.clock, list):
|
||||
# remove potential duplicates from list
|
||||
args.clock = list(set(args.clock))
|
||||
@@ -888,9 +893,15 @@ class AMDSMICommands():
|
||||
freq_dict = {}
|
||||
freq_dict.update({'current level':frequencies['current']})
|
||||
freq_dict.update({'frequency_levels':{}})
|
||||
for level in range(len(frequencies['frequency'])):
|
||||
freq = str(self.helpers.convert_SI_unit(frequencies['frequency'][level], AMDSMIHelpers.SI_Unit.MICRO)) + " MHz"
|
||||
freq_dict['frequency_levels'].update({level:freq})
|
||||
if frequencies["num_supported"] != 0:
|
||||
for level in range(len(frequencies['frequency'])):
|
||||
if frequencies['frequency'][level] != "N/A":
|
||||
freq = str(self.helpers.convert_SI_unit(frequencies['frequency'][level], AMDSMIHelpers.SI_Unit.MICRO)) + " MHz"
|
||||
freq_dict['frequency_levels'].update({level:freq})
|
||||
else:
|
||||
freq_dict['frequency_levels'].update("N/A")
|
||||
else:
|
||||
freq_dict = "N/A"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
freq_dict = "N/A"
|
||||
clk_dict.update({clk:freq_dict})
|
||||
@@ -898,6 +909,9 @@ class AMDSMICommands():
|
||||
static_dict['clock'] = clk_dict
|
||||
else:
|
||||
raise amdsmi_exception.AmdSmiParameterException(args.clock, list[str])
|
||||
# if original_clock_args is a boolean, set it back to the original value
|
||||
if isinstance(original_clock_args, bool):
|
||||
args.clock = original_clock_args
|
||||
|
||||
# Convert and store output by pid for csv format
|
||||
multiple_devices_csv_override = False
|
||||
@@ -1678,7 +1692,8 @@ class AMDSMICommands():
|
||||
"clk_locked" : "N/A",
|
||||
"deep_sleep" : "N/A"}
|
||||
|
||||
for clock_index in range(amdsmi_interface.AMDSMI_MAX_NUM_CLKS):
|
||||
kMAX_NUM_VCLKS = 2
|
||||
for clock_index in range(kMAX_NUM_VCLKS):
|
||||
vclk_index = f"vclk_{clock_index}"
|
||||
clocks[vclk_index] = {"clk" : "N/A",
|
||||
"min_clk" : "N/A",
|
||||
@@ -1686,7 +1701,8 @@ class AMDSMICommands():
|
||||
"clk_locked" : "N/A",
|
||||
"deep_sleep" : "N/A"}
|
||||
|
||||
for clock_index in range(amdsmi_interface.AMDSMI_MAX_NUM_CLKS):
|
||||
kMAX_NUM_DCLKS = 2
|
||||
for clock_index in range(kMAX_NUM_DCLKS):
|
||||
dclk_index = f"dclk_{clock_index}"
|
||||
clocks[dclk_index] = {"clk" : "N/A",
|
||||
"min_clk" : "N/A",
|
||||
@@ -1851,34 +1867,43 @@ class AMDSMICommands():
|
||||
logging.debug("Failed to get mem clock info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
# VCLK & DCLK min and max clocks
|
||||
try:
|
||||
vclk0_clock_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu,
|
||||
amdsmi_interface.AmdSmiClkType.VCLK0)
|
||||
for clock_index in range(kMAX_NUM_DCLKS):
|
||||
vclk_index = f"vclk_{clock_index}"
|
||||
dclk_index = f"dclk_{clock_index}"
|
||||
vclk_clock_info_dict = {"min_clk": "N/A", "max_clk": "N/A"}
|
||||
dclk_clock_info_dict = {"min_clk": "N/A", "max_clk": "N/A"}
|
||||
if clock_index == 0:
|
||||
try:
|
||||
vclk_clock_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu,
|
||||
amdsmi_interface.AmdSmiClkType.VCLK0)
|
||||
dclk_clock_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu,
|
||||
amdsmi_interface.AmdSmiClkType.DCLK0)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
logging.debug("Failed to get vclk0 and/or dclk0 clock info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
if clock_index == 1:
|
||||
try:
|
||||
vclk_clock_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu,
|
||||
amdsmi_interface.AmdSmiClkType.VCLK1)
|
||||
dclk_clock_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu,
|
||||
amdsmi_interface.AmdSmiClkType.DCLK1)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
logging.debug("Failed to get vclk1 and/or dclk1 clock info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
dclk0_clock_info_dict = amdsmi_interface.amdsmi_get_clock_info(args.gpu,
|
||||
amdsmi_interface.AmdSmiClkType.DCLK0)
|
||||
|
||||
for clock_index in range(amdsmi_interface.AMDSMI_MAX_NUM_CLKS):
|
||||
vclk_index = f"vclk_{clock_index}"
|
||||
# if the current clock is N/A then we shouldn't populate the max and min values
|
||||
if clocks[vclk_index]["clk"] != "N/A":
|
||||
clocks[vclk_index]["min_clk"] = self.helpers.unit_format(self.logger,
|
||||
vclk0_clock_info_dict["min_clk"],
|
||||
clock_unit)
|
||||
clocks[vclk_index]["max_clk"] = self.helpers.unit_format(self.logger,
|
||||
vclk0_clock_info_dict["max_clk"],
|
||||
clock_unit)
|
||||
|
||||
dclk_index = f"dclk_{clock_index}"
|
||||
if clocks[dclk_index]["clk"] != "N/A":
|
||||
clocks[dclk_index]["min_clk"] = self.helpers.unit_format(self.logger,
|
||||
dclk0_clock_info_dict["min_clk"],
|
||||
clock_unit)
|
||||
clocks[dclk_index]["max_clk"] = self.helpers.unit_format(self.logger,
|
||||
dclk0_clock_info_dict["max_clk"],
|
||||
clock_unit)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
logging.debug("Failed to get vclk and/or dclk clock info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
# if the current clock is N/A then we shouldn't populate the max and min values
|
||||
if (vclk_clock_info_dict["min_clk"] != "N/A" or vclk_clock_info_dict["max_clk"] != "N/A") and clock_index == 0:
|
||||
clocks[vclk_index]["min_clk"] = self.helpers.unit_format(self.logger,
|
||||
vclk_clock_info_dict["min_clk"],
|
||||
clock_unit)
|
||||
clocks[vclk_index]["max_clk"] = self.helpers.unit_format(self.logger,
|
||||
vclk_clock_info_dict["max_clk"],
|
||||
clock_unit)
|
||||
if (dclk_clock_info_dict["min_clk"] != "N/A" or dclk_clock_info_dict["max_clk"] != "N/A") and clock_index == 1:
|
||||
clocks[dclk_index]["min_clk"] = self.helpers.unit_format(self.logger,
|
||||
dclk_clock_info_dict["min_clk"],
|
||||
clock_unit)
|
||||
clocks[dclk_index]["max_clk"] = self.helpers.unit_format(self.logger,
|
||||
dclk_clock_info_dict["max_clk"],
|
||||
clock_unit)
|
||||
|
||||
# FCLK min and max clocks
|
||||
try:
|
||||
@@ -5145,7 +5170,7 @@ class AMDSMICommands():
|
||||
monitor_values['vclock'] = "N/A"
|
||||
logging.debug("Failed to get dclock on gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
self.logger.table_header += 'VCLOCK'.rjust(8)
|
||||
self.logger.table_header += 'VCLOCK'.rjust(10)
|
||||
|
||||
try:
|
||||
dclock = amdsmi_interface.amdsmi_get_gpu_metrics_info(args.gpu)['current_dclk0']
|
||||
@@ -5162,7 +5187,7 @@ class AMDSMICommands():
|
||||
monitor_values['dclock'] = "N/A"
|
||||
logging.debug("Failed to get vclock on gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
self.logger.table_header += 'DCLOCK'.rjust(8)
|
||||
self.logger.table_header += 'DCLOCK'.rjust(10)
|
||||
|
||||
if args.ecc:
|
||||
try:
|
||||
|
||||
@@ -120,7 +120,7 @@ class AMDSMILogger():
|
||||
elif key in ('gfx_clock', 'mem_clock', 'vram_used'):
|
||||
table_values += string_value.rjust(11)
|
||||
elif key in ('vclock', 'dclock'):
|
||||
table_values += string_value.rjust(8)
|
||||
table_values += string_value.rjust(10)
|
||||
elif key == 'vram_total' or 'ecc' in key or key == 'pcie_bw':
|
||||
table_values += string_value.rjust(12)
|
||||
elif key in ['pcie_replay']:
|
||||
|
||||
@@ -3688,11 +3688,12 @@ def amdsmi_get_clk_freq(
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
dict_ret = {
|
||||
"num_supported": freq.num_supported,
|
||||
"current": freq.current,
|
||||
"frequency": list(freq.frequency)[: freq.num_supported],
|
||||
}
|
||||
return dict_ret
|
||||
|
||||
|
||||
def amdsmi_get_soc_pstate(
|
||||
|
||||
@@ -124,6 +124,7 @@ std::string removeString(const std::string origStr,
|
||||
const std::string &removeMe);
|
||||
void system_wait(int milli_seconds);
|
||||
int countDigit(uint64_t n);
|
||||
uint64_t get_multiplier_from_str(char units_char);
|
||||
template <typename T>
|
||||
std::string print_int_as_hex(T i, bool showHexNotation = true,
|
||||
int overloadBitSize = 0) {
|
||||
|
||||
@@ -1212,7 +1212,7 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
memset(f, 0, sizeof(rsmi_frequencies_t));
|
||||
f->current=0;
|
||||
f->current = 0;
|
||||
|
||||
ret = GetDevValueVec(type, dv_ind, &val_vec);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
|
||||
@@ -1280,5 +1280,35 @@ int countDigit(uint64_t n) {
|
||||
return static_cast<int>(std::floor(log10(static_cast<double>(n)) + 1));
|
||||
}
|
||||
|
||||
uint64_t get_multiplier_from_str(char units_char) {
|
||||
uint32_t multiplier = 0;
|
||||
|
||||
switch (units_char) {
|
||||
case 'G': // GT or GHz
|
||||
multiplier = 1000000000;
|
||||
break;
|
||||
|
||||
case 'M': // MT or MHz
|
||||
multiplier = 1000000;
|
||||
break;
|
||||
|
||||
case 'K': // KT or KHz
|
||||
case 'V': // default unit for voltage is mV
|
||||
multiplier = 1000;
|
||||
break;
|
||||
|
||||
case 'T': // Transactions
|
||||
case 'H': // Hertz
|
||||
case 'm': // mV (we will make mV the default unit for voltage)
|
||||
multiplier = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false); // Unexpected units for frequency
|
||||
throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__);
|
||||
}
|
||||
return multiplier;
|
||||
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
@@ -1868,6 +1868,9 @@ amdsmi_status_t amdsmi_get_clk_freq(amdsmi_processor_handle processor_handle,
|
||||
clk_type == AMDSMI_CLK_TYPE_VCLK1 ||
|
||||
clk_type == AMDSMI_CLK_TYPE_DCLK0 ||
|
||||
clk_type == AMDSMI_CLK_TYPE_DCLK1 ) {
|
||||
// Default unit is MHz
|
||||
char unit = 'M';
|
||||
|
||||
// when f == nullptr -> check if metrics are supported
|
||||
amdsmi_gpu_metrics_t metric_info;
|
||||
amdsmi_gpu_metrics_t * metric_info_p = nullptr;
|
||||
@@ -1882,22 +1885,42 @@ amdsmi_status_t amdsmi_get_clk_freq(amdsmi_processor_handle processor_handle,
|
||||
if (r_status != AMDSMI_STATUS_SUCCESS)
|
||||
return r_status;
|
||||
|
||||
f->num_supported = 1;
|
||||
f->num_supported = 0;
|
||||
if (clk_type == AMDSMI_CLK_TYPE_VCLK0) {
|
||||
f->current = metric_info_p->current_vclk0;
|
||||
f->frequency[0] = metric_info_p->average_vclk0_frequency;
|
||||
f->current = 0;
|
||||
f->frequency[0] = std::numeric_limits<uint64_t>::max();
|
||||
if (metric_info_p->current_vclk0 != std::numeric_limits<uint16_t>::max()) {
|
||||
f->frequency[0] = static_cast<uint64_t>(metric_info_p->current_vclk0)
|
||||
* amd::smi::get_multiplier_from_str(unit); // match MHz ROCm SMI provides
|
||||
f->num_supported = 1;
|
||||
}
|
||||
}
|
||||
if (clk_type == AMDSMI_CLK_TYPE_VCLK1) {
|
||||
f->current = metric_info_p->current_vclk1;
|
||||
f->frequency[0] = metric_info_p->average_vclk1_frequency;
|
||||
f->current = 0;
|
||||
f->frequency[0] = std::numeric_limits<uint64_t>::max();
|
||||
if (metric_info_p->current_vclk1 != std::numeric_limits<uint16_t>::max()) {
|
||||
f->frequency[0] = static_cast<uint64_t>(metric_info_p->current_vclk1)
|
||||
* amd::smi::get_multiplier_from_str(unit); // match MHz ROCm SMI provides
|
||||
f->num_supported = 1;
|
||||
}
|
||||
}
|
||||
if (clk_type == AMDSMI_CLK_TYPE_DCLK0) {
|
||||
f->current = metric_info_p->current_dclk0;
|
||||
f->frequency[0] = metric_info_p->average_dclk0_frequency;
|
||||
f->current = 0;
|
||||
f->frequency[0] = std::numeric_limits<uint64_t>::max();
|
||||
if (metric_info_p->current_dclk0 != std::numeric_limits<uint16_t>::max()) {
|
||||
f->frequency[0] = static_cast<uint64_t>(metric_info_p->current_dclk0)
|
||||
* amd::smi::get_multiplier_from_str(unit); // match MHz ROCm SMI provides
|
||||
f->num_supported = 1;
|
||||
}
|
||||
}
|
||||
if (clk_type == AMDSMI_CLK_TYPE_DCLK1) {
|
||||
f->current = metric_info_p->current_dclk1;
|
||||
f->frequency[0] = metric_info_p->average_dclk1_frequency;
|
||||
f->current = 0;
|
||||
f->frequency[0] = std::numeric_limits<uint64_t>::max();
|
||||
if (metric_info_p->current_dclk1 != std::numeric_limits<uint16_t>::max()) {
|
||||
f->frequency[0] = static_cast<uint64_t>(metric_info_p->current_dclk1)
|
||||
* amd::smi::get_multiplier_from_str(unit); // match MHz ROCm SMI provides
|
||||
f->num_supported = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return r_status;
|
||||
|
||||
Ссылка в новой задаче
Block a user