Update get_frequencies to handle failures.

Show an optional debug log (RSMI_DEBUG_BITFIELD=2) to
the user in the following scenarios:
1. If more than one current frequency is found
2. If frequencies are not read in increasing order of
   their value
If the current frequency is not available, its index is
set to -1 and no value will have a * next to it in the
output. This will also be handled in rocm_smi.py.

Signed-off-by: Divya Shikre <DivyaUday.Shikre@amd.com>
Change-Id: I477ec065f7513c8045d6392f12ef6cb835a6b8f6


[ROCm/amdsmi commit: afe996c2ed]
Этот коммит содержится в:
Divya Shikre
2022-05-03 18:41:45 -04:00
коммит произвёл Divya Uday Shikre
родитель 3cbd1652de
Коммит f4e33b90c9
3 изменённых файлов: 45 добавлений и 9 удалений
+4 -1
Просмотреть файл
@@ -2269,11 +2269,14 @@ rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od);
*
* @param[inout] f a pointer to a caller provided ::rsmi_frequencies_t structure
* to which the frequency information will be written. Frequency values are in
* Hz.
* Hz.
* If this parameter is nullptr, this function will return
* ::RSMI_STATUS_INVALID_ARGS if the function is supported with the provided
* arguments, and ::RSMI_STATUS_NOT_SUPPORTED if it is not supported with the
* provided arguments.
* If multiple current frequencies are found, a warning is shown. If no
* current frequency is found, the current index is set to -1. If frequencies
* are not read from low to high, a warning is shown as well.
*
* @retval ::RSMI_STATUS_SUCCESS call was successful
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
+9
Просмотреть файл
@@ -1453,6 +1453,9 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
ret = rocmsmi.rsmi_dev_gpu_clk_freq_get(device, rsmi_clk_names_dict[clk_defined], byref(freq))
if rsmi_ret_ok(ret, device, clk_defined, True):
levl = freq.current
if levl >= freq.num_supported:
printLog(device, '%s current clock frequency not found' % (clk_defined), None)
continue
fr = freq.frequency[levl] / 1000000
if concise: # in case function is used for concise output, no need to print.
return '{:.0f}Mhz'.format(fr)
@@ -1466,6 +1469,9 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
ret = rocmsmi.rsmi_dev_gpu_clk_freq_get(device, rsmi_clk_names_dict[clk_type], byref(freq))
if rsmi_ret_ok(ret, device, clk_type, True):
levl = freq.current
if levl >= freq.num_supported:
printLog(device, '%s current clock frequency not found' % (clk_type), None)
continue
fr = freq.frequency[levl] / 1000000
if PRINT_JSON:
printLog(device, '%s clock speed:' % (clk_type), '(%sMhz)' % (str(fr)[:-2]))
@@ -1479,6 +1485,9 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
ret = rocmsmi.rsmi_dev_pci_bandwidth_get(device, byref(bw))
if rsmi_ret_ok(ret, device, 'PCIe', True):
current_f = bw.transfer_rate.current
if current_f >= bw.transfer_rate.num_supported:
printLog(device, 'PCIe current clock frequency not found', None )
continue
fr = '{:.1f}GT/s x{}'.format(bw.transfer_rate.frequency[current_f] / 1000000000,
bw.lanes[current_f])
printLog(device, 'pcie clock level', '{} ({})'.format(current_f, fr))
+32 -8
Просмотреть файл
@@ -77,6 +77,15 @@
static const uint32_t kMaxOverdriveLevel = 20;
static const float kEnergyCounterResolution = 15.3f;
// Maps each clock type to the short label that get_frequencies() prefixes to
// its debug-log messages.
std::map<rsmi_clk_type_t, std::string> ClkStateMap = {
{RSMI_CLK_TYPE_SYS, "SCLK"},
{RSMI_CLK_TYPE_DF, "DFCLK"},
{RSMI_CLK_TYPE_DCEF, "DCEFCLK"},
{RSMI_CLK_TYPE_SOC, "SOCCLK"},
{RSMI_CLK_TYPE_MEM, "MCLK"},
{RSMI_CLK_TYPE_PCIE, "PCIECLK"},
};
// TRY/CATCH bracket a function body: any exception thrown between them is
// caught and the function returns the result of amd::smi::handleException().
#define TRY try {
#define CATCH } catch (...) {return amd::smi::handleException();}
@@ -895,7 +904,7 @@ rsmi_dev_perf_level_set_v1(uint32_t dv_ind, rsmi_dev_perf_level_t perf_level) {
}
static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type,
static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_t clk_type,
uint32_t dv_ind, rsmi_frequencies_t *f, uint32_t *lanes = nullptr) {
TRY
std::vector<std::string> val_vec;
@@ -925,19 +934,34 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type,
// Our assumption is that frequencies are read in from lowest to highest.
// Check that that is true.
if (i > 0) {
assert(f->frequency[i-1] <= f->frequency[i]);
if (f->frequency[i] < f->frequency[i-1]) {
std::string sysvalue = ClkStateMap[clk_type];
sysvalue += " Current Value";
sysvalue += ' ' + std::to_string(f->frequency[i]);
sysvalue += " Previous Value";
sysvalue += ' ' + std::to_string(f->frequency[i-1]);
DEBUG_LOG("Frequencies are not read from lowest to highest. ", sysvalue);
}
}
if (current) {
// Should only be 1 current frequency
assert(f->current == RSMI_MAX_NUM_FREQUENCIES + 1);
f->current = i;
// set the current frequency
if (f->current != RSMI_MAX_NUM_FREQUENCIES + 1) {
std::string sysvalue = ClkStateMap[clk_type];
sysvalue += " Current Value";
sysvalue += ' ' + std::to_string(f->frequency[i]);
sysvalue += " Previous Value";
sysvalue += ' ' + std::to_string(f->frequency[f->current]);
DEBUG_LOG("More than one current clock. ", sysvalue);
}
else
f->current = i;
}
}
// Some older drivers will not have the current frequency set
// assert(f->current < f->num_supported);
if (f->current >= f->num_supported) {
return RSMI_STATUS_NOT_SUPPORTED;
f->current = -1;
}
return RSMI_STATUS_SUCCESS;
@@ -1444,7 +1468,7 @@ rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type,
DEVICE_MUTEX
return get_frequencies(dev_type, dv_ind, f);
return get_frequencies(dev_type, clk_type, dv_ind, f);
CATCH
}
@@ -2006,7 +2030,7 @@ rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *b) {
DEVICE_MUTEX
return get_frequencies(amd::smi::kDevPCIEClk, dv_ind,
return get_frequencies(amd::smi::kDevPCIEClk, RSMI_CLK_TYPE_PCIE, dv_ind,
&b->transfer_rate, b->lanes);
CATCH