From 759d14709d11d1032920f81acb65c69982939a3e Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Tue, 4 Apr 2023 12:46:38 -0500 Subject: [PATCH] Validate the clock frequency when setting it Add a check of the clock frequency bitmask when setting it. Change-Id: I707291bfb5007bb69100c780af50a4b0f697bb37 [ROCm/amdsmi commit: b6789891b05e9097fd2c197bb77a77ae6b8e3d73] --- projects/amdsmi/python_smi_tools/rocm_smi.py | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/projects/amdsmi/python_smi_tools/rocm_smi.py b/projects/amdsmi/python_smi_tools/rocm_smi.py index d682ab4490..665ba2feb3 100755 --- a/projects/amdsmi/python_smi_tools/rocm_smi.py +++ b/projects/amdsmi/python_smi_tools/rocm_smi.py @@ -1076,6 +1076,19 @@ def setClocks(deviceList, clktype, clk): RETCODE = 1 return if clktype != 'pcie': + # Validate frequency bitmask + freq = rsmi_frequencies_t() + ret = rocmsmi.rsmi_dev_gpu_clk_freq_get(device, rsmi_clk_names_dict[clktype], byref(freq)) + if rsmi_ret_ok(ret, device, clktype) == False: + RETCODE = 1 + return + # The freq_bitmask should be less than 2^(freqs.num_supported) + # For example, num_supported == 3, the max bitmask is 0111 + if freq_bitmask >= (1 << freq.num_supported): + printErrLog(device, 'Invalid clock frequency %s' % hex(freq_bitmask)) + RETCODE = 1 + return + ret = rocmsmi.rsmi_dev_gpu_clk_freq_set(device, rsmi_clk_names_dict[clktype], freq_bitmask) if rsmi_ret_ok(ret, device): printLog(device, 'Successfully set %s bitmask to' % (clktype), hex(freq_bitmask)) @@ -1083,6 +1096,19 @@ def setClocks(deviceList, clktype, clk): printErrLog(device, 'Unable to set %s bitmask to: %s' % (clktype, hex(freq_bitmask))) RETCODE = 1 else: + # Validate the bandwidth bitmask + bw = rsmi_pcie_bandwidth_t() + ret = rocmsmi.rsmi_dev_pci_bandwidth_get(device, byref(bw)) + if rsmi_ret_ok(ret, device, 'PCIe') == False: + RETCODE = 1 + return + # The freq_bitmask should be less than 2^(bw.transfer_rate.num_supported) + # For example, num_supported == 3, the 
max bitmask is 0111 + if freq_bitmask >= (1 << bw.transfer_rate.num_supported): + printErrLog(device, 'Invalid PCIe frequency %s' % hex(freq_bitmask)) + RETCODE = 1 + return + ret = rocmsmi.rsmi_dev_pci_bandwidth_set(device, freq_bitmask) if rsmi_ret_ok(ret, device): printLog(device, 'Successfully set %s to level bitmask' % (clktype), hex(freq_bitmask))