From bc13dfe3c8d894e553dec7803d6160e304cef4ca Mon Sep 17 00:00:00 2001 From: "Galantsev, Dmitrii" Date: Tue, 14 Jan 2025 17:15:18 -0600 Subject: [PATCH] [SWDEV-495169] Update ROCm SMI CLI and Error handling (#3) Issues include: Update ROCm SMI displaying None or Not Supported to N/A Update ROCm SMI displaying err msg to instead log err Signed-off-by: Juan Castillo juan.castillo@amd.com Change-Id: I1a2ce6e4f329666b5666664a7d7b4475d6c1cbc7 [ROCm/rocm_smi_lib commit: 55ee3cc4428027f22506d977f1e05a9f9e8eb1f1] --- projects/rocm-smi-lib/python_smi_tools/rocm_smi.py | 12 +++++++++--- projects/rocm-smi-lib/src/rocm_smi.cc | 10 ++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py index e8408fac3d..c9ca318e5d 100755 --- a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py +++ b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py @@ -1981,22 +1981,28 @@ def showAllConcise(deviceList): + getComputePartition(device, silent) + ", " + getPartitionId(device, silent)) sclk = showCurrentClocks([device], 'sclk', concise=silent) + if not sclk: + sclk = 'N/A' mclk = showCurrentClocks([device], 'mclk', concise=silent) + if not mclk: + mclk = 'N/A' (retCode, fanLevel, fanSpeed) = getFanSpeed(device, silent) fan = str(fanSpeed) + '%' if getPerfLevel(device, silent) != -1: perf = getPerfLevel(device, silent) else: - perf = 'Unsupported' + perf = 'N/A' if getMaxPower(device, silent) != -1: pwrCap = str(getMaxPower(device, silent)) + 'W' else: - pwrCap = 'Unsupported' + pwrCap = 'N/A' if getGpuUse(device, silent) != -1: gpu_busy = str(getGpuUse(device, silent)) + '%' else: - gpu_busy = 'Unsupported' + gpu_busy = 'N/A' allocated_mem_percent = getAllocatedMemoryPercent(device) + if allocated_mem_percent['ret'] != rsmi_status_t.RSMI_STATUS_SUCCESS: + allocated_mem_percent['combined'] = 'N/A' # Top Row - per device data values['card%s' % (str(device))] = [device, getNodeId(device), diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 6f355a576d..63c90d464f 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -368,8 +368,9 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type, } if (!amd::smi::IsInteger(val_str)) { - std::cerr << "Expected integer value from monitor," - " but got \"" << val_str << "\"" << std::endl; + std::ostringstream ss; + ss << "Expected integer value from monitor, but got \"" << val_str << "\""; + LOG_ERROR(ss); return RSMI_STATUS_UNEXPECTED_DATA; } @@ -397,8 +398,9 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type, } if (!amd::smi::IsInteger(val_str)) { - std::cerr << "Expected integer value from monitor," - " but got \"" << val_str << "\"" << std::endl; + std::ostringstream ss; + ss << "Expected integer value from monitor, but got \"" << val_str << "\""; + LOG_ERROR(ss); return RSMI_STATUS_UNEXPECTED_DATA; }