From 24b23e90a8ef90dff2479d7028389d0186d53595 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Thu, 10 Dec 2020 17:42:41 -0600 Subject: [PATCH] Make rocm_smi.py handle disappearing PIDs rocm_smi.py had an issue where it gets process information in 2 different places. If the process disappears in between those 2 places, a crash would occur. This fix gracefully returns 'UNKNOWN' as the process name in this scenario. Reading the file information from /proc instead of using Python's subprocess.check_output() call was considered, but it has the drawback of not being able to read the process names of processes not owned by the caller. Change-Id: If812c4641f00da37e99defb0740f670107c8a797 [ROCm/amdsmi commit: db6d8d36eacf70e472cc7318190b6e021c9715fe] --- projects/amdsmi/python_smi_tools/rocm_smi.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/projects/amdsmi/python_smi_tools/rocm_smi.py b/projects/amdsmi/python_smi_tools/rocm_smi.py index 056cc8306a..1fdb0ba479 100755 --- a/projects/amdsmi/python_smi_tools/rocm_smi.py +++ b/projects/amdsmi/python_smi_tools/rocm_smi.py @@ -250,7 +250,7 @@ def getMemInfo(device, memType): return (memUsed, memTotal) -def getName(pid): +def getProcessName(pid): """ Get the process name of a specific pid @param pid: Process ID of a program to be parsed @@ -258,14 +258,21 @@ def getName(pid): if int(pid) < 1: logging.debug('PID must be greater than 0') return 'UNKNOWN' - pName = str(subprocess.check_output("ps -p %d -o comm=" % (int(pid)), shell=True)) + try: + pName = str(subprocess.check_output("ps -p %d -o comm=" % (int(pid)), shell=True)) + except subprocess.CalledProcessError as e: + print(e.output) + pName = 'UNKNOWN' + + if pName == None: + pName = 'UNKNOWN' + # Remove the substrings surrounding from process name (b' and \n') if str(pName).startswith('b\''): pName = pName[2:] if str(pName).endswith('\\n\''): pName = pName[:-3] - else: - pName = 'UNKNOWN' + return pName @@ -1664,7 +1671,7 @@ def showPids(): cuOccupancy = 
proc.cu_occupancy else: logging.debug('Unable to fetch process info by PID') - dataArray.append([pid, getName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)]) + dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)]) printLog(None, 'KFD process information:', None) print2DArray(dataArray) printLogSpacer()