Make rocm_smi.py handle disappearing PIDs
rocm_smi.py looks up process information in two different
places. If the process exited between those two lookups,
the script crashed.
This fix handles that case gracefully by reporting the process name as 'UNKNOWN'.
Reading the process information directly from /proc instead of
calling Python's subprocess.check_output() was considered, but
that approach cannot read the names of processes that are
not owned by the caller.
Change-Id: If812c4641f00da37e99defb0740f670107c8a797
[ROCm/amdsmi commit: db6d8d36ea]
Этот коммит содержится в:
@@ -250,7 +250,7 @@ def getMemInfo(device, memType):
|
||||
return (memUsed, memTotal)
|
||||
|
||||
|
||||
def getName(pid):
|
||||
def getProcessName(pid):
|
||||
""" Get the process name of a specific pid
|
||||
|
||||
@param pid: Process ID of a program to be parsed
|
||||
@@ -258,14 +258,21 @@ def getName(pid):
|
||||
if int(pid) < 1:
|
||||
logging.debug('PID must be greater than 0')
|
||||
return 'UNKNOWN'
|
||||
pName = str(subprocess.check_output("ps -p %d -o comm=" % (int(pid)), shell=True))
|
||||
try:
|
||||
pName = str(subprocess.check_output("ps -p %d -o comm=" % (int(pid)), shell=True))
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(e.output)
|
||||
pName = 'UNKNOWN'
|
||||
|
||||
if pName == None:
|
||||
pName = 'UNKNOWN'
|
||||
|
||||
# Remove the substrings surrounding from process name (b' and \n')
|
||||
if str(pName).startswith('b\''):
|
||||
pName = pName[2:]
|
||||
if str(pName).endswith('\\n\''):
|
||||
pName = pName[:-3]
|
||||
else:
|
||||
pName = 'UNKNOWN'
|
||||
|
||||
return pName
|
||||
|
||||
|
||||
@@ -1664,7 +1671,7 @@ def showPids():
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
else:
|
||||
logging.debug('Unable to fetch process info by PID')
|
||||
dataArray.append([pid, getName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
|
||||
dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
|
||||
printLog(None, 'KFD process information:', None)
|
||||
print2DArray(dataArray)
|
||||
printLogSpacer()
|
||||
|
||||
Ссылка в новой задаче
Block a user