[rocm_smi.py]: showNodesBw fix for non-xGMI links

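Improve the error output of showNodesBw when the link between two nodes is not xGMI: the per-link bandwidth is reported as "N/A" and a single note about unsupported non-xGMI links is printed instead.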

Signed-off-by: Elena Sakhnovitch
Change-Id: I833970d3200a75c7639d33bf19e0e83afe176c8d


[ROCm/amdsmi commit: 44ea49eb01]
Этот коммит содержится в:
Elena Sakhnovitch
2022-05-19 15:26:21 -04:00
коммит произвёл Elena Sakhnovitch
родитель 23b3bcc038
Коммит ccf3ac2b15
+16 -3
Просмотреть файл
@@ -2548,13 +2548,25 @@ def showNodesBw(deviceList):
devices_ind = range(len(deviceList))
minBW = c_uint32()
maxBW = c_uint32()
hops = c_uint64()
linktype = c_uint64()
silent = False
nonXgmi = False
gpu_links_type = [[0 for x in devices_ind] for y in devices_ind]
printLogSpacer(' Bandwidth ')
for srcdevice in deviceList:
for destdevice in deviceList:
if srcdevice != destdevice:
ret = rocmsmi.rsmi_minmax_bandwidth_get(srcdevice, destdevice, byref(minBW), byref(maxBW))
if rsmi_ret_ok(ret, " {} to {}".format(srcdevice, destdevice),None ):
#verify that link type is xgmi
ret2 = rocmsmi.rsmi_topo_get_link_type(srcdevice, destdevice, byref(hops), byref(linktype))
if rsmi_ret_ok(ret2," {} to {}".format(srcdevice, destdevice), None, True):
if linktype.value != 2:
nonXgmi = True
silent= True
gpu_links_type[srcdevice][destdevice] = "N/A"
if rsmi_ret_ok(ret, " {} to {}".format(srcdevice, destdevice),None,silent):
gpu_links_type[srcdevice][destdevice] = "{}-{}".format(minBW.value, maxBW.value)
else:
gpu_links_type[srcdevice][destdevice] = "N/A"
@@ -2573,8 +2585,9 @@ def showNodesBw(deviceList):
printTableRow('%-12s', gpu_links_type[gpu1][gpu2])
printEmptyLine()
printLog(None,"Format: min-max; Units: mps", None)
printLog(None,'"0-0" min-max bandwidth indicates devices are not connected dirrectly', None)
printLog(None,'"0-0" min-max bandwidth indicates devices are not connected directly', None)
if nonXgmi:
printLog(None,"Non-xGMI links detected and is currently not supported", None)
def checkAmdGpus(deviceList):
""" Check if there are any AMD GPUs being queried,