Display min and max bandwidth between gpu nodes

Signed-off-by: Elena Sakhnovitch
Change-Id: I7289fb83f80e2f899996b7d7560ece670cc5f31f


[ROCm/rocm_smi_lib commit: 13cde8429d]
Этот коммит содержится в:
Elena Sakhnovitch
2021-10-26 18:39:23 -04:00
родитель f0a86d3d29
Коммит 8a5effb2e1
+40
Просмотреть файл
@@ -2485,6 +2485,43 @@ def showHwTopology(deviceList):
showNumaTopology(deviceList)
def showNodesBw(deviceList):
    """ Display the min and max bandwidth between every pair of GPU nodes.

    Calls rsmi_minmax_bandwidth_get for each ordered pair of distinct devices
    in deviceList and collects the results in a matrix of "min-max" strings.
    The matrix is either handed to formatMatrixToJSON (when PRINT_JSON is set)
    or printed as a table followed by a short legend.
    Currently supports XGMI only.

    @param deviceList: List of DRM devices (can be a single-item list)
    """
    minBW = c_uint32()
    maxBW = c_uint32()
    # The matrix is indexed by device ID, not by position in deviceList, so it
    # has to be large enough for the highest ID even when only a subset of the
    # devices (e.g. [2, 3]) was requested.
    matrixSize = max(deviceList) + 1 if deviceList else 0
    gpu_links_type = [[0] * matrixSize for _ in range(matrixSize)]
    printLogSpacer(' Bandwidth ')
    for srcdevice in deviceList:
        for destdevice in deviceList:
            if srcdevice == destdevice:
                # A device has no link to itself
                gpu_links_type[srcdevice][destdevice] = "N/A"
                continue
            ret = rocmsmi.rsmi_minmax_bandwidth_get(srcdevice, destdevice, byref(minBW), byref(maxBW))
            # If the query failed, the cell keeps its initial value of 0
            if rsmi_ret_ok(ret, " {} to {}".format(srcdevice, destdevice), None):
                gpu_links_type[srcdevice][destdevice] = "{}-{}".format(minBW.value, maxBW.value)
    if PRINT_JSON:
        # Pass the whole matrix and an unformatted title template rather than
        # the values of whichever pair happened to be queried last (the loop
        # variables are not even bound when deviceList is empty).
        # NOTE(review): assumes formatMatrixToJSON fills the two '{}'
        # placeholders with the device pair and looks values up as
        # matrix[src][dest] - confirm against its definition.
        formatMatrixToJSON(deviceList, gpu_links_type, " min-max bandwidth between DRM devices {} and {}")
        return
    # Header row: pad the blank corner cell to the width of the row labels so
    # the column headers line up with the data columns below
    printTableRow('%-6s', ' ')
    for column in deviceList:
        printTableRow('%-12s', 'GPU%d' % column)
    printEmptyLine()
    # One table row per source device
    for gpu1 in deviceList:
        printTableRow('%-6s', 'GPU%d' % gpu1)
        for gpu2 in deviceList:
            printTableRow('%-12s', gpu_links_type[gpu1][gpu2])
        printEmptyLine()
    printLog(None,"Format: min-max; Units: mps", None)
    printLog(None,'"0-0" min-max bandwidth indicates devices are not connected dirrectly', None)
def checkAmdGpus(deviceList):
""" Check if there are any AMD GPUs being queried,
return False if there are none
@@ -2828,6 +2865,7 @@ if __name__ == '__main__':
groupDisplay.add_argument('--showtoponuma', help='Shows the numa nodes ', action='store_true')
groupDisplay.add_argument('--showenergycounter', help='Energy accumulator that stores amount of energy consumed',
action='store_true')
groupDisplay.add_argument('--shownodesbw', help='Shows the numa nodes ', action='store_true')
groupActionReset.add_argument('-r', '--resetclocks', help='Reset clocks and OverDrive to default',
action='store_true')
@@ -3065,6 +3103,8 @@ if __name__ == '__main__':
showProductName(deviceList)
if args.showxgmierr:
showXgmiErr(deviceList)
if args.shownodesbw:
showNodesBw(deviceList)
if args.showtopo:
showHwTopology(deviceList)
if args.showtopoaccess: