[rocm_smi.py] Coarse Grain Utilization Counters

--showuse
--showmemuse

====================================
========= % time GPU is busy =======
GPU[0]          : GPU use (%): 0
GPU[0]          : GFX Activity: 0
====================================

Change-Id: I9db115ad78b394469206b22d195781a430b2f1d8


[ROCm/amdsmi commit: 771b4af95c]
Этот коммит содержится в:
Elena
2021-04-15 22:56:28 -04:00
коммит произвёл Elena Sakhnovitch
родитель 608afb879b
Коммит 23d7d4a5ff
2 изменённых файлов: 55 добавлений и 0 удалений
+43
Просмотреть файл
@@ -1499,6 +1499,41 @@ def showGpusByPid(pidList):
print(None, 'Unable to get list of KFD PIDs. A kernel update may be needed', None)
printLogSpacer()
def getCoarseGrainUtil(device, typeName=None):
    """ Find Coarse Grain Utilization

    If typeName is not given, queries every counter type and returns an
    array of all of them, where counter type and value can be addressed
    like this:

        for ut_counter in utilization_counters:
            printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val)

    @param device: DRM device identifier
    @param typeName: 'GFX Activity', 'Memory Activity'
    @return: ctypes array of rsmi_utilization_counter_t on success;
             -1 if typeName is not a known counter name or the query fails
    """
    timestamp = c_uint64(0)
    if typeName is not None:
        # Only the name lookup can raise ValueError, so keep the try minimal.
        try:
            i = utilization_counter_name.index(typeName)
        except ValueError:
            printLog(None, "No such coarse grain counter type")
            return -1
        length = 1
        utilization_counters = (rsmi_utilization_counter_t * length)()
        # The position of the name in utilization_counter_name is the type id.
        utilization_counters[0].type = c_int(i)
    else:
        length = rsmi_utilization_counter_type.RSMI_UTILIZATION_COUNTER_LAST + 1
        utilization_counters = (rsmi_utilization_counter_t * length)()
        # populate array with all existing types to query
        for i in range(0, length):
            utilization_counters[i].type = c_int(i)
    ret = rocmsmi.rsmi_utilization_count_get(device, utilization_counters, length, byref(timestamp))
    if rsmi_ret_ok(ret, device):
        return utilization_counters
    return -1
def showGpuUse(deviceList):
""" Display GPU use for a list of devices
@@ -1511,6 +1546,10 @@ def showGpuUse(deviceList):
printLog(device, 'GPU use (%)', getGpuUse(device))
else:
printLog(device, 'GPU use Unsupported', None)
util_counters = getCoarseGrainUtil(device, "GFX Activity")
if util_counters != -1:
for ut_counter in util_counters:
printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val)
printLogSpacer()
@@ -1577,6 +1616,10 @@ def showMemUse(deviceList):
ret = rocmsmi.rsmi_dev_memory_busy_percent_get(device, byref(memoryUse))
if rsmi_ret_ok(ret, device, '% memory use'):
printLog(device, 'GPU memory use (%)', memoryUse.value)
util_counters = getCoarseGrainUtil(device, "Memory Activity")
if util_counters != -1:
for ut_counter in util_counters:
printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val)
printLogSpacer()
+12
Просмотреть файл
@@ -384,6 +384,18 @@ fw_block_names_l = ['ASD', 'CE', 'DMCU', 'MC', 'ME', 'MEC', 'MEC2', 'PFP',\
# NOTE(review): c_uint64() constructs a ctypes instance rather than aliasing
# the c_uint64 type — confirm this is what callers expect.
rsmi_bit_field_t = c_uint64()
# Second name bound to the same object as rsmi_bit_field_t.
rsmi_bit_field = rsmi_bit_field_t
class rsmi_utilization_counter_type(c_int):
    """Identifiers for the utilization counter types.

    FIRST and LAST alias the lowest and highest counter ids defined here, so
    LAST + 1 is the number of counter types.
    """
    RSMI_UTILIZATION_COUNTER_FIRST = 0
    RSMI_COARSE_GRAIN_GFX_ACTIVITY = RSMI_UTILIZATION_COUNTER_FIRST
    RSMI_COARSE_GRAIN_MEM_ACTIVITY = 1
    RSMI_UTILIZATION_COUNTER_LAST = RSMI_COARSE_GRAIN_MEM_ACTIVITY
# Display names of the utilization counters. The list index of a name is used
# as the counter's `type` value when querying and when printing results.
utilization_counter_name = ['GFX Activity', 'Memory Activity']
class rsmi_utilization_counter_t(Structure):
    """ctypes structure holding one utilization counter (type id and value)."""
    # type: counter type id, stored as a C int
    # val:  counter value, stored as an unsigned 64-bit integer
    _fields_ = [('type', c_int),
                ('val', c_uint64)]
class rsmi_xgmi_status_t(c_int):
RSMI_XGMI_STATUS_NO_ERRORS = 0