From 23d7d4a5ffa0fcbe1436c298feae6b69fead32e9 Mon Sep 17 00:00:00 2001
From: Elena
Date: Thu, 15 Apr 2021 22:56:28 -0400
Subject: [PATCH] [rocm_smi.py] Coarse Grain Utilization Counters

--showuse
--showmemuse

====================================
========= % time GPU is busy =======
GPU[0] : GPU use (%): 0
GPU[0] : GFX Activity: 0
====================================

Change-Id: I9db115ad78b394469206b22d195781a430b2f1d8

[ROCm/amdsmi commit: 771b4af95c8ab25576091ee133bf40883d2c5553]
---
 projects/amdsmi/python_smi_tools/rocm_smi.py  | 43 +++++++++++++++++++
 .../amdsmi/python_smi_tools/rsmiBindings.py   | 12 ++++++
 2 files changed, 55 insertions(+)

diff --git a/projects/amdsmi/python_smi_tools/rocm_smi.py b/projects/amdsmi/python_smi_tools/rocm_smi.py
index 37fbc081fb..cbc639cb96 100755
--- a/projects/amdsmi/python_smi_tools/rocm_smi.py
+++ b/projects/amdsmi/python_smi_tools/rocm_smi.py
@@ -1499,6 +1499,41 @@ def showGpusByPid(pidList):
         print(None, 'Unable to get list of KFD PIDs. A kernel update may be needed', None)
     printLogSpacer()
 
+def getCoarseGrainUtil(device, typeName = None):
+    """ Find Coarse Grain Utilization
+    Returns a ctypes array of counters (all types if typeName is None) or -1 on
+    failure; counter type and value can be read like this:
+
+    for ut_counter in utilization_counters:
+        printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val)
+    @param device: DRM device identifier
+    @param typeName: 'GFX Activity', 'Memory Activity'
+    """
+    timestamp = c_uint64(0)
+
+    if typeName is not None:
+
+        try:
+            i = utilization_counter_name.index(typeName)
+            length = 1
+            utilization_counters = (rsmi_utilization_counter_t * length)()
+            utilization_counters[0].type = c_int(i)
+        except ValueError:
+            printLog(None, "No such coarse grain counter type", None)
+            return -1
+
+    else:
+        length = rsmi_utilization_counter_type.RSMI_UTILIZATION_COUNTER_LAST + 1
+        utilization_counters = (rsmi_utilization_counter_t * length)()
+        # populate array with all existing types to query
+        for i in range(length):
+            utilization_counters[i].type = c_int(i)
+
+    ret = rocmsmi.rsmi_utilization_count_get(device, utilization_counters, length, byref(timestamp))
+    if rsmi_ret_ok(ret, device):
+        return utilization_counters
+    return -1
+
 def showGpuUse(deviceList):
     """ Display GPU use for a list of devices
 
@@ -1511,6 +1546,10 @@ def showGpuUse(deviceList):
             printLog(device, 'GPU use (%)', getGpuUse(device))
         else:
             printLog(device, 'GPU use Unsupported', None)
+        util_counters = getCoarseGrainUtil(device, "GFX Activity")
+        if util_counters != -1:
+            for ut_counter in util_counters:
+                printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val)
     printLogSpacer()
 
 
@@ -1577,6 +1616,10 @@ def showMemUse(deviceList):
         ret = rocmsmi.rsmi_dev_memory_busy_percent_get(device, byref(memoryUse))
         if rsmi_ret_ok(ret, device, '% memory use'):
             printLog(device, 'GPU memory use (%)', memoryUse.value)
+        util_counters = getCoarseGrainUtil(device, "Memory Activity")
+        if util_counters != -1:
+            for ut_counter in util_counters:
+                printLog(device, utilization_counter_name[ut_counter.type], ut_counter.val)
     printLogSpacer()
 
 
diff --git a/projects/amdsmi/python_smi_tools/rsmiBindings.py b/projects/amdsmi/python_smi_tools/rsmiBindings.py
index 09bfa6eb10..616b0bd960 100644
--- a/projects/amdsmi/python_smi_tools/rsmiBindings.py
+++ b/projects/amdsmi/python_smi_tools/rsmiBindings.py
@@ -384,6 +384,18 @@ fw_block_names_l = ['ASD', 'CE', 'DMCU', 'MC', 'ME', 'MEC', 'MEC2', 'PFP',\
 rsmi_bit_field_t = c_uint64()
 rsmi_bit_field = rsmi_bit_field_t
 
+class rsmi_utilization_counter_type(c_int):
+    RSMI_UTILIZATION_COUNTER_FIRST = 0
+    RSMI_COARSE_GRAIN_GFX_ACTIVITY = RSMI_UTILIZATION_COUNTER_FIRST
+    RSMI_COARSE_GRAIN_MEM_ACTIVITY = 1
+    RSMI_UTILIZATION_COUNTER_LAST = RSMI_COARSE_GRAIN_MEM_ACTIVITY
+
+utilization_counter_name = ['GFX Activity', 'Memory Activity']
+
+class rsmi_utilization_counter_t(Structure):
+    _fields_ = [('type', c_int),
+                ('val', c_uint64)]
+
 class rsmi_xgmi_status_t(c_int):
     RSMI_XGMI_STATUS_NO_ERRORS = 0