diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 28f0bd795b..77710ba7b3 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -1134,6 +1134,8 @@ typedef struct { uint32_t cu_occupancy; //!< Compute Unit usage in percent } rsmi_process_info_t; +//! Sentinel cu_occupancy value reported for GFX revisions that do not provide the cu_occupancy debugfs method +#define CU_OCCUPANCY_INVALID 0xFFFFFFFF /** * @brief Opaque handle to function-support object diff --git a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py index e910b0bc30..f6897897d8 100755 --- a/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py +++ b/projects/rocm-smi-lib/python_smi_tools/rocm_smi.py @@ -2476,6 +2476,7 @@ def showPids(verbose): vramUsage = 'UNKNOWN' sdmaUsage = 'UNKNOWN' cuOccupancy = 'UNKNOWN' + cuOccupancyInvalid = 0xFFFFFFFF dv_indices = (c_uint32 * num_devices.value)() ret = rocmsmi.rsmi_compute_process_gpus_get(int(pid), None, byref(num_devices)) if rsmi_ret_ok(ret, metric='get_gpu_compute_process'): @@ -2491,7 +2492,8 @@ def showPids(verbose): if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'): vramUsage = proc.vram_usage sdmaUsage = proc.sdma_usage - cuOccupancy = proc.cu_occupancy + if proc.cu_occupancy != cuOccupancyInvalid: + cuOccupancy = proc.cu_occupancy else: logging.debug('Unable to fetch process info by PID') dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)]) @@ -2500,7 +2502,8 @@ def showPids(verbose): if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'): vramUsage = proc.vram_usage sdmaUsage = proc.sdma_usage - cuOccupancy = proc.cu_occupancy + if proc.cu_occupancy != cuOccupancyInvalid: + cuOccupancy = proc.cu_occupancy else: logging.debug('Unable to fetch process info by PID') dataArray.append([pid, 
getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)]) diff --git a/projects/rocm-smi-lib/src/rocm_smi_kfd.cc b/projects/rocm-smi-lib/src/rocm_smi_kfd.cc index 3109781e39..a4eaf43137 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_kfd.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_kfd.cc @@ -507,7 +507,9 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc, // Collect count of compute units cu_count += kfd_node_map[gpu_id]->cu_count(); } else { - return err; + // Some GFX revisions do not provide the cu_occupancy debugfs method + proc->cu_occupancy = CU_OCCUPANCY_INVALID; + cu_count = 0; } }