Fix [Not supported] status for get_compute_process_info_by_pid
On some systems, [rocm-smi --showpids] reports:
get_compute_process_info_by_pid, Not supported on the given system
[PID] [PROCESS NAME] 1 UNKNOWN UNKNOWN UNKNOWN

get_compute_process_info_by_pid fails because the cu_occupancy debugfs method is, by design, not provided on some graphics cards and GFX revisions.

Proposing a change to return a success status when only the cu_occupancy debugfs method is not found, and to provide a cu_occupancy invalidation value so that only this parameter is marked as UNKNOWN.

Change-Id: Iae37070d9bd19483b4e6c8ee24c7d9a4c92f00d7
Signed-off-by: Vladimir Stempen <Vladimir.Stempen@amd.com>
Reviewed-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
This commit is contained in:
committed by
Dmitrii Galantsev
szülő
de9eaaac8c
commit
677433b367
@@ -1134,6 +1134,8 @@ typedef struct {
|
||||
uint32_t cu_occupancy; //!< Compute Unit usage in percent
|
||||
} rsmi_process_info_t;
|
||||
|
||||
//! CU occupancy invalidation value for the GFX revisions not providing cu_occupancy debugfs method
|
||||
#define CU_OCCUPANCY_INVALID 0xFFFFFFFF
|
||||
|
||||
/**
|
||||
* @brief Opaque handle to function-support object
|
||||
|
||||
@@ -2476,6 +2476,7 @@ def showPids(verbose):
|
||||
vramUsage = 'UNKNOWN'
|
||||
sdmaUsage = 'UNKNOWN'
|
||||
cuOccupancy = 'UNKNOWN'
|
||||
cuOccupancyInvalid = 0xFFFFFFFF
|
||||
dv_indices = (c_uint32 * num_devices.value)()
|
||||
ret = rocmsmi.rsmi_compute_process_gpus_get(int(pid), None, byref(num_devices))
|
||||
if rsmi_ret_ok(ret, metric='get_gpu_compute_process'):
|
||||
@@ -2491,7 +2492,8 @@ def showPids(verbose):
|
||||
if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'):
|
||||
vramUsage = proc.vram_usage
|
||||
sdmaUsage = proc.sdma_usage
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
if proc.cu_occupancy != cuOccupancyInvalid:
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
else:
|
||||
logging.debug('Unable to fetch process info by PID')
|
||||
dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
|
||||
@@ -2500,7 +2502,8 @@ def showPids(verbose):
|
||||
if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'):
|
||||
vramUsage = proc.vram_usage
|
||||
sdmaUsage = proc.sdma_usage
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
if proc.cu_occupancy != cuOccupancyInvalid:
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
else:
|
||||
logging.debug('Unable to fetch process info by PID')
|
||||
dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
|
||||
|
||||
@@ -507,7 +507,9 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc,
|
||||
// Collect count of compute units
|
||||
cu_count += kfd_node_map[gpu_id]->cu_count();
|
||||
} else {
|
||||
return err;
|
||||
//Some GFX revisions do not provide cu_occupancy debugfs method
|
||||
proc->cu_occupancy = CU_OCCUPANCY_INVALID;
|
||||
cu_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user