Fix [Not supported] status for get_compute_process_info_by_pid
On some systems, [rocm-smi --showpids] reports:
get_compute_process_info_by_pid, Not supported on the given system
[PID] [PROCESS NAME] 1 UNKNOWN UNKNOWN UNKNOWN
get_compute_process_info_by_pid fails because, by design, the cu_occupancy debugfs method
is not provided on some graphics cards and GFX revisions.
This change returns a success status when the cu_occupancy debugfs method is the only thing
that is not found, and sets cu_occupancy to an invalidation value so that only
this parameter is reported as UNKNOWN.
Change-Id: Iae37070d9bd19483b4e6c8ee24c7d9a4c92f00d7
Signed-off-by: Vladimir Stempen <Vladimir.Stempen@amd.com>
Reviewed-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
[ROCm/rocm_smi_lib commit: 677433b367]
This commit is contained in:
committed by
Dmitrii Galantsev
parent
3564c1a430
commit
dc98babe34
@@ -1134,6 +1134,8 @@ typedef struct {
|
||||
uint32_t cu_occupancy; //!< Compute Unit usage in percent
|
||||
} rsmi_process_info_t;
|
||||
|
||||
//! CU occupancy invalidation value for the GFX revisions not providing cu_occupancy debugfs method
|
||||
#define CU_OCCUPANCY_INVALID 0xFFFFFFFF
|
||||
|
||||
/**
|
||||
* @brief Opaque handle to function-support object
|
||||
|
||||
@@ -2476,6 +2476,7 @@ def showPids(verbose):
|
||||
vramUsage = 'UNKNOWN'
|
||||
sdmaUsage = 'UNKNOWN'
|
||||
cuOccupancy = 'UNKNOWN'
|
||||
cuOccupancyInvalid = 0xFFFFFFFF
|
||||
dv_indices = (c_uint32 * num_devices.value)()
|
||||
ret = rocmsmi.rsmi_compute_process_gpus_get(int(pid), None, byref(num_devices))
|
||||
if rsmi_ret_ok(ret, metric='get_gpu_compute_process'):
|
||||
@@ -2491,7 +2492,8 @@ def showPids(verbose):
|
||||
if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'):
|
||||
vramUsage = proc.vram_usage
|
||||
sdmaUsage = proc.sdma_usage
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
if proc.cu_occupancy != cuOccupancyInvalid:
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
else:
|
||||
logging.debug('Unable to fetch process info by PID')
|
||||
dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
|
||||
@@ -2500,7 +2502,8 @@ def showPids(verbose):
|
||||
if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'):
|
||||
vramUsage = proc.vram_usage
|
||||
sdmaUsage = proc.sdma_usage
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
if proc.cu_occupancy != cuOccupancyInvalid:
|
||||
cuOccupancy = proc.cu_occupancy
|
||||
else:
|
||||
logging.debug('Unable to fetch process info by PID')
|
||||
dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)])
|
||||
|
||||
@@ -507,7 +507,9 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc,
|
||||
// Collect count of compute units
|
||||
cu_count += kfd_node_map[gpu_id]->cu_count();
|
||||
} else {
|
||||
return err;
|
||||
//Some GFX revisions do not provide cu_occupancy debugfs method
|
||||
proc->cu_occupancy = CU_OCCUPANCY_INVALID;
|
||||
cu_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user