SWDEV-518214: GPU Metrics 1.8 (#31)
* SWDEV-518214: GPU Metrics 1.8 (#31)
- Updates:
- Adding the following metrics to allow new calculations for violation status:
- Per XCP metrics gfx_below_host_limit_ppt_acc
- Per XCP metrics gfx_below_host_limit_thm_acc
- Per XCP metrics gfx_low_utilization_acc
- Per XCP metrics gfx_below_host_limit_total_acc
- Increasing the number of available JPEG engines from 32 to 40. Current ASICs may not support all 40; unsupported engines will be indicated as UINT16_MAX, or as N/A in the CLI.
Signed-off-by: Juan Castillo <juan.castillo@amd.com>
Co-authored-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/rocm_smi_lib commit: f69e65f7bd]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
6075f89576
Коммит
3aa80ec0e4
@@ -3795,9 +3795,41 @@ def showGPUMetrics(deviceList):
|
||||
},
|
||||
"xcp_stats.gfx_busy_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_ppt_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_thm_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_low_utilization_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_total_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_ppt_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_thm_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
"xcp_stats.gfx_low_utilization_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_total_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
@@ -3841,6 +3873,30 @@ def showGPUMetrics(deviceList):
|
||||
for _, val in enumerate(item.gfx_below_host_limit_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_below_host_limit_ppt_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_below_host_limit_ppt_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_below_host_limit_thm_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_below_host_limit_thm_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_low_utilization_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_low_utilization_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_below_host_limit_total_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_below_host_limit_total_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
|
||||
if int(device) < (len(deviceList) - 1):
|
||||
printLogSpacer()
|
||||
|
||||
@@ -662,10 +662,14 @@ class amdgpu_xcp_metrics_t(Structure):
|
||||
# amdgpu_xcp_metrics_t._pack_ = 1 # source:False
|
||||
amdgpu_xcp_metrics_t._fields_ = [
|
||||
('gfx_busy_inst', c_uint32 * 8),
|
||||
('jpeg_busy', c_uint16 * 32),
|
||||
('jpeg_busy', c_uint16 * 40),
|
||||
('vcn_busy', c_uint16 * 4),
|
||||
('gfx_busy_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_ppt_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_thm_acc', c_uint64 * 8),
|
||||
('gfx_low_utilization_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_total_acc', c_uint64 * 8),
|
||||
]
|
||||
xcp_stats_t = amdgpu_xcp_metrics_t
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user