From 30f9b2ac2f5f11f4af1ab604385194e2163618f9 Mon Sep 17 00:00:00 2001 From: Li Ma Date: Mon, 16 Dec 2024 09:00:28 +0800 Subject: [PATCH] SWDEV-475244 - Memory Usage and Bandwidth: max mem and current mem Implemented max memory bandwidth and current memory bandwidth. Added two new field ids: RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH Signed-off-by: Li Ma Change-Id: I453e49937a84777146575f4f5bdd69fd4fe53bfc --- common/rdc_field.data | 2 ++ include/rdc/rdc.h | 2 ++ python_binding/RdcReader.py | 2 ++ python_binding/rdc_bootstrap.py | 2 ++ rdc_libs/rdc/src/RdcMetricFetcherImpl.cc | 26 ++++++++++++++++++++++++ rdc_libs/rdc/src/RdcSmiLib.cc | 1 + 6 files changed, 35 insertions(+) diff --git a/common/rdc_field.data b/common/rdc_field.data index 3fbb0b5710..985131272c 100644 --- a/common/rdc_field.data +++ b/common/rdc_field.data @@ -52,6 +52,8 @@ FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", FLD_DESC_ENT(RDC_FI_GPU_MM_ENC_UTIL, "Mutilmedia encoder busy percentage", "GPU_MM_ENC_UTIL", true) FLD_DESC_ENT(RDC_FI_GPU_MM_DEC_UTIL, "Mutilmedia decoder busy percentage", "GPU_MM_DEC_UTIL", true) FLD_DESC_ENT(RDC_FI_GPU_MEMORY_ACTIVITY, "Memory busy percentage", "GPU_MEM_UTIL", true) +FLD_DESC_ENT(RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, "Memory max bandwidth", "GPU_MEM_MAX_BANDWIDTH", true) +FLD_DESC_ENT(RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, "Memory current bandwidth", "GPU_MEM_CUR_BANDWIDTH", true) FLD_DESC_ENT(RDC_FI_GPU_PAGE_RETRIED, "Retried page of the GPU instance", "GPU_PAGE_RETRIED", true) diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index 237a2535e5..f3c044c957 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -188,6 +188,8 @@ typedef enum { RDC_FI_GPU_MM_ENC_UTIL, //!< Multimedia encoder busy percentage RDC_FI_GPU_MM_DEC_UTIL, //!< Multimedia decoder busy percentage RDC_FI_GPU_MEMORY_ACTIVITY, //!< Memory busy percentage + RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //status = amdsmi_get_gpu_vram_info(processor_handle, &vram_info); 
+ value->type = INTEGER; + if (value->status == AMDSMI_STATUS_SUCCESS) { + value->value.l_int = vram_info.vram_max_bandwidth; + } + break; + } + case RDC_FI_GPU_MEMORY_CUR_BANDWIDTH: { + // Current bandwidth: umc_activity is treated as a percentage and applied to vram_max_bandwidth. + amdsmi_engine_usage_t engine_usage; + amdsmi_vram_info_t vram_info; + + value->status = amdsmi_get_gpu_activity(processor_handle, &engine_usage); + value->type = INTEGER; + if (value->status != AMDSMI_STATUS_SUCCESS) { + // Stop here so this failure status is not overwritten by the VRAM query below. + break; + } + + value->status = amdsmi_get_gpu_vram_info(processor_handle, &vram_info); + if (value->status == AMDSMI_STATUS_SUCCESS) { + // Multiply before dividing: activity / 100 truncates to 0 for every activity below 100. + value->value.l_int = static_cast<int64_t>(engine_usage.umc_activity) * vram_info.vram_max_bandwidth / 100; + } + break; + } case RDC_FI_GPU_COUNT: { uint32_t processor_count = 0; // amdsmi is initialized in AMDSMI_INIT_AMD_GPUS mode -> returned sockets are GPUs diff --git a/rdc_libs/rdc/src/RdcSmiLib.cc b/rdc_libs/rdc/src/RdcSmiLib.cc index 520efbee6b..91d9a4931f 100644 --- a/rdc_libs/rdc/src/RdcSmiLib.cc +++ b/rdc_libs/rdc/src/RdcSmiLib.cc @@ -183,6 +183,7 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM, RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_UNCORRECTABLE_PAGE_LIMIT, RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME, + RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, }; std::copy(fields.begin(), fields.end(), field_ids); *field_count = fields.size();