From 30f9b2ac2f5f11f4af1ab604385194e2163618f9 Mon Sep 17 00:00:00 2001
From: Li Ma
Date: Mon, 16 Dec 2024 09:00:28 +0800
Subject: [PATCH] SWDEV-475244 - Memory Usage and Bandwidth: max mem and
current mem
Implemented max memory bandwidth and current memory bandwidth. Added two
new field IDs: RDC_FI_GPU_MEMORY_MAX_BANDWIDTH and RDC_FI_GPU_MEMORY_CUR_BANDWIDTH.
Signed-off-by: Li Ma
Change-Id: I453e49937a84777146575f4f5bdd69fd4fe53bfc
---
common/rdc_field.data | 2 ++
include/rdc/rdc.h | 2 ++
python_binding/RdcReader.py | 2 ++
python_binding/rdc_bootstrap.py | 2 ++
rdc_libs/rdc/src/RdcMetricFetcherImpl.cc | 26 ++++++++++++++++++++++++
rdc_libs/rdc/src/RdcSmiLib.cc | 1 +
6 files changed, 35 insertions(+)
diff --git a/common/rdc_field.data b/common/rdc_field.data
index 3fbb0b5710..985131272c 100644
--- a/common/rdc_field.data
+++ b/common/rdc_field.data
@@ -52,6 +52,8 @@ FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance",
FLD_DESC_ENT(RDC_FI_GPU_MM_ENC_UTIL, "Mutilmedia encoder busy percentage", "GPU_MM_ENC_UTIL", true)
FLD_DESC_ENT(RDC_FI_GPU_MM_DEC_UTIL, "Mutilmedia decoder busy percentage", "GPU_MM_DEC_UTIL", true)
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_ACTIVITY, "Memory busy percentage", "GPU_MEM_UTIL", true)
+FLD_DESC_ENT(RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, "Memory max bandwidth", "GPU_MEM_MAX_BANDWIDTH", true)
+FLD_DESC_ENT(RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, "Memory current bandwidth", "GPU_MEM_CUR_BANDWIDTH", true)
FLD_DESC_ENT(RDC_FI_GPU_PAGE_RETRIED, "Retried page of the GPU instance", "GPU_PAGE_RETRIED", true)
diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h
index 237a2535e5..f3c044c957 100644
--- a/include/rdc/rdc.h
+++ b/include/rdc/rdc.h
@@ -188,6 +188,8 @@ typedef enum {
RDC_FI_GPU_MM_ENC_UTIL, //!< Multimedia encoder busy percentage
RDC_FI_GPU_MM_DEC_UTIL, //!< Multimedia decoder busy percentage
RDC_FI_GPU_MEMORY_ACTIVITY, //!< Memory busy percentage
+ RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //status = amdsmi_get_gpu_vram_info(processor_handle, &vram_info);
+ value->type = INTEGER;
+ if (value->status == AMDSMI_STATUS_SUCCESS) {
+ value->value.l_int = vram_info.vram_max_bandwidth;
+ }
+ break;
+ }
+ case RDC_FI_GPU_MEMORY_CUR_BANDWIDTH: {
+   // Current memory bandwidth is estimated by scaling the maximum VRAM
+   // bandwidth by the memory-controller (UMC) activity, which is treated
+   // as a percentage (hence the division by 100).
+   amdsmi_engine_usage_t engine_usage;
+   amdsmi_vram_info_t vram_info;
+
+   value->type = INTEGER;
+   value->status = amdsmi_get_gpu_activity(processor_handle, &engine_usage);
+   if (value->status != AMDSMI_STATUS_SUCCESS) {
+     // Keep the failure status: without an activity sample there is nothing
+     // to scale, and a later successful call must not mask this error.
+     break;
+   }
+
+   value->status = amdsmi_get_gpu_vram_info(processor_handle, &vram_info);
+   if (value->status == AMDSMI_STATUS_SUCCESS) {
+     // Multiply before dividing: integer division of the activity by 100
+     // first would truncate every reading below 100% to zero.
+     value->value.l_int = static_cast<int64_t>(engine_usage.umc_activity) *
+                          static_cast<int64_t>(vram_info.vram_max_bandwidth) / 100;
+   }
+   break;
+ }
case RDC_FI_GPU_COUNT: {
uint32_t processor_count = 0;
// amdsmi is initialized in AMDSMI_INIT_AMD_GPUS mode -> returned sockets are GPUs
diff --git a/rdc_libs/rdc/src/RdcSmiLib.cc b/rdc_libs/rdc/src/RdcSmiLib.cc
index 520efbee6b..91d9a4931f 100644
--- a/rdc_libs/rdc/src/RdcSmiLib.cc
+++ b/rdc_libs/rdc/src/RdcSmiLib.cc
@@ -183,6 +183,7 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI
RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM,
RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_UNCORRECTABLE_PAGE_LIMIT,
RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME,
+ RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH,
};
std::copy(fields.begin(), fields.end(), field_ids);
*field_count = fields.size();