Add the RDC_FI_GPU_MEMORY_ACTIVITY telemetry field ("Memory busy percentage").

The field is registered in the field description table under the label
"GPU_MEM_UTIL", appended to the rdc_field_t enum after RDC_FI_GPU_MM_DEC_UTIL,
mirrored in the Python binding with the value 505 and added to the reader's
default field list, fetched through amdsmi_get_gpu_activity() by reading
amdsmi_engine_usage_t::umc_activity, and advertised by
RdcSmiLib::rdc_telemetry_fields_query().

NOTE(review): this patch was recovered from a copy whose line breaks and
leading indentation had been collapsed. Line boundaries were rebuilt from the
hunk header counts; indentation and the wrapping of the RdcSmiLib.cc
initializer list follow convention and must be confirmed against the target
tree (or applied with whitespace-insensitive matching).
NOTE(review): the static_cast target type was missing in the recovered text;
int64_t is assumed to be the type of value.l_int - confirm against rdc.h.

diff --git a/common/rdc_field.data b/common/rdc_field.data
index 81ede2e440..1a2deb6c57 100644
--- a/common/rdc_field.data
+++ b/common/rdc_field.data
@@ -51,6 +51,7 @@ FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE, "Memory usage of the GPU instance in by
 FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL", true)
 FLD_DESC_ENT(RDC_FI_GPU_MM_ENC_UTIL, "Mutilmedia encoder busy percentage", "GPU_MM_ENC_UTIL", true)
 FLD_DESC_ENT(RDC_FI_GPU_MM_DEC_UTIL, "Mutilmedia decoder busy percentage", "GPU_MM_DEC_UTIL", true)
+FLD_DESC_ENT(RDC_FI_GPU_MEMORY_ACTIVITY, "Memory busy percentage", "GPU_MEM_UTIL", true)
 
 // ECC totals
 FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated Single Error Correction", "ECC_CORRECT", true)
diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h
index 7c681ee26c..28c969df18 100644
--- a/include/rdc/rdc.h
+++ b/include/rdc/rdc.h
@@ -187,6 +187,7 @@ typedef enum {
   RDC_FI_GPU_MEMORY_TOTAL, //!< Total memory of the GPU instance
   RDC_FI_GPU_MM_ENC_UTIL, //!< Multimedia encoder busy percentage
   RDC_FI_GPU_MM_DEC_UTIL, //!< Multimedia decoder busy percentage
+  RDC_FI_GPU_MEMORY_ACTIVITY, //!< Memory busy percentage
 
   /**
    * @brief ECC related fields
diff --git a/python_binding/RdcReader.py b/python_binding/RdcReader.py
index 4433a2efe7..5fc4754b40 100644
--- a/python_binding/RdcReader.py
+++ b/python_binding/RdcReader.py
@@ -7,6 +7,7 @@ default_field_ids = [
     rdc_field_t.RDC_FI_GPU_MEMORY_TOTAL,
     rdc_field_t.RDC_FI_GPU_MM_ENC_UTIL,
     rdc_field_t.RDC_FI_GPU_MM_DEC_UTIL,
+    rdc_field_t.RDC_FI_GPU_MEMORY_ACTIVITY,
     rdc_field_t.RDC_FI_OAM_ID,
     rdc_field_t.RDC_FI_POWER_USAGE,
     rdc_field_t.RDC_FI_GPU_CLOCK,
diff --git a/python_binding/rdc_bootstrap.py b/python_binding/rdc_bootstrap.py
index cff29a084c..ee69240041 100644
--- a/python_binding/rdc_bootstrap.py
+++ b/python_binding/rdc_bootstrap.py
@@ -89,6 +89,7 @@ class rdc_field_t(c_int):
     RDC_FI_GPU_MEMORY_TOTAL = 502
     RDC_FI_GPU_MM_ENC_UTIL = 503
     RDC_FI_GPU_MM_DEC_UTIL = 504
+    RDC_FI_GPU_MEMORY_ACTIVITY = 505
     RDC_FI_ECC_CORRECT_TOTAL = 600
     RDC_FI_ECC_UNCORRECT_TOTAL = 601
     RDC_FI_ECC_SDMA_SEC = 602
diff --git a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc
index 54329aacd3..6f1ea1620e 100644
--- a/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc
+++ b/rdc_libs/rdc/src/RdcMetricFetcherImpl.cc
@@ -564,6 +564,15 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
       }
       break;
     }
+    case RDC_FI_GPU_MEMORY_ACTIVITY: {
+      amdsmi_engine_usage_t engine_usage;
+      value->status = amdsmi_get_gpu_activity(processor_handle, &engine_usage);
+      value->type = INTEGER;
+      if (value->status == AMDSMI_STATUS_SUCCESS) {
+        value->value.l_int = static_cast<int64_t>(engine_usage.umc_activity);
+      }
+      break;
+    }
     case RDC_FI_GPU_COUNT: {
       uint32_t processor_count = 0;
       // amdsmi is initialized in AMDSMI_INIT_AMD_GPUS mode -> returned sockets are GPUs
diff --git a/rdc_libs/rdc/src/RdcSmiLib.cc b/rdc_libs/rdc/src/RdcSmiLib.cc
index 2ea99795d5..9ddb79787c 100644
--- a/rdc_libs/rdc/src/RdcSmiLib.cc
+++ b/rdc_libs/rdc/src/RdcSmiLib.cc
@@ -178,7 +178,7 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI
       RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, RDC_EVNT_XGMI_0_THRPUT,
       RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT,
       RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT, RDC_FI_OAM_ID,
-      RDC_FI_GPU_MM_ENC_UTIL, RDC_FI_GPU_MM_DEC_UTIL,
+      RDC_FI_GPU_MM_ENC_UTIL, RDC_FI_GPU_MM_DEC_UTIL, RDC_FI_GPU_MEMORY_ACTIVITY,
   };
   std::copy(fields.begin(), fields.end(), field_ids);
   *field_count = fields.size();