SWDEV-475244 - Memory Usage and Bandwidth: max mem and current mem (#48)
[ROCm/rdc commit: 772481f952]
This commit is contained in:
@@ -52,6 +52,8 @@ FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance",
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MM_ENC_UTIL, "Mutilmedia encoder busy percentage", "GPU_MM_ENC_UTIL", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MM_DEC_UTIL, "Mutilmedia decoder busy percentage", "GPU_MM_DEC_UTIL", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_ACTIVITY, "Memory busy percentage", "GPU_MEM_UTIL", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, "Memory max bandwidth", "GPU_MEM_MAX_BANDWIDTH", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, "Memory current bandwidth", "GPU_MEM_CUR_BANDWIDTH", true)
|
||||
|
||||
FLD_DESC_ENT(RDC_FI_GPU_PAGE_RETRIED, "Retried page of the GPU instance", "GPU_PAGE_RETRIED", true)
|
||||
|
||||
|
||||
@@ -193,6 +193,8 @@ typedef enum {
|
||||
RDC_FI_GPU_MM_ENC_UTIL, //!< Multimedia encoder busy percentage
|
||||
RDC_FI_GPU_MM_DEC_UTIL, //!< Multimedia decoder busy percentage
|
||||
RDC_FI_GPU_MEMORY_ACTIVITY, //!< Memory busy percentage
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //!< The Memory max bandwidth at current memory clock in Mb/Second
|
||||
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, //!< The Memory current bandwidth in Mb/Second
|
||||
|
||||
/**
|
||||
* @brief GPU page related fields
|
||||
|
||||
@@ -8,6 +8,8 @@ default_field_ids = [
|
||||
rdc_field_t.RDC_FI_GPU_MM_ENC_UTIL,
|
||||
rdc_field_t.RDC_FI_GPU_MM_DEC_UTIL,
|
||||
rdc_field_t.RDC_FI_GPU_MEMORY_ACTIVITY,
|
||||
rdc_field_t.RDC_FI_GPU_MEMORY_MAX_BANDWIDTH,
|
||||
rdc_field_t.RDC_FI_GPU_MEMORY_CUR_BANDWIDTH,
|
||||
rdc_field_t.RDC_FI_OAM_ID,
|
||||
rdc_field_t.RDC_FI_POWER_USAGE,
|
||||
rdc_field_t.RDC_FI_GPU_CLOCK,
|
||||
|
||||
@@ -90,6 +90,8 @@ class rdc_field_t(c_int):
|
||||
RDC_FI_GPU_MM_ENC_UTIL = 503
|
||||
RDC_FI_GPU_MM_DEC_UTIL = 504
|
||||
RDC_FI_GPU_MEMORY_ACTIVITY = 505
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH = 506
|
||||
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH = 507
|
||||
RDC_FI_ECC_CORRECT_TOTAL = 600
|
||||
RDC_FI_ECC_UNCORRECT_TOTAL = 601
|
||||
RDC_FI_ECC_SDMA_SEC = 602
|
||||
|
||||
@@ -573,6 +573,32 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
|
||||
}
|
||||
break;
|
||||
}
|
||||
case RDC_FI_GPU_MEMORY_MAX_BANDWIDTH: {
  // Report the maximum VRAM bandwidth exactly as AMD SMI returns it.
  amdsmi_vram_info_t vram;

  value->type = INTEGER;
  value->status = amdsmi_get_gpu_vram_info(processor_handle, &vram);
  if (value->status != AMDSMI_STATUS_SUCCESS) {
    break;  // value->value is left untouched when the query fails
  }
  value->value.l_int = vram.vram_max_bandwidth;
  break;
}
|
||||
case RDC_FI_GPU_MEMORY_CUR_BANDWIDTH: {
  // Current bandwidth is derived from two AMD SMI queries:
  //   umc_activity (treated as a 0-100 percentage, as the original
  //   divide-by-100 implies) scaled by vram_max_bandwidth.
  // The result uses the same unit as vram_max_bandwidth.
  amdsmi_engine_usage_t engine_usage;
  amdsmi_vram_info_t vram_info;

  value->type = INTEGER;

  value->status = amdsmi_get_gpu_activity(processor_handle, &engine_usage);
  if (value->status != AMDSMI_STATUS_SUCCESS) {
    // Without an activity sample there is nothing to scale. Stop here so
    // the failure status is reported instead of being overwritten by the
    // second query, and so an unset l_int is never read.
    break;
  }

  value->status = amdsmi_get_gpu_vram_info(processor_handle, &vram_info);
  if (value->status == AMDSMI_STATUS_SUCCESS) {
    // Multiply before dividing: with integer arithmetic, activity / 100
    // truncates to 0 for every activity below 100, which made this field
    // report either 0 or the full maximum bandwidth.
    const int64_t activity_pct = static_cast<int64_t>(engine_usage.umc_activity);
    const int64_t max_bandwidth = static_cast<int64_t>(vram_info.vram_max_bandwidth);
    value->value.l_int = activity_pct * max_bandwidth / 100;
  }
  break;
}
|
||||
case RDC_FI_GPU_COUNT: {
|
||||
uint32_t processor_count = 0;
|
||||
// amdsmi is initialized in AMDSMI_INIT_AMD_GPUS mode -> returned sockets are GPUs
|
||||
|
||||
@@ -183,6 +183,7 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI
|
||||
RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM,
|
||||
RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_UNCORRECTABLE_PAGE_LIMIT,
|
||||
RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME,
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH,
|
||||
};
|
||||
std::copy(fields.begin(), fields.end(), field_ids);
|
||||
*field_count = fields.size();
|
||||
|
||||
Reference in New Issue
Block a user