SWDEV-518209: GPU Metrics 1.8 (#177)

- Updates:
    - Adding the following metrics to allow new calculations for violation status:
        - Per XCP metrics gfx_below_host_limit_ppt_acc
        - Per XCP metrics gfx_below_host_limit_thm_acc
        - Per XCP metrics gfx_low_utilization_acc
        - Per XCP metrics gfx_below_host_limit_total_acc
    - Increasing available JPEG engines to 40. Current ASICs may not support all 40. These will be indicated as UINT16_MAX or N/A in CLI.

Signed-off-by: Juan Castillo <juan.castillo@amd.com>
Co-authored-by: Charis Poag <Charis.Poag@amd.com>

[ROCm/amdsmi commit: 7c882b2f69]
This commit is contained in:
Castillo, Juan
2025-03-19 10:24:02 -05:00
committed by GitHub
parent 2f5792e208
commit fff2d21baf
9 changed files with 857 additions and 690 deletions
+14
View File
@@ -4,6 +4,20 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr
***All information listed below is for reference and subject to change.***
## amd_smi_lib for ROCm 6.5.0
### Added
- **Added support for GPU metrics 1.8**.
- Added new fields for `amdsmi_gpu_xcp_metrics_t` including:
- Adding the following metrics to allow new calculations for violation status:
- Per XCP metrics `gfx_below_host_limit_ppt_acc[XCP][MAX_XCC]` - GFX Clock Host limit Package Power Tracking violation counts
- Per XCP metrics `gfx_below_host_limit_thm_acc[XCP][MAX_XCC]` - GFX Clock Host limit Thermal (TVIOL) violation counts
- Per XCP metrics `gfx_low_utilization_acc[XCP][MAX_XCC]` - violation counts for how long low utilization caused the GPU to be below application clocks.
- Per XCP metrics `gfx_below_host_limit_total_acc[XCP][MAX_XCC]` - violation counts for how long the GPU was held below application clocks by any limiter (see the new violation metrics above).
- Increasing available JPEG engines to 40.
Current ASICs may not support all 40. These will be indicated as UINT16_MAX or N/A in CLI.
## amd_smi_lib for ROCm 6.4.0
### Added
@@ -1256,6 +1256,79 @@ int main() {
idx++;
}
/*New scp stats v1.8*/
idx = 0;
idy = 0;
std::cout << "\txcp_stats.gfx_below_host_limit_ppt_acc: " << "\n";
for (auto& row : smu.xcp_stats) {
std::cout << "\t XCP [" << idx << "] : [";
for (auto& col : row.gfx_below_host_limit_ppt_acc) {
if ((idy + 1) != static_cast<int>(std::size(row.gfx_below_host_limit_ppt_acc))) {
std::cout << col << ", ";
} else {
std::cout << col;
}
idy++;
}
std::cout << "]\n";
idy = 0;
idx++;
}
idx = 0;
idy = 0;
std::cout << "\txcp_stats.gfx_below_host_limit_thm_acc: " << "\n";
for (auto& row : smu.xcp_stats) {
std::cout << "\t XCP [" << idx << "] : [";
for (auto& col : row.gfx_below_host_limit_thm_acc) {
if ((idy + 1) != static_cast<int>(std::size(row.gfx_below_host_limit_thm_acc))) {
std::cout << col << ", ";
} else {
std::cout << col;
}
idy++;
}
std::cout << "]\n";
idy = 0;
idx++;
}
idx = 0;
idy = 0;
std::cout << "\txcp_stats.gfx_low_utilization_acc: " << "\n";
for (auto& row : smu.xcp_stats) {
std::cout << "\t XCP [" << idx << "] : [";
for (auto& col : row.gfx_low_utilization_acc) {
if ((idy + 1) != static_cast<int>(std::size(row.gfx_low_utilization_acc))) {
std::cout << col << ", ";
} else {
std::cout << col;
}
idy++;
}
std::cout << "]\n";
idy = 0;
idx++;
}
idx = 0;
idy = 0;
std::cout << "\txcp_stats.gfx_below_host_limit_total_acc: " << "\n";
for (auto& row : smu.xcp_stats) {
std::cout << "\t XCP [" << idx << "] : [";
for (auto& col : row.gfx_below_host_limit_total_acc) {
if ((idy + 1) != static_cast<int>(std::size(row.gfx_below_host_limit_total_acc))) {
std::cout << col << ", ";
} else {
std::cout << col;
}
idy++;
}
std::cout << "]\n";
idy = 0;
idx++;
}
std::cout << "\n\n";
std::cout << "\t ** -> Checking metrics with constant changes ** " << "\n";
constexpr uint16_t kMAX_ITER_TEST = 10;
+21 -4
View File
@@ -152,6 +152,12 @@ typedef enum {
*/
#define AMDSMI_MAX_NUM_JPEG 32
/**
* @brief Number of JPEG engine slots, new for gpu metrics v1.8.
* The reference document names this NUM_JPEG_ENG_V1; it is exposed here as
* AMDSMI_MAX_NUM_JPEG_ENG_V1 for naming continuity with AMDSMI_MAX_NUM_JPEG.
* Current ASICs may not support all 40 engines.
*/
#define AMDSMI_MAX_NUM_JPEG_ENG_V1 40
/**
* @brief This should match AMDSMI_MAX_NUM_XCC;
* XCC - Accelerated Compute Core, the collection of compute units,
@@ -1688,9 +1694,9 @@ typedef struct {
* @brief v1.6 additions
* The max uint32_t will be used if that information is N/A
*/
uint32_t gfx_busy_inst[AMDSMI_MAX_NUM_XCC]; //!< Utilization Instantaneous in %
uint16_t jpeg_busy[AMDSMI_MAX_NUM_JPEG]; //!< Utilization Instantaneous in %
uint16_t vcn_busy[AMDSMI_MAX_NUM_VCN]; //!< Utilization Instantaneous in %
uint32_t gfx_busy_inst[AMDSMI_MAX_NUM_XCC]; //!< Utilization Instantaneous in %
uint16_t jpeg_busy[AMDSMI_MAX_NUM_JPEG_ENG_V1]; //!< Utilization Instantaneous in % (UPDATED: to 40 in v1.8)
uint16_t vcn_busy[AMDSMI_MAX_NUM_VCN]; //!< Utilization Instantaneous in %
uint64_t gfx_busy_acc[AMDSMI_MAX_NUM_XCC]; //!< Utilization Accumulated in %
@@ -1699,6 +1705,17 @@ typedef struct {
*/
/* Total App Clock Counter Accumulated */
uint64_t gfx_below_host_limit_acc[AMDSMI_MAX_NUM_XCC]; //!< Total App Clock Counter Accumulated
/**
* @brief v1.8 additions
* Per-XCC accumulators added to allow new calculations for violation status
*/
/* App clock (host limit) accumulators, split out per limiter */
uint64_t gfx_below_host_limit_ppt_acc[AMDSMI_MAX_NUM_XCC];  //!< GFX Clock Host limit Package Power Tracking violation counts
uint64_t gfx_below_host_limit_thm_acc[AMDSMI_MAX_NUM_XCC];  //!< GFX Clock Host limit Thermal (TVIOL) violation counts
uint64_t gfx_low_utilization_acc[AMDSMI_MAX_NUM_XCC];  //!< Violation counts for low utilization keeping the GPU below application clocks
uint64_t gfx_below_host_limit_total_acc[AMDSMI_MAX_NUM_XCC];  //!< Violation counts for the GPU being held below application clocks by any limiter
} amdsmi_gpu_xcp_metrics_t;
/**
@@ -1889,7 +1906,7 @@ typedef struct {
uint32_t pcie_lc_perf_other_end_recovery; //!< PCIE other end recovery counter
/*
/**
* @brief v1.7 additions
*/
uint64_t vram_max_bandwidth; //!< VRAM max bandwidth at max memory clock (GB/s)
@@ -4200,6 +4200,10 @@ def amdsmi_get_gpu_metrics_info(
"xcp_stats.vcn_busy": list(gpu_metrics.xcp_stats),
"xcp_stats.gfx_busy_acc": list(gpu_metrics.xcp_stats),
"xcp_stats.gfx_below_host_limit_acc": list(gpu_metrics.xcp_stats),
"xcp_stats.gfx_below_host_limit_ppt_acc": list(gpu_metrics.xcp_stats),
"xcp_stats.gfx_below_host_limit_thm_acc": list(gpu_metrics.xcp_stats),
"xcp_stats.gfx_low_utilization_acc": list(gpu_metrics.xcp_stats),
"xcp_stats.gfx_below_host_limit_total_acc": list(gpu_metrics.xcp_stats),
"pcie_lc_perf_other_end_recovery": _validate_if_max_uint(gpu_metrics.pcie_lc_perf_other_end_recovery, MaxUIntegerTypes.UINT32_T),
"vram_max_bandwidth": _validate_if_max_uint(gpu_metrics.vram_max_bandwidth, MaxUIntegerTypes.UINT64_T),
"xgmi_link_status": _validate_if_max_uint(list(gpu_metrics.xgmi_link_status), MaxUIntegerTypes.UINT16_T),
@@ -4231,15 +4235,40 @@ def amdsmi_get_gpu_metrics_info(
for xcp_index, xcp_metrics in enumerate(gpu_metrics_output['xcp_stats.gfx_busy_acc']):
xcp_detail = []
for val in xcp_metrics.gfx_busy_acc:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T, isActivity=True))
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T))
gpu_metrics_output["xcp_stats.gfx_busy_acc"][xcp_index] = xcp_detail
if 'xcp_stats.gfx_below_host_limit_acc' in gpu_metrics_output:
for xcp_index, xcp_metrics in enumerate(gpu_metrics_output['xcp_stats.gfx_below_host_limit_acc']):
xcp_detail = []
for val in xcp_metrics.gfx_below_host_limit_acc:
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T, isActivity=True))
xcp_detail.append(_validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T))
gpu_metrics_output['xcp_stats.gfx_below_host_limit_acc'][xcp_index] = xcp_detail
# new for gpu metrics v1.8
# Each v1.8 accumulator is post-processed the same way: every raw xcp_stats
# entry stored under the output key is replaced by the list of that entry's
# per-XCC values, each passed through _validate_if_max_uint as a UINT64_T.
for output_key, field_name in (
    ('xcp_stats.gfx_below_host_limit_ppt_acc', 'gfx_below_host_limit_ppt_acc'),
    ('xcp_stats.gfx_below_host_limit_thm_acc', 'gfx_below_host_limit_thm_acc'),
    ('xcp_stats.gfx_low_utilization_acc', 'gfx_low_utilization_acc'),
    ('xcp_stats.gfx_below_host_limit_total_acc', 'gfx_below_host_limit_total_acc'),
):
    if output_key not in gpu_metrics_output:
        continue
    for xcp_index, xcp_metrics in enumerate(gpu_metrics_output[output_key]):
        gpu_metrics_output[output_key][xcp_index] = [
            _validate_if_max_uint(val, MaxUIntegerTypes.UINT64_T)
            for val in getattr(xcp_metrics, field_name)
        ]
return gpu_metrics_output
@@ -1849,10 +1849,14 @@ class struct_amdsmi_gpu_xcp_metrics_t(Structure):
struct_amdsmi_gpu_xcp_metrics_t._pack_ = 1 # source:False
struct_amdsmi_gpu_xcp_metrics_t._fields_ = [
('gfx_busy_inst', ctypes.c_uint32 * 8),
('jpeg_busy', ctypes.c_uint16 * 32),
('jpeg_busy', ctypes.c_uint16 * 40),
('vcn_busy', ctypes.c_uint16 * 4),
('gfx_busy_acc', ctypes.c_uint64 * 8),
('gfx_below_host_limit_acc', ctypes.c_uint64 * 8),
('gfx_below_host_limit_ppt_acc', ctypes.c_uint64 * 8),
('gfx_below_host_limit_thm_acc', ctypes.c_uint64 * 8),
('gfx_low_utilization_acc', ctypes.c_uint64 * 8),
('gfx_below_host_limit_total_acc', ctypes.c_uint64 * 8),
]
amdsmi_gpu_xcp_metrics_t = struct_amdsmi_gpu_xcp_metrics_t
@@ -1068,6 +1068,11 @@ typedef struct metrics_table_header_t metrics_table_header_t;
*/
#define RSMI_MAX_NUM_JPEG_ENGS 32
/**
* @brief This should match kRSMI_MAX_NUM_JPEG_ENG_V1
*/
#define RSMI_MAX_NUM_JPEG_ENG_V1 40
/**
* @brief This should match kRSMI_MAX_NUM_CLKS
*/
@@ -1115,7 +1120,7 @@ struct amdgpu_xcp_metrics_t {
*/
/* Utilization Instantaneous (%) */
uint32_t gfx_busy_inst[RSMI_MAX_NUM_XCC];
uint16_t jpeg_busy[RSMI_MAX_NUM_JPEG_ENGS];
uint16_t jpeg_busy[RSMI_MAX_NUM_JPEG_ENG_V1];
uint16_t vcn_busy[RSMI_MAX_NUM_VCNS];
/* Utilization Accumulated (%) */
@@ -1126,6 +1131,14 @@ struct amdgpu_xcp_metrics_t {
*/
/* Total App Clock Counter Accumulated */
uint64_t gfx_below_host_limit_acc[RSMI_MAX_NUM_XCC];
/**
* v1.8 additions
* Per-XCC accumulators added to allow new calculations for violation status.
*/
/* GFX Clock Host limit Package Power Tracking violation counts */
uint64_t gfx_below_host_limit_ppt_acc[RSMI_MAX_NUM_XCC];
/* GFX Clock Host limit Thermal (TVIOL) violation counts */
uint64_t gfx_below_host_limit_thm_acc[RSMI_MAX_NUM_XCC];
/* Violation counts for low utilization keeping the GPU below application clocks */
uint64_t gfx_low_utilization_acc[RSMI_MAX_NUM_XCC];
/* Violation counts for the GPU being held below application clocks by any limiter */
uint64_t gfx_below_host_limit_total_acc[RSMI_MAX_NUM_XCC];
};
typedef struct {
@@ -54,10 +54,11 @@ constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_1 = 1;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_2 = 2;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_3 = 3;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_4 = 4;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_8 = 8;
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MAJOR_VER
= kRSMI_GPU_METRICS_API_CONTENT_MAJOR_VER_1;
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MINON_VER
= kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_4;
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MINOR_VER
= kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_8;
// Note: This *must* match NUM_HBM_INSTANCES
@@ -78,6 +79,10 @@ constexpr uint32_t kRSMI_MAX_NUM_VCNS = 4;
// Note: This *must* match NUM_JPEG_ENG
constexpr uint32_t kRSMI_MAX_JPEG_ENGINES = 32;
// Note: JPEG engine count used by amdgpu_xcp_metrics_v1_2 (gpu metrics v1.8).
// The reference document calls this NUM_JPEG_ENG_V1; it is named
// kRSMI_MAX_NUM_JPEG_ENG_V1 here to follow the naming of the other constants.
constexpr uint32_t kRSMI_MAX_NUM_JPEG_ENG_V1 = 40;
// Note: This *must* match MAX_XCC
constexpr uint32_t kRSMI_MAX_NUM_XCC = 8;
@@ -90,6 +95,15 @@ struct AMDGpuMetricsHeader_v1_t {
uint8_t m_format_revision;
uint8_t m_content_revision;
};
// Per-XCP utilization metrics, base layout: instantaneous and accumulated
// utilization only, with kRSMI_MAX_JPEG_ENGINES JPEG slots.
// NOTE(review): field order and widths presumably mirror the driver-provided
// binary layout -- confirm before reordering or resizing anything.
struct amdgpu_xcp_metrics {
/* Utilization Instantaneous (%) */
uint32_t gfx_busy_inst[kRSMI_MAX_NUM_XCC];
uint16_t jpeg_busy[kRSMI_MAX_JPEG_ENGINES];
uint16_t vcn_busy[kRSMI_MAX_NUM_VCNS];
/* Utilization Accumulated (%) */
uint64_t gfx_busy_acc[kRSMI_MAX_NUM_XCC];
};
struct amdgpu_xcp_metrics_v1_1 {
/* Utilization Instantaneous (%) */
@@ -104,14 +118,21 @@ struct amdgpu_xcp_metrics_v1_1 {
uint64_t gfx_below_host_limit_acc[kRSMI_MAX_NUM_XCC];
};
struct amdgpu_xcp_metrics {
/* new for gpu metrics v1.8 */
struct amdgpu_xcp_metrics_v1_2 {
/* Utilization Instantaneous (%) */
uint32_t gfx_busy_inst[kRSMI_MAX_NUM_XCC];
uint16_t jpeg_busy[kRSMI_MAX_JPEG_ENGINES];
uint16_t jpeg_busy[kRSMI_MAX_NUM_JPEG_ENG_V1];
uint16_t vcn_busy[kRSMI_MAX_NUM_VCNS];
/* Utilization Accumulated (%) */
uint64_t gfx_busy_acc[kRSMI_MAX_NUM_XCC];
/* Total App Clock Counter Accumulated */
uint64_t gfx_below_host_limit_ppt_acc[kRSMI_MAX_NUM_XCC];
uint64_t gfx_below_host_limit_thm_acc[kRSMI_MAX_NUM_XCC];
uint64_t gfx_low_utilization_acc[kRSMI_MAX_NUM_XCC];
uint64_t gfx_below_host_limit_total_acc[kRSMI_MAX_NUM_XCC];
};
struct AMDGpuMetricsBase_t {
@@ -667,7 +688,107 @@ struct AMDGpuMetrics_v17_t {
/* PCIE other end recovery counter */
uint32_t m_pcie_lc_perf_other_end_recovery;
};
using AMGpuMetricsLatest_t = AMDGpuMetrics_v17_t;
// Internal GPU metrics table for the v1.8 layout. Its per-partition
// statistics (m_xcp_stats) use the amdgpu_xcp_metrics_v1_2 layout.
// NOTE(review): field order and widths presumably mirror the driver-provided
// binary table -- confirm before reordering, resizing, or inserting members.
struct AMDGpuMetrics_v18_t {
~AMDGpuMetrics_v18_t() = default;
struct AMDGpuMetricsHeader_v1_t m_common_header;
/* Temperature (Celsius) */
uint16_t m_temperature_hotspot;
uint16_t m_temperature_mem;
uint16_t m_temperature_vrsoc;
/* Power (Watts) */
uint16_t m_current_socket_power;
/* Utilization (%) */
uint16_t m_average_gfx_activity;
uint16_t m_average_umc_activity; // memory controller
/* VRAM max bandwidth (in GB/sec) at max memory clock */
uint64_t m_mem_max_bandwidth;
/* Energy (15.259uJ (2^-16) units) */
uint64_t m_energy_accumulator;
/* Driver attached timestamp (in ns) */
uint64_t m_system_clock_counter;
/* Accumulation cycle counter */
uint32_t m_accumulation_counter;
/* Accumulated throttler residencies */
uint32_t m_prochot_residency_acc;
uint32_t m_ppt_residency_acc;
uint32_t m_socket_thm_residency_acc;
uint32_t m_vr_thm_residency_acc;
uint32_t m_hbm_thm_residency_acc;
/* Clock Lock Status. Each bit corresponds to clock instance */
uint32_t m_gfxclk_lock_status;
/* Link width (number of lanes) and speed (in 0.1 GT/s) */
uint16_t m_pcie_link_width;
uint16_t m_pcie_link_speed;
/* XGMI bus width and bitrate (in Gbps) */
uint16_t m_xgmi_link_width;
uint16_t m_xgmi_link_speed;
/* Utilization Accumulated (%) */
uint32_t m_gfx_activity_acc;
uint32_t m_mem_activity_acc;
/*PCIE accumulated bandwidth (GB/sec) */
uint64_t m_pcie_bandwidth_acc;
/*PCIE instantaneous bandwidth (GB/sec) */
uint64_t m_pcie_bandwidth_inst;
/* PCIE L0 to recovery state transition accumulated count */
uint64_t m_pcie_l0_to_recov_count_acc;
/* PCIE replay accumulated count */
uint64_t m_pcie_replay_count_acc;
/* PCIE replay rollover accumulated count */
uint64_t m_pcie_replay_rover_count_acc;
/* PCIE NAK sent accumulated count */
uint32_t m_pcie_nak_sent_count_acc;
/* PCIE NAK received accumulated count */
uint32_t m_pcie_nak_rcvd_count_acc;
/* XGMI accumulated data transfer size(KiloBytes) */
uint64_t m_xgmi_read_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
uint64_t m_xgmi_write_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
/* XGMI link status(active/inactive) */
uint16_t m_xgmi_link_status[kRSMI_MAX_NUM_XGMI_LINKS];
/* NOTE(review): presumably explicit alignment padding before the 64-bit timestamp -- confirm */
uint16_t m_padding;
/* PMFW attached timestamp (10ns resolution) */
uint64_t m_firmware_timestamp;
/* Current clocks (Mhz) */
uint16_t m_current_gfxclk[kRSMI_MAX_NUM_GFX_CLKS];
uint16_t m_current_socclk[kRSMI_MAX_NUM_CLKS];
uint16_t m_current_vclk0[kRSMI_MAX_NUM_CLKS];
uint16_t m_current_dclk0[kRSMI_MAX_NUM_CLKS];
uint16_t m_current_uclk;
/* Number of current partition */
uint16_t m_num_partition;
/* XCP metrics stats (amdgpu_xcp_metrics_v1_2 layout, one entry per XCP) */
struct amdgpu_xcp_metrics_v1_2 m_xcp_stats[kRSMI_MAX_NUM_XCP];
/* PCIE other end recovery counter */
uint32_t m_pcie_lc_perf_other_end_recovery;
};
using AMGpuMetricsLatest_t = AMDGpuMetrics_v18_t;
/**
* This is GPU Metrics version that gets to public access.
@@ -882,11 +1003,18 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
kMetricJpegBusy, // v1.6
kMetricVcnBusy, // v1.6
kMetricGfxBusyAcc, // v1.6
kMetricGfxBelowHostLimitAccumulator, // v1.7
kMetricPcieLCPerfOtherEndRecov, // v1.6
kMetricVramMaxBandwidth, // v1.7
kMetricXgmiLinkStatus, // v1.7
kMetricGfxBelowHostLimitAccumulator, // v1.7
kMetricGfxBelowHostLimitPptAcc, // v1.8
kMetricGfxBelowHostLimitThmAcc, // v1.8
kMetricGfxBelowHostLimitTotalAcc, // v1.8
kMetricGfxLowUtilitizationAcc, // v1.8
};
using AMDGpuMetricsUnitTypeTranslationTbl_t = std::map<AMDGpuMetricsUnitType_t, std::string>;
@@ -925,6 +1053,7 @@ enum class AMDGpuMetricVersionFlags_t : AMDGpuMetricVersionFlagId_t
kGpuMetricV15 = (0x1 << 5),
kGpuMetricV16 = (0x1 << 6),
kGpuMetricV17 = (0x1 << 7),
kGpuMetricV18 = (0x1 << 8), // Added new version flag
};
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint16_t, AMDGpuMetricVersionFlags_t>;
using GpuMetricTypePtr_t = std::shared_ptr<void>;
@@ -934,7 +1063,6 @@ class GpuMetricsBase_t {
virtual ~GpuMetricsBase_t() = default;
virtual size_t sizeof_metric_table() = 0;
virtual GpuMetricTypePtr_t get_metrics_table() = 0;
virtual void dump_internal_metrics_table() = 0;
virtual AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() = 0;
virtual rsmi_status_t populate_metrics_dynamic_tbl() = 0;
virtual AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() = 0;
@@ -969,10 +1097,6 @@ class GpuMetricsBase_v11_t final : public GpuMetricsBase_t {
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override {
return;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV11;
}
@@ -1002,10 +1126,6 @@ class GpuMetricsBase_v12_t final : public GpuMetricsBase_t {
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override {
return;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV12;
}
@@ -1034,8 +1154,6 @@ class GpuMetricsBase_v13_t final : public GpuMetricsBase_t {
return (m_gpu_metric_ptr);
}
void dump_internal_metrics_table() override;
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV13;
}
@@ -1065,8 +1183,6 @@ class GpuMetricsBase_v14_t final : public GpuMetricsBase_t {
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override;
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV14;
}
@@ -1096,8 +1212,6 @@ class GpuMetricsBase_v15_t final : public GpuMetricsBase_t {
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override;
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV15;
}
@@ -1127,8 +1241,6 @@ class GpuMetricsBase_v16_t final : public GpuMetricsBase_t {
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override;
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV16;
}
@@ -1157,8 +1269,6 @@ class GpuMetricsBase_v17_t final : public GpuMetricsBase_t {
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override;
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV17;
}
@@ -1171,6 +1281,34 @@ class GpuMetricsBase_v17_t final : public GpuMetricsBase_t {
std::shared_ptr<AMDGpuMetrics_v17_t> m_gpu_metric_ptr;
};
class GpuMetricsBase_v18_t final : public GpuMetricsBase_t {
public:
~GpuMetricsBase_v18_t() = default;
size_t sizeof_metric_table() override {
return sizeof(AMDGpuMetrics_v18_t);
}
GpuMetricTypePtr_t get_metrics_table() override {
if (!m_gpu_metric_ptr) {
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v18_t*){});
}
assert(m_gpu_metric_ptr != nullptr);
return m_gpu_metric_ptr;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
return AMDGpuMetricVersionFlags_t::kGpuMetricV18;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
private:
AMDGpuMetrics_v18_t m_gpu_metrics_tbl;
std::shared_ptr<AMDGpuMetrics_v18_t> m_gpu_metric_ptr;
};
template<typename T>
rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind,
AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
File diff suppressed because it is too large Load Diff
@@ -358,6 +358,50 @@ void TestGpuMetricsRead::Run(void) {
std::cout << " ]\n";
xcp++;
}
// new for gpu metrics v1.8
xcp = 0;
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_ppt_acc = \n";
for (auto& row : smu.xcp_stats) {
std::cout << "XCP[" << xcp << "] = " << "[ ";
std::copy(std::begin(row.gfx_below_host_limit_ppt_acc),
std::end(row.gfx_below_host_limit_ppt_acc),
amd::smi::make_ostream_joiner(&std::cout, ", "));
std::cout << " ]\n";
xcp++;
}
xcp = 0;
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_thm_acc = \n";
for (auto& row : smu.xcp_stats) {
std::cout << "XCP[" << xcp << "] = " << "[ ";
std::copy(std::begin(row.gfx_below_host_limit_thm_acc),
std::end(row.gfx_below_host_limit_thm_acc),
amd::smi::make_ostream_joiner(&std::cout, ", "));
std::cout << " ]\n";
xcp++;
}
xcp = 0;
std::cout << std::dec << "xcp_stats.gfx_low_utilization_acc = \n";
for (auto& row : smu.xcp_stats) {
std::cout << "XCP[" << xcp << "] = " << "[ ";
std::copy(std::begin(row.gfx_low_utilization_acc),
std::end(row.gfx_low_utilization_acc),
amd::smi::make_ostream_joiner(&std::cout, ", "));
std::cout << " ]\n";
xcp++;
}
xcp = 0;
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_total_acc = \n";
for (auto& row : smu.xcp_stats) {
std::cout << "XCP[" << xcp << "] = " << "[ ";
std::copy(std::begin(row.gfx_below_host_limit_total_acc),
std::end(row.gfx_below_host_limit_total_acc),
amd::smi::make_ostream_joiner(&std::cout, ", "));
std::cout << " ]\n";
xcp++;
}
std::cout << "\n\n";
std::cout << "\t ** -> Checking metrics with constant changes ** " << "\n";