SWDEV-518214: GPU Metrics 1.8 (#31)
* SWDEV-518214: GPU Metrics 1.8 (#31) - Updates: - Adding the following metrics to allow new calculations for violation status: - Per XCP metrics gfx_below_host_limit_ppt_acc - Per XCP metrics gfx_below_host_limit_thm_acc - Per XCP metrics gfx_low_utilization_acc - Per XCP metrics gfx_below_host_limit_total_acc - Increasing available JPEG engines to 40. Current ASICs may not support all 40. These will be indicated as UINT16_MAX or N/A in CLI. Signed-off-by: Juan Castillo <juan.castillo@amd.com> Co-authored-by: Charis Poag <Charis.Poag@amd.com>
이 커밋은 다음에 포함됨:
+38
@@ -4,6 +4,44 @@ Full documentation for rocm_smi_lib is available at [https://rocm.docs.amd.com/]
|
||||
|
||||
***All information listed below is for reference and subject to change.***
|
||||
|
||||
## amd_smi_lib for ROCm 6.5.0
|
||||
|
||||
### Added
|
||||
|
||||
- **Added support for GPU metrics 1.8**.
|
||||
- Added new fields for `rsmi_gpu_metrics_t` including:
|
||||
- Adding the following metrics to allow new calculations for violation status:
|
||||
- Per XCP metrics `gfx_below_host_limit_ppt_acc[XCP][MAX_XCC]` - GFX Clock Host limit Package Power Tracking violation counts
|
||||
- Per XCP metrics `gfx_below_host_limit_thm_acc[XCP][MAX_XCC]` - GFX Clock Host limit Thermal (TVIOL) violation counts
|
||||
- Per XCP metrics `gfx_low_utilization_acc[XCP][MAX_XCC]` - violation counts for how long low utilization caused the GPU to be below application clocks.
|
||||
- Per XCP metrics `gfx_below_host_limit_total_acc[XCP][MAX_XCC]` - violation counts for how long the GPU was held below application clocks by any limiter (see the new violation metrics above).
|
||||
- Increasing available JPEG engines to 40.
|
||||
Current ASICs may not support all 40 engines. Unsupported engines are indicated as UINT16_MAX, or N/A in the CLI.
|
||||
|
||||
### Changed
|
||||
|
||||
- N/A
|
||||
|
||||
### Removed
|
||||
|
||||
- N/A
|
||||
|
||||
### Optimized
|
||||
|
||||
- N/A
|
||||
|
||||
### Resolved issues
|
||||
|
||||
- N/A
|
||||
|
||||
### Upcoming changes
|
||||
|
||||
- N/A
|
||||
|
||||
### Known issues
|
||||
|
||||
- N/A
|
||||
|
||||
## rocm_smi_lib for ROCm 6.4
|
||||
|
||||
### Added
|
||||
|
||||
+15
-2
@@ -954,6 +954,11 @@ typedef struct metrics_table_header_t metrics_table_header_t;
|
||||
*/
|
||||
#define RSMI_MAX_NUM_JPEG_ENGS 32
|
||||
|
||||
/**
|
||||
* @brief This should match kRSMI_MAX_NUM_JPEG_ENG_V1
|
||||
*/
|
||||
#define RSMI_MAX_NUM_JPEG_ENG_V1 40
|
||||
|
||||
/**
|
||||
* @brief This should match kRSMI_MAX_NUM_CLKS
|
||||
*/
|
||||
@@ -1001,7 +1006,7 @@ struct amdgpu_xcp_metrics_t {
|
||||
*/
|
||||
/* Utilization Instantaneous (%) */
|
||||
uint32_t gfx_busy_inst[RSMI_MAX_NUM_XCC];
|
||||
uint16_t jpeg_busy[RSMI_MAX_NUM_JPEG_ENGS];
|
||||
uint16_t jpeg_busy[RSMI_MAX_NUM_JPEG_ENG_V1];
|
||||
uint16_t vcn_busy[RSMI_MAX_NUM_VCNS];
|
||||
|
||||
/* Utilization Accumulated (%) */
|
||||
@@ -1012,6 +1017,14 @@ struct amdgpu_xcp_metrics_t {
|
||||
*/
|
||||
/* Total App Clock Counter Accumulated */
|
||||
uint64_t gfx_below_host_limit_acc[RSMI_MAX_NUM_XCC];
|
||||
|
||||
/**
|
||||
* v1.8 additions
|
||||
*/
|
||||
uint64_t gfx_below_host_limit_ppt_acc[RSMI_MAX_NUM_XCC];
|
||||
uint64_t gfx_below_host_limit_thm_acc[RSMI_MAX_NUM_XCC];
|
||||
uint64_t gfx_low_utilization_acc[RSMI_MAX_NUM_XCC];
|
||||
uint64_t gfx_below_host_limit_total_acc[RSMI_MAX_NUM_XCC];
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@@ -1218,7 +1231,7 @@ typedef struct {
|
||||
/*
|
||||
* v1.7 additions
|
||||
*/
|
||||
/* VRAM max bandwidth at max memory clock (GB/s) */
|
||||
/* VRAM max bandwidth at max memory clock */
|
||||
uint64_t vram_max_bandwidth;
|
||||
|
||||
/* XGMI link status(up/down) */
|
||||
|
||||
@@ -1,44 +1,23 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017-2024, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
* Copyright (c) Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef ROCM_SMI_ROCM_SMI_GPU_METRICS_H_
|
||||
@@ -52,9 +31,12 @@
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <type_traits>
|
||||
#include <tuple>
|
||||
#include <variant>
|
||||
@@ -72,10 +54,11 @@ constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_1 = 1;
|
||||
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_2 = 2;
|
||||
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_3 = 3;
|
||||
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_4 = 4;
|
||||
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_8 = 8;
|
||||
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MAJOR_VER
|
||||
= kRSMI_GPU_METRICS_API_CONTENT_MAJOR_VER_1;
|
||||
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MINON_VER
|
||||
= kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_4;
|
||||
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MINOR_VER
|
||||
= kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_8;
|
||||
|
||||
|
||||
// Note: This *must* match NUM_HBM_INSTANCES
|
||||
@@ -96,6 +79,10 @@ constexpr uint32_t kRSMI_MAX_NUM_VCNS = 4;
|
||||
// Note: This *must* match NUM_JPEG_ENG
|
||||
constexpr uint32_t kRSMI_MAX_JPEG_ENGINES = 32;
|
||||
|
||||
// Note: Updated for amdgpu_xcp_metrics_v1_2.
|
||||
// Document provides NUM_JPEG_ENG_V1 but will rename to kRSMI_MAX_NUM_JPEG_ENG_V1
|
||||
constexpr uint32_t kRSMI_MAX_NUM_JPEG_ENG_V1 = 40;
|
||||
|
||||
// Note: This *must* match MAX_XCC
|
||||
constexpr uint32_t kRSMI_MAX_NUM_XCC = 8;
|
||||
|
||||
@@ -108,6 +95,15 @@ struct AMDGpuMetricsHeader_v1_t {
|
||||
uint8_t m_format_revision;
|
||||
uint8_t m_content_revision;
|
||||
};
|
||||
struct amdgpu_xcp_metrics {
|
||||
/* Utilization Instantaneous (%) */
|
||||
uint32_t gfx_busy_inst[kRSMI_MAX_NUM_XCC];
|
||||
uint16_t jpeg_busy[kRSMI_MAX_JPEG_ENGINES];
|
||||
uint16_t vcn_busy[kRSMI_MAX_NUM_VCNS];
|
||||
|
||||
/* Utilization Accumulated (%) */
|
||||
uint64_t gfx_busy_acc[kRSMI_MAX_NUM_XCC];
|
||||
};
|
||||
|
||||
struct amdgpu_xcp_metrics_v1_1 {
|
||||
/* Utilization Instantaneous (%) */
|
||||
@@ -122,14 +118,21 @@ struct amdgpu_xcp_metrics_v1_1 {
|
||||
uint64_t gfx_below_host_limit_acc[kRSMI_MAX_NUM_XCC];
|
||||
};
|
||||
|
||||
struct amdgpu_xcp_metrics {
|
||||
/* new for gpu metrics v1.8 */
|
||||
struct amdgpu_xcp_metrics_v1_2 {
|
||||
/* Utilization Instantaneous (%) */
|
||||
uint32_t gfx_busy_inst[kRSMI_MAX_NUM_XCC];
|
||||
uint16_t jpeg_busy[kRSMI_MAX_JPEG_ENGINES];
|
||||
uint16_t jpeg_busy[kRSMI_MAX_NUM_JPEG_ENG_V1];
|
||||
uint16_t vcn_busy[kRSMI_MAX_NUM_VCNS];
|
||||
|
||||
/* Utilization Accumulated (%) */
|
||||
uint64_t gfx_busy_acc[kRSMI_MAX_NUM_XCC];
|
||||
|
||||
/* Total App Clock Counter Accumulated */
|
||||
uint64_t gfx_below_host_limit_ppt_acc[kRSMI_MAX_NUM_XCC];
|
||||
uint64_t gfx_below_host_limit_thm_acc[kRSMI_MAX_NUM_XCC];
|
||||
uint64_t gfx_low_utilization_acc[kRSMI_MAX_NUM_XCC];
|
||||
uint64_t gfx_below_host_limit_total_acc[kRSMI_MAX_NUM_XCC];
|
||||
};
|
||||
|
||||
struct AMDGpuMetricsBase_t {
|
||||
@@ -602,7 +605,7 @@ struct AMDGpuMetrics_v17_t {
|
||||
uint16_t m_average_gfx_activity;
|
||||
uint16_t m_average_umc_activity; // memory controller
|
||||
|
||||
/* VRAM max bandwidth at max memory clock */
|
||||
/* VRAM max bandwidth at max memory clock (GB/s) */
|
||||
uint64_t m_vram_max_bandwidth; // new for 1.7
|
||||
|
||||
/* Energy (15.259uJ (2^-16) units) */
|
||||
@@ -685,7 +688,107 @@ struct AMDGpuMetrics_v17_t {
|
||||
/* PCIE other end recovery counter */
|
||||
uint32_t m_pcie_lc_perf_other_end_recovery;
|
||||
};
|
||||
using AMGpuMetricsLatest_t = AMDGpuMetrics_v17_t;
|
||||
|
||||
struct AMDGpuMetrics_v18_t {
|
||||
~AMDGpuMetrics_v18_t() = default;
|
||||
struct AMDGpuMetricsHeader_v1_t m_common_header;
|
||||
|
||||
/* Temperature (Celsius) */
|
||||
uint16_t m_temperature_hotspot;
|
||||
uint16_t m_temperature_mem;
|
||||
uint16_t m_temperature_vrsoc;
|
||||
|
||||
/* Power (Watts) */
|
||||
uint16_t m_current_socket_power;
|
||||
|
||||
/* Utilization (%) */
|
||||
uint16_t m_average_gfx_activity;
|
||||
uint16_t m_average_umc_activity; // memory controller
|
||||
|
||||
/* VRAM max bandwidthi (in GB/sec) at max memory clock */
|
||||
uint64_t m_mem_max_bandwidth;
|
||||
|
||||
/* Energy (15.259uJ (2^-16) units) */
|
||||
uint64_t m_energy_accumulator;
|
||||
|
||||
/* Driver attached timestamp (in ns) */
|
||||
uint64_t m_system_clock_counter;
|
||||
|
||||
/* Accumulation cycle counter */
|
||||
uint32_t m_accumulation_counter;
|
||||
|
||||
/* Accumulated throttler residencies */
|
||||
uint32_t m_prochot_residency_acc;
|
||||
uint32_t m_ppt_residency_acc;
|
||||
uint32_t m_socket_thm_residency_acc;
|
||||
uint32_t m_vr_thm_residency_acc;
|
||||
uint32_t m_hbm_thm_residency_acc;
|
||||
|
||||
/* Clock Lock Status. Each bit corresponds to clock instance */
|
||||
uint32_t m_gfxclk_lock_status;
|
||||
|
||||
/* Link width (number of lanes) and speed (in 0.1 GT/s) */
|
||||
uint16_t m_pcie_link_width;
|
||||
uint16_t m_pcie_link_speed;
|
||||
|
||||
/* XGMI bus width and bitrate (in Gbps) */
|
||||
uint16_t m_xgmi_link_width;
|
||||
uint16_t m_xgmi_link_speed;
|
||||
|
||||
/* Utilization Accumulated (%) */
|
||||
uint32_t m_gfx_activity_acc;
|
||||
uint32_t m_mem_activity_acc;
|
||||
|
||||
/*PCIE accumulated bandwidth (GB/sec) */
|
||||
uint64_t m_pcie_bandwidth_acc;
|
||||
|
||||
/*PCIE instantaneous bandwidth (GB/sec) */
|
||||
uint64_t m_pcie_bandwidth_inst;
|
||||
|
||||
/* PCIE L0 to recovery state transition accumulated count */
|
||||
uint64_t m_pcie_l0_to_recov_count_acc;
|
||||
|
||||
/* PCIE replay accumulated count */
|
||||
uint64_t m_pcie_replay_count_acc;
|
||||
|
||||
/* PCIE replay rollover accumulated count */
|
||||
uint64_t m_pcie_replay_rover_count_acc;
|
||||
|
||||
/* PCIE NAK sent accumulated count */
|
||||
uint32_t m_pcie_nak_sent_count_acc;
|
||||
|
||||
/* PCIE NAK received accumulated count */
|
||||
uint32_t m_pcie_nak_rcvd_count_acc;
|
||||
|
||||
/* XGMI accumulated data transfer size(KiloBytes) */
|
||||
uint64_t m_xgmi_read_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
|
||||
uint64_t m_xgmi_write_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
|
||||
|
||||
/* XGMI link status(active/inactive) */
|
||||
uint16_t m_xgmi_link_status[kRSMI_MAX_NUM_XGMI_LINKS];
|
||||
|
||||
uint16_t m_padding;
|
||||
|
||||
/* PMFW attached timestamp (10ns resolution) */
|
||||
uint64_t m_firmware_timestamp;
|
||||
|
||||
/* Current clocks (Mhz) */
|
||||
uint16_t m_current_gfxclk[kRSMI_MAX_NUM_GFX_CLKS];
|
||||
uint16_t m_current_socclk[kRSMI_MAX_NUM_CLKS];
|
||||
uint16_t m_current_vclk0[kRSMI_MAX_NUM_CLKS];
|
||||
uint16_t m_current_dclk0[kRSMI_MAX_NUM_CLKS];
|
||||
uint16_t m_current_uclk;
|
||||
|
||||
/* Number of current partition */
|
||||
uint16_t m_num_partition;
|
||||
|
||||
/* XCP metrics stats */
|
||||
struct amdgpu_xcp_metrics_v1_2 m_xcp_stats[kRSMI_MAX_NUM_XCP];
|
||||
|
||||
/* PCIE other end recovery counter */
|
||||
uint32_t m_pcie_lc_perf_other_end_recovery;
|
||||
};
|
||||
using AMGpuMetricsLatest_t = AMDGpuMetrics_v18_t;
|
||||
|
||||
/**
|
||||
* This is GPU Metrics version that gets to public access.
|
||||
@@ -900,11 +1003,18 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
|
||||
kMetricJpegBusy, // v1.6
|
||||
kMetricVcnBusy, // v1.6
|
||||
kMetricGfxBusyAcc, // v1.6
|
||||
kMetricGfxBelowHostLimitAccumulator, // v1.7
|
||||
|
||||
kMetricPcieLCPerfOtherEndRecov, // v1.6
|
||||
|
||||
kMetricVramMaxBandwidth, // v1.7
|
||||
kMetricXgmiLinkStatus, // v1.7
|
||||
kMetricGfxBelowHostLimitAccumulator, // v1.7
|
||||
|
||||
kMetricGfxBelowHostLimitPptAcc, // v1.8
|
||||
kMetricGfxBelowHostLimitThmAcc, // v1.8
|
||||
kMetricGfxBelowHostLimitTotalAcc, // v1.8
|
||||
kMetricGfxLowUtilitizationAcc, // v1.8
|
||||
|
||||
};
|
||||
using AMDGpuMetricsUnitTypeTranslationTbl_t = std::map<AMDGpuMetricsUnitType_t, std::string>;
|
||||
|
||||
@@ -943,6 +1053,7 @@ enum class AMDGpuMetricVersionFlags_t : AMDGpuMetricVersionFlagId_t
|
||||
kGpuMetricV15 = (0x1 << 5),
|
||||
kGpuMetricV16 = (0x1 << 6),
|
||||
kGpuMetricV17 = (0x1 << 7),
|
||||
kGpuMetricV18 = (0x1 << 8), // Added new version flag
|
||||
};
|
||||
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint16_t, AMDGpuMetricVersionFlags_t>;
|
||||
using GpuMetricTypePtr_t = std::shared_ptr<void>;
|
||||
@@ -952,27 +1063,24 @@ class GpuMetricsBase_t {
|
||||
virtual ~GpuMetricsBase_t() = default;
|
||||
virtual size_t sizeof_metric_table() = 0;
|
||||
virtual GpuMetricTypePtr_t get_metrics_table() = 0;
|
||||
virtual void dump_internal_metrics_table() = 0;
|
||||
virtual AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() = 0;
|
||||
virtual rsmi_status_t populate_metrics_dynamic_tbl() = 0;
|
||||
virtual AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() = 0;
|
||||
virtual void set_device_id(uint32_t device_id) { m_device_id = device_id; }
|
||||
virtual void set_partition_id(uint32_t partition_id) { m_partition_id = partition_id; }
|
||||
virtual AMDGpuDynamicMetricsTbl_t get_metrics_dynamic_tbl() {
|
||||
return m_metrics_dynamic_tbl;
|
||||
return m_base_metrics_dynamic_tbl;
|
||||
}
|
||||
|
||||
protected:
|
||||
AMDGpuDynamicMetricsTbl_t m_metrics_dynamic_tbl;
|
||||
AMDGpuDynamicMetricsTbl_t m_base_metrics_dynamic_tbl;
|
||||
uint64_t m_metrics_timestamp;
|
||||
uint32_t m_device_id;
|
||||
uint32_t m_partition_id;
|
||||
|
||||
};
|
||||
using GpuMetricsBasePtr = std::shared_ptr<GpuMetricsBase_t>;
|
||||
using AMDGpuMetricFactories_t = const std::map<AMDGpuMetricVersionFlags_t, GpuMetricsBasePtr>;
|
||||
|
||||
|
||||
class GpuMetricsBase_v11_t final : public GpuMetricsBase_t {
|
||||
public:
|
||||
virtual ~GpuMetricsBase_v11_t() = default;
|
||||
@@ -989,10 +1097,6 @@ class GpuMetricsBase_v11_t final : public GpuMetricsBase_t {
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override {
|
||||
return;
|
||||
}
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV11;
|
||||
}
|
||||
@@ -1022,10 +1126,6 @@ class GpuMetricsBase_v12_t final : public GpuMetricsBase_t {
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override {
|
||||
return;
|
||||
}
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV12;
|
||||
}
|
||||
@@ -1054,8 +1154,6 @@ class GpuMetricsBase_v13_t final : public GpuMetricsBase_t {
|
||||
return (m_gpu_metric_ptr);
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override;
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV13;
|
||||
}
|
||||
@@ -1085,8 +1183,6 @@ class GpuMetricsBase_v14_t final : public GpuMetricsBase_t {
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override;
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV14;
|
||||
}
|
||||
@@ -1116,8 +1212,6 @@ class GpuMetricsBase_v15_t final : public GpuMetricsBase_t {
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override;
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV15;
|
||||
}
|
||||
@@ -1147,8 +1241,6 @@ class GpuMetricsBase_v16_t final : public GpuMetricsBase_t {
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override;
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV16;
|
||||
}
|
||||
@@ -1177,8 +1269,6 @@ class GpuMetricsBase_v17_t final : public GpuMetricsBase_t {
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
void dump_internal_metrics_table() override;
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV17;
|
||||
}
|
||||
@@ -1191,6 +1281,34 @@ class GpuMetricsBase_v17_t final : public GpuMetricsBase_t {
|
||||
std::shared_ptr<AMDGpuMetrics_v17_t> m_gpu_metric_ptr;
|
||||
};
|
||||
|
||||
class GpuMetricsBase_v18_t final : public GpuMetricsBase_t {
|
||||
public:
|
||||
~GpuMetricsBase_v18_t() = default;
|
||||
|
||||
size_t sizeof_metric_table() override {
|
||||
return sizeof(AMDGpuMetrics_v18_t);
|
||||
}
|
||||
|
||||
GpuMetricTypePtr_t get_metrics_table() override {
|
||||
if (!m_gpu_metric_ptr) {
|
||||
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v18_t*){});
|
||||
}
|
||||
assert(m_gpu_metric_ptr != nullptr);
|
||||
return m_gpu_metric_ptr;
|
||||
}
|
||||
|
||||
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override {
|
||||
return AMDGpuMetricVersionFlags_t::kGpuMetricV18;
|
||||
}
|
||||
|
||||
rsmi_status_t populate_metrics_dynamic_tbl() override;
|
||||
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
|
||||
|
||||
private:
|
||||
AMDGpuMetrics_v18_t m_gpu_metrics_tbl;
|
||||
std::shared_ptr<AMDGpuMetrics_v18_t> m_gpu_metric_ptr;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind,
|
||||
AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
|
||||
|
||||
@@ -3795,9 +3795,41 @@ def showGPUMetrics(deviceList):
|
||||
},
|
||||
"xcp_stats.gfx_busy_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_ppt_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_thm_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_low_utilization_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_total_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": count,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_ppt_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_thm_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
"xcp_stats.gfx_low_utilization_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
"xcp_stats.gfx_below_host_limit_total_acc": {
|
||||
"value": gpu_metrics.xcp_stats,
|
||||
"unit": percent_unit,
|
||||
},
|
||||
@@ -3841,6 +3873,30 @@ def showGPUMetrics(deviceList):
|
||||
for _, val in enumerate(item.gfx_below_host_limit_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_below_host_limit_ppt_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_below_host_limit_ppt_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_below_host_limit_thm_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_below_host_limit_thm_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_low_utilization_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_low_utilization_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
if 'xcp_stats.gfx_below_host_limit_total_acc' in k:
|
||||
for curr_xcp, item in enumerate(v['value']):
|
||||
print_xcp_detail = []
|
||||
for _, val in enumerate(item.gfx_below_host_limit_total_acc):
|
||||
print_xcp_detail.append(validateIfMaxUint(val, UIntegerTypes.UINT64_T))
|
||||
printLog(device, k + " (" + str(v["unit"]) + ")", str(print_xcp_detail), xcp=str(curr_xcp))
|
||||
|
||||
if int(device) < (len(deviceList) - 1):
|
||||
printLogSpacer()
|
||||
|
||||
@@ -662,10 +662,14 @@ class amdgpu_xcp_metrics_t(Structure):
|
||||
# amdgpu_xcp_metrics_t._pack_ = 1 # source:False
|
||||
amdgpu_xcp_metrics_t._fields_ = [
|
||||
('gfx_busy_inst', c_uint32 * 8),
|
||||
('jpeg_busy', c_uint16 * 32),
|
||||
('jpeg_busy', c_uint16 * 40),
|
||||
('vcn_busy', c_uint16 * 4),
|
||||
('gfx_busy_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_ppt_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_thm_acc', c_uint64 * 8),
|
||||
('gfx_low_utilization_acc', c_uint64 * 8),
|
||||
('gfx_below_host_limit_total_acc', c_uint64 * 8),
|
||||
]
|
||||
xcp_stats_t = amdgpu_xcp_metrics_t
|
||||
|
||||
|
||||
@@ -991,6 +991,11 @@ int main() {
|
||||
std::cout << "\t -> " << std::dec << dclk << "\n";
|
||||
}
|
||||
|
||||
std::cout << "\t**.jpeg_activity[] : " << std::dec << "\n";
|
||||
for (const auto& jpeg : gpu_metrics.jpeg_activity) {
|
||||
std::cout << "\t -> " << std::dec << jpeg << "\n";
|
||||
}
|
||||
|
||||
std::cout << std::dec << "xcp_stats.gfx_busy_inst = \n";
|
||||
auto xcp = 0;
|
||||
for (auto& row : gpu_metrics.xcp_stats) {
|
||||
@@ -1046,6 +1051,50 @@ int main() {
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_ppt_acc = \n"; // new for 1.8
|
||||
for (auto& row : gpu_metrics.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_below_host_limit_ppt_acc),
|
||||
std::end(row.gfx_below_host_limit_ppt_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_thm_acc = \n"; // new for 1.8
|
||||
for (auto& row : gpu_metrics.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_below_host_limit_thm_acc),
|
||||
std::end(row.gfx_below_host_limit_thm_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_low_utilization_acc = \n";
|
||||
for (auto& row : gpu_metrics.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_low_utilization_acc),
|
||||
std::end(row.gfx_low_utilization_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_total_acc = \n";
|
||||
for (auto& row : gpu_metrics.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_below_host_limit_total_acc),
|
||||
std::end(row.gfx_below_host_limit_total_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
std::cout << "\n";
|
||||
std::cout << "\t ** -> Checking metrics with constant changes ** " << "\n";
|
||||
constexpr uint16_t kMAX_ITER_TEST = 10;
|
||||
|
||||
+564
-717
파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
Diff 로드
@@ -379,6 +379,50 @@ void TestGpuMetricsRead::Run(void) {
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
// new for gpu metrics v1.8
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_ppt_acc = \n";
|
||||
for (auto& row : smu.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_below_host_limit_ppt_acc),
|
||||
std::end(row.gfx_below_host_limit_ppt_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_thm_acc = \n";
|
||||
for (auto& row : smu.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_below_host_limit_thm_acc),
|
||||
std::end(row.gfx_below_host_limit_thm_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_low_utilization_acc = \n";
|
||||
for (auto& row : smu.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_low_utilization_acc),
|
||||
std::end(row.gfx_low_utilization_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
xcp = 0;
|
||||
std::cout << std::dec << "xcp_stats.gfx_below_host_limit_total_acc = \n";
|
||||
for (auto& row : smu.xcp_stats) {
|
||||
std::cout << "XCP[" << xcp << "] = " << "[ ";
|
||||
std::copy(std::begin(row.gfx_below_host_limit_total_acc),
|
||||
std::end(row.gfx_below_host_limit_total_acc),
|
||||
amd::smi::make_ostream_joiner(&std::cout, ", "));
|
||||
std::cout << " ]\n";
|
||||
xcp++;
|
||||
}
|
||||
|
||||
std::cout << "\n\n";
|
||||
std::cout << "\t ** -> Checking metrics with constant changes ** " << "\n";
|
||||
|
||||
새 이슈에서 참조
사용자 차단