rocm_smi_lib: Fix Modernize and refactor gpu_metrics

Adds support for 'gpu_metrics_v1_4' and new counters

Code changes related to the following:
  * rsmi gpu_metrics APIs
  * rsmi gpu_metrics Logs
  * The new gpu_metrics are now part of the Device

Build changes related to the following: None

Change-Id: Ie748e977cd0a01c6a2fb82260014c0699605dbb3
Signed-off-by: Oliveira, Daniel <daniel.oliveira@amd.com>


[ROCm/rocm_smi_lib commit: 4e4ebde640]
Этот коммит содержится в:
Oliveira, Daniel
2023-09-29 12:49:48 -05:00
родитель d57d65a607
Коммит 5e444f87ad
6 изменённых файлов: 2422 добавлений и 10 удалений
+2
Просмотреть файл
@@ -73,6 +73,7 @@ endif()
## Compiler flags
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -m64 -msse -msse2 ")
# Security options
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wconversion -Wcast-align ")
@@ -148,6 +149,7 @@ set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_exception.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_counters.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_kfd.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_io_link.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_gpu_metrics.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_logger.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_properties.h")
-2
Просмотреть файл
@@ -929,12 +929,10 @@ struct metrics_table_header_t {
* @brief The GPU metrics version 3
*/
#define RSMI_GPU_METRICS_API_CONTENT_VER_3 3
/**
* @brief This should match NUM_HBM_INSTANCES
*/
#define RSMI_NUM_HBM_INSTANCES 4
/**
* @brief Unit conversion factor for HBM temperatures
*/
+20
Просмотреть файл
@@ -60,6 +60,7 @@
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_counters.h"
#include "rocm_smi/rocm_smi_properties.h"
#include "rocm_smi/rocm_smi_gpu_metrics.h"
#include "shared_mutex.h" //NOLINT
namespace amd {
@@ -228,6 +229,18 @@ class Device {
template <typename T> std::string readBootPartitionState(uint32_t dv_ind);
rsmi_status_t check_amdgpu_property_reinforcement_query(uint32_t dev_idx, AMDGpuVerbTypes_t verb_type);
void dev_set_gpu_metric(GpuMetricsBasePtr gpu_metrics_ptr) { m_gpu_metrics_ptr = gpu_metrics_ptr; };
GpuMetricsBasePtr& dev_get_gpu_metric() { return m_gpu_metrics_ptr; };
const AMDGpuMetricsHeader_v1_t& dev_get_metrics_header() {return m_gpu_metrics_header; }
rsmi_status_t setup_gpu_metrics_reading();
rsmi_status_t dev_read_gpu_metrics_header_data();
rsmi_status_t dev_read_gpu_metrics_all_data();
rsmi_status_t dev_log_gpu_metrics();
rsmi_status_t run_internal_gpu_metrics_query(AMDGpuMetricsUnitType_t metric_counter, AMDGpuDynamicMetricTblValues_t& values);
template<typename T>
rsmi_status_t dev_run_gpu_metrics_query(AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
private:
std::shared_ptr<Monitor> monitor_;
@@ -249,6 +262,8 @@ class Device {
void *p_binary_data);
int writeDevInfoStr(DevInfoTypes type, std::string valStr);
rsmi_status_t run_amdgpu_property_reinforcement_query(const AMDGpuPropertyQuery_t& amdgpu_property_query);
uint64_t bdfid_;
uint64_t kfd_gpu_id_;
std::unordered_set<rsmi_event_group_t,
@@ -258,7 +273,12 @@ class Device {
int evt_notif_anon_fd_;
FILE *evt_notif_anon_file_ptr_;
struct metrics_table_header_t gpu_metrics_ver_;
GpuMetricsBasePtr m_gpu_metrics_ptr;
AMDGpuMetricsHeader_v1_t m_gpu_metrics_header;
uint64_t m_gpu_metrics_updated_timestamp;
};
+681
Просмотреть файл
@@ -0,0 +1,681 @@
/*
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef ROCM_SMI_ROCM_SMI_GPU_METRICS_H_
#define ROCM_SMI_ROCM_SMI_GPU_METRICS_H_
#include "rocm_smi/rocm_smi_common.h"
#include "rocm_smi/rocm_smi.h"
#include <cstdint>
#include <map>
#include <memory>
#include <tuple>
#include <vector>
/**
* All 1.4 and newer GPU metrics are now defined in this header.
*
*/
namespace amd::smi
{
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MAJOR_VER_1 = 1;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_1 = 1;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_2 = 2;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_3 = 3;
constexpr uint32_t kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_4 = 4;
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MAJOR_VER = kRSMI_GPU_METRICS_API_CONTENT_MAJOR_VER_1;
constexpr uint32_t kRSMI_LATEST_GPU_METRICS_API_CONTENT_MINON_VER = kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_4;
// Note: As gpu metrics are updating
constexpr uint32_t kRSMI_GPU_METRICS_EXPIRATION_SECS = 5;
// Note: This *must* match NUM_HBM_INSTANCES
constexpr uint32_t kRSMI_MAX_NUM_HBM_INSTANCES = 4;
// Note: This *must* match NUM_XGMI_LINKS
constexpr uint32_t kRSMI_MAX_NUM_XGMI_LINKS = 8;
// Note: This *must* match MAX_GFX_CLKS
constexpr uint32_t kRSMI_MAX_NUM_GFX_CLKS = 8;
// Note: This *must* match MAX_CLKS
constexpr uint32_t kRSMI_MAX_NUM_CLKS = 4;
// Note: This *must* match NUM_VCN
constexpr uint32_t kRSMI_MAX_NUM_VCN = 4;
struct AMDGpuMetricsHeader_v1_t
{
uint16_t m_structure_size;
uint8_t m_format_revision;
uint8_t m_content_revision;
};
struct AMDGpuMetricsBase_t;
using AMDGpuMetricsBaseRef = AMDGpuMetricsBase_t&;
struct AMDGpuMetricsBase_t
{
virtual ~AMDGpuMetricsBase_t() = default;
};
struct AMDGpuMetrics_v11_t : AMDGpuMetricsBase_t
{
~AMDGpuMetrics_v11_t() = default;
struct AMDGpuMetricsHeader_v1_t m_common_header;
// Temperature
uint16_t m_temperature_edge;
uint16_t m_temperature_hotspot;
uint16_t m_temperature_mem;
uint16_t m_temperature_vrgfx;
uint16_t m_temperature_vrsoc;
uint16_t m_temperature_vrmem;
// Utilization
uint16_t m_average_gfx_activity;
uint16_t m_average_umc_activity; // memory controller
uint16_t m_average_mm_activity; // UVD or VCN
// Power/Energy
uint16_t m_average_socket_power;
uint64_t m_energy_accumulator;
// Driver attached timestamp (in ns)
uint64_t m_system_clock_counter;
// Average clocks
uint16_t m_average_gfxclk_frequency;
uint16_t m_average_socclk_frequency;
uint16_t m_average_uclk_frequency;
uint16_t m_average_vclk0_frequency;
uint16_t m_average_dclk0_frequency;
uint16_t m_average_vclk1_frequency;
uint16_t m_average_dclk1_frequency;
// Current clocks
uint16_t m_current_gfxclk;
uint16_t m_current_socclk;
uint16_t m_current_uclk;
uint16_t m_current_vclk0;
uint16_t m_current_dclk0;
uint16_t m_current_vclk1;
uint16_t m_current_dclk1;
// Throttle status
uint32_t m_throttle_status;
// Fans
uint16_t m_current_fan_speed;
// Link width/speed
uint16_t m_pcie_link_width;
uint16_t m_pcie_link_speed; // in 0.1 GT/s
uint16_t m_padding;
uint32_t m_gfx_activity_acc;
uint32_t m_mem_activity_acc;
uint16_t m_temperature_hbm[kRSMI_MAX_NUM_HBM_INSTANCES];
};
struct AMDGpuMetrics_v12_t : AMDGpuMetricsBase_t
{
~AMDGpuMetrics_v12_t() = default;
struct AMDGpuMetricsHeader_v1_t m_common_header;
// Temperature
uint16_t m_temperature_edge;
uint16_t m_temperature_hotspot;
uint16_t m_temperature_mem;
uint16_t m_temperature_vrgfx;
uint16_t m_temperature_vrsoc;
uint16_t m_temperature_vrmem;
// Utilization
uint16_t m_average_gfx_activity;
uint16_t m_average_umc_activity; // memory controller
uint16_t m_average_mm_activity; // UVD or VCN
// Power/Energy
uint16_t m_average_socket_power;
uint64_t m_energy_accumulator; // v1 mod. (32->64)
// Driver attached timestamp (in ns)
uint64_t m_system_clock_counter; // v1 mod. (moved from top of struct)
// Average clocks
uint16_t m_average_gfxclk_frequency;
uint16_t m_average_socclk_frequency;
uint16_t m_average_uclk_frequency;
uint16_t m_average_vclk0_frequency;
uint16_t m_average_dclk0_frequency;
uint16_t m_average_vclk1_frequency;
uint16_t m_average_dclk1_frequency;
// Current clocks
uint16_t m_current_gfxclk;
uint16_t m_current_socclk;
uint16_t m_current_uclk;
uint16_t m_current_vclk0;
uint16_t m_current_dclk0;
uint16_t m_current_vclk1;
uint16_t m_current_dclk1;
// Throttle status
uint32_t m_throttle_status;
// Fans
uint16_t m_current_fan_speed;
// Link width/speed
uint16_t m_pcie_link_width; // v1 mod.(8->16)
uint16_t m_pcie_link_speed; // in 0.1 GT/s; v1 mod. (8->16)
uint16_t m_padding; // new in v1
uint32_t m_gfx_activity_acc; // new in v1
uint32_t m_mem_activity_acc; // new in v1
uint16_t m_temperature_hbm[kRSMI_MAX_NUM_HBM_INSTANCES]; // new in v1
// PMFW attached timestamp (10ns resolution)
uint64_t m_firmware_timestamp;
};
struct AMDGpuMetrics_v13_t : AMDGpuMetricsBase_t
{
~AMDGpuMetrics_v13_t() = default;
struct AMDGpuMetricsHeader_v1_t m_common_header;
// Temperature
uint16_t m_temperature_edge;
uint16_t m_temperature_hotspot;
uint16_t m_temperature_mem;
uint16_t m_temperature_vrgfx;
uint16_t m_temperature_vrsoc;
uint16_t m_temperature_vrmem;
// Utilization
uint16_t m_average_gfx_activity;
uint16_t m_average_umc_activity; // memory controller
uint16_t m_average_mm_activity; // UVD or VCN
// Power/Energy
uint16_t m_average_socket_power;
uint64_t m_energy_accumulator; // v1 mod. (32->64)
// Driver attached timestamp (in ns)
uint64_t m_system_clock_counter; // v1 mod. (moved from top of struct)
// Average clocks
uint16_t m_average_gfxclk_frequency;
uint16_t m_average_socclk_frequency;
uint16_t m_average_uclk_frequency;
uint16_t m_average_vclk0_frequency;
uint16_t m_average_dclk0_frequency;
uint16_t m_average_vclk1_frequency;
uint16_t m_average_dclk1_frequency;
// Current clocks
uint16_t m_current_gfxclk;
uint16_t m_current_socclk;
uint16_t m_current_uclk;
uint16_t m_current_vclk0;
uint16_t m_current_dclk0;
uint16_t m_current_vclk1;
uint16_t m_current_dclk1;
// Throttle status
uint32_t m_throttle_status;
// Fans
uint16_t m_current_fan_speed;
// Link width/speed
uint16_t m_pcie_link_width; // v1 mod.(8->16)
uint16_t m_pcie_link_speed; // in 0.1 GT/s; v1 mod. (8->16)
uint16_t m_padding; // new in v1
uint32_t m_gfx_activity_acc; // new in v1
uint32_t m_mem_activity_acc; // new in v1
uint16_t m_temperature_hbm[kRSMI_MAX_NUM_HBM_INSTANCES]; // new in v1
// PMFW attached timestamp (10ns resolution)
uint64_t m_firmware_timestamp;
// Voltage (mV)
uint16_t m_voltage_soc;
uint16_t m_voltage_gfx;
uint16_t m_voltage_mem;
uint16_t m_padding1;
// Throttle status
uint64_t m_indep_throttle_status;
};
struct AMDGpuMetrics_v14_t : AMDGpuMetricsBase_t
{
~AMDGpuMetrics_v14_t() = default;
struct AMDGpuMetricsHeader_v1_t m_common_header;
// Temperature (Celsius). It will be zero (0) if unsupported.
uint16_t m_temperature_hotspot;
uint16_t m_temperature_mem;
uint16_t m_temperature_vrsoc;
// Power (Watts)
uint16_t m_curr_socket_power;
// Utilization (%)
uint16_t m_average_gfx_activity;
uint16_t m_average_umc_activity; // memory controller
uint16_t m_vcn_activity[kRSMI_MAX_NUM_VCN]; // VCN instances activity percent (encode/decode)
// Energy (15.259uJ (2^-16) units)
uint64_t m_energy_accumulator;
// Driver attached timestamp (in ns)
uint64_t m_system_clock_counter;
// Throttle status
uint32_t m_throttle_status;
// Clock Lock Status. Each bit corresponds to clock instance
uint32_t m_gfxclk_lock_status;
// Link width (number of lanes) and speed (in 0.1 GT/s)
uint16_t m_pcie_link_width;
uint16_t m_pcie_link_speed; // in 0.1 GT/s
// XGMI bus width and bitrate (in Gbps)
uint16_t m_xgmi_link_width;
uint16_t m_xgmi_link_speed;
// Utilization Accumulated (%)
uint32_t m_gfx_activity_acc;
uint32_t m_mem_activity_acc;
// PCIE accumulated bandwidth (GB/sec)
uint64_t m_pcie_bandwidth_acc;
// PCIE instantaneous bandwidth (GB/sec)
uint64_t m_pcie_bandwidth_inst;
// XGMI accumulated data transfer size(KiloBytes)
uint64_t m_xgmi_read_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
uint64_t m_xgmi_write_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
// PMFW attached timestamp (10ns resolution)
uint64_t m_firmware_timestamp;
// Current clocks (Mhz)
uint16_t m_current_gfxclk[kRSMI_MAX_NUM_GFX_CLKS];
uint16_t m_current_socclk[kRSMI_MAX_NUM_CLKS];
uint16_t m_current_vclk0[kRSMI_MAX_NUM_CLKS];
uint16_t m_current_dclk0[kRSMI_MAX_NUM_CLKS];
uint16_t m_current_uclk;
uint16_t m_padding;
};
using AMGpuMetricsLatest_t = AMDGpuMetrics_v14_t;
using GPUMetricTempHbm_t = decltype(AMDGpuMetrics_v13_t::m_temperature_hbm);
using GPUMetricTempHbmTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_HBM_INSTANCES>;
using GPUMetricVcnActivity_t = decltype(AMDGpuMetrics_v14_t::m_vcn_activity);
using GPUMetricVcnActivityTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_VCN>;
using GPUMetricXgmiReadDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_read_data_acc);
using GPUMetricXgmiWriteDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_write_data_acc);
using GPUMetricXgmiAccTbl_t = std::array<uint64_t, kRSMI_MAX_NUM_XGMI_LINKS>;
using GPUMetricCurrGfxClk_t = decltype(AMDGpuMetrics_v14_t::m_current_gfxclk);
using GPUMetricCurrGfxClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_GFX_CLKS>;
using GPUMetricCurrSocClk_t = decltype(AMDGpuMetrics_v14_t::m_current_socclk);
using GPUMetricCurrSocClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
using GPUMetricCurrVClk0_t = decltype(AMDGpuMetrics_v14_t::m_current_vclk0);
using GPUMetricCurrVClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
using GPUMetricCurrDClk0_t = decltype(AMDGpuMetrics_v14_t::m_current_dclk0);
using GPUMetricCurrDClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
/*
* When a new metric table is released, we have to update: *
1. Constants related to the new metrics added;
(ie: kRSMI_MAX_NUM_XGMI_LINKS)
2. Constants related to new version:
(ie: kRSMI_GPU_METRICS_API_CONTENT_MAJOR_VER_1)
(ie: kRSMI_GPU_METRICS_API_CONTENT_MINOR_VER_x)
(ie: kRSMI_LATEST_GPU_METRICS_API_CONTENT_MAJOR_VER)
(ie: kRSMI_LATEST_GPU_METRICS_API_CONTENT_MINOR_VER)
3. Check if still use the same existing header or if a new one is needed:
(ie: AMDGpuMetricsHeader_v1_t)
4. Create a new struct representing the new table format
(ie: AMDGpuMetrics_v13_t -> AMDGpuMetrics_v14_t)
5. AMGpuMetricsLatest_t -> Newest AMDGpuMetrics_v1x_t
6. AMDGpuMetricVersionFlags_t
(ie: AMDGpuMetricVersionFlags_t::kGpuMetricV14)
*/
using AMDGpuMetricTypeId_t = uint32_t;
using AMDGpuMetricTypeIdSeq_t = uint32_t;
using AMDGpuMetricVersionFlagId_t = uint32_t;
enum class AMDGpuMetricsClassId_t : AMDGpuMetricTypeId_t
{
kGpuMetricHeader = 0,
kGpuMetricTemperature,
kGpuMetricUtilization,
kGpuMetricPowerEnergy,
kGpuMetricSystemClockCounter,
kGpuMetricAverageClock,
kGpuMetricCurrentClock,
kGpuMetricThrottleStatus,
kGpuMetricGfxClkLockStatus,
kGpuMetricCurrentFanSpeed,
kGpuMetricLinkWidthSpeed,
kGpuMetricVoltage,
kGpuMetricTimestamp,
};
using AMDGpuMetricsClassIdTranslationTbl_t = std::map<AMDGpuMetricsClassId_t, std::string>;
enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
{
// kGpuMetricTemperature counters
kMetricTempEdge,
kMetricTempHotspot,
kMetricTempMem,
kMetricTempVrGfx,
kMetricTempVrSoc,
kMetricTempVrMem,
kMetricTempHbm,
// kGpuMetricUtilization counters
kMetricAvgGfxActivity,
kMetricAvgUmcActivity,
kMetricAvgMmActivity,
kMetricGfxActivityAccumulator,
kMetricMemActivityAccumulator,
kMetricVcnActivity,
// kGpuMetricAverageClock counters
kMetricAvgGfxClockFrequency,
kMetricAvgSocClockFrequency,
kMetricAvgUClockFrequency,
kMetricAvgVClock0Frequency,
kMetricAvgDClock0Frequency,
kMetricAvgVClock1Frequency,
kMetricAvgDClock1Frequency,
// kGpuMetricCurrentClock counters
kMetricCurrGfxClock,
kMetricCurrSocClock,
kMetricCurrUClock,
kMetricCurrVClock0,
kMetricCurrDClock0,
kMetricCurrVClock1,
kMetricCurrDClock1,
// kGpuMetricThrottleStatus counters
kMetricThrottleStatus,
kMetricIndepThrottleStatus,
// kGpuMetricGfxClkLockStatus counters
kMetricGfxClkLockStatus,
// kGpuMetricCurrentFanSpeed counters
kMetricCurrFanSpeed,
// kGpuMetricLinkWidthSpeed counters
kMetricPcieLinkWidth,
kMetricPcieLinkSpeed,
kMetricPcieBandwidthAccumulator,
kMetricPcieBandwidthInst,
kMetricXgmiLinkWidth,
kMetricXgmiLinkSpeed,
kMetricXgmiReadDataAccumulator,
kMetricXgmiWriteDataAccumulator,
// kGpuMetricPowerEnergy counters
kMetricAvgSocketPower,
kMetricCurrSocketPower,
kMetricEnergyAccumulator,
// kGpuMetricVoltage counters
kMetricVoltageSoc,
kMetricVoltageGfx,
kMetricVoltageMem,
// kGpuMetricTimestamp counters
kMetricTSClockCounter,
kMetricTSFirmware,
};
using AMDGpuMetricsUnitTypeTranslationTbl_t = std::map<AMDGpuMetricsUnitType_t, std::string>;
using AMDGpuMetricsDataTypeId_t = uint8_t;
enum class AMDGpuMetricsDataType_t : AMDGpuMetricsDataTypeId_t
{
kUInt8,
kUInt16,
kUInt32,
kUInt64,
};
struct AMDGpuDynamicMetricsValue_t
{
uint64_t m_value;
std::string m_info;
AMDGpuMetricsDataType_t m_original_type;
};
using AMDGpuDynamicMetricTblValues_t = std::vector<AMDGpuDynamicMetricsValue_t>;
using AMDGpuDynamicMetricsTbl_t = std::map<AMDGpuMetricsClassId_t, std::map<AMDGpuMetricsUnitType_t, AMDGpuDynamicMetricTblValues_t>>;
// Note: All supported metric versions are listed her
// If not here, they are not supported
enum class AMDGpuMetricVersionFlags_t : AMDGpuMetricVersionFlagId_t
{
kGpuMetricNone = 0x0,
kGpuMetricV10 = (0x1 << 0),
kGpuMetricV11 = (0x1 << 1),
kGpuMetricV12 = (0x1 << 2),
kGpuMetricV13 = (0x1 << 3),
kGpuMetricV14 = (0x1 << 4),
};
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint64_t, AMDGpuMetricVersionFlags_t>;
class GpuMetricsBase_t;
using GpuMetricsBasePtr = std::shared_ptr<GpuMetricsBase_t>;
class GpuMetricsBase_t
{
public:
virtual ~GpuMetricsBase_t() = default;
virtual size_t sizeof_metric_table() = 0;
virtual AMDGpuMetricsBaseRef get_metrics_table() = 0;
virtual AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() = 0;
virtual rsmi_status_t populate_metrics_dynamic_tbl() = 0;
virtual AMDGpuDynamicMetricsTbl_t get_metrics_dynamic_tbl() {
return m_metrics_dynamic_tbl;
}
protected:
AMDGpuDynamicMetricsTbl_t m_metrics_dynamic_tbl;
uint64_t m_metrics_timestamp;
};
using AMDGpuMetricFactories_t = std::map<AMDGpuMetricVersionFlags_t, GpuMetricsBasePtr>;
class GpuMetricsBase_v11_t final : public GpuMetricsBase_t
{
public:
~GpuMetricsBase_v11_t() = default;
size_t sizeof_metric_table() override {
return sizeof(AMDGpuMetrics_v11_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
{
return m_gpu_metrics_tbl;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
{
return AMDGpuMetricVersionFlags_t::kGpuMetricV11;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
private:
AMDGpuMetrics_v11_t m_gpu_metrics_tbl;
};
class GpuMetricsBase_v12_t final : public GpuMetricsBase_t
{
public:
~GpuMetricsBase_v12_t() = default;
size_t sizeof_metric_table() override {
return sizeof(AMDGpuMetrics_v12_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
{
return m_gpu_metrics_tbl;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
{
return AMDGpuMetricVersionFlags_t::kGpuMetricV12;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
private:
AMDGpuMetrics_v12_t m_gpu_metrics_tbl;
};
class GpuMetricsBase_v13_t final : public GpuMetricsBase_t
{
public:
~GpuMetricsBase_v13_t() = default;
size_t sizeof_metric_table() override {
return sizeof(AMDGpuMetrics_v13_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
{
return m_gpu_metrics_tbl;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
{
return AMDGpuMetricVersionFlags_t::kGpuMetricV13;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
private:
AMDGpuMetrics_v13_t m_gpu_metrics_tbl;
};
class GpuMetricsBase_v14_t final : public GpuMetricsBase_t
{
public:
~GpuMetricsBase_v14_t() = default;
size_t sizeof_metric_table() override {
return sizeof(AMDGpuMetrics_v14_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
{
return m_gpu_metrics_tbl;
}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
{
return AMDGpuMetricVersionFlags_t::kGpuMetricV14;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
private:
AMDGpuMetrics_v14_t m_gpu_metrics_tbl;
};
template<typename T>
rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind, AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
} // namespace amd::smi
#endif // ROCM_SMI_ROCM_SMI_GPU_METRICS_H_
+611 -2
Просмотреть файл
@@ -3517,7 +3517,6 @@ rsmi_dev_activity_avg_mm_get(uint32_t dv_ind, uint16_t* avg_activity) {
}
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
auto avg_mm_activity(uint16_t(0));
rsmi_activity_metric_counter_t activity_metric_counter;
status_code = rsmi_dev_activity_metric_get(dv_ind, rsmi_activity_metric_t::RSMI_ACTIVITY_MM, &activity_metric_counter);
avg_activity = &activity_metric_counter.average_mm_activity;
@@ -3535,7 +3534,6 @@ rsmi_dev_activity_avg_mm_get(uint32_t dv_ind, uint16_t* avg_activity) {
CATCH
}
rsmi_status_t
rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) {
TRY
@@ -5405,6 +5403,616 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) {
CATCH
}
//
// NOTE: APIs related to new 'GPU Metrics' related work are added here
// so they can be used/tested.
//
rsmi_status_t
rsmi_dev_temp_hotspot_get(uint32_t dv_ind, uint16_t* hotspot_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempHotspot);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *hotspot_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_temp_mem_get(uint32_t dv_ind, uint16_t* mem_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempMem);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *mem_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_temp_vrsoc_get(uint32_t dv_ind, uint16_t* vrsoc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempVrSoc);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *vrsoc_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_curr_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrSocketPower);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *socket_power_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_avg_gfx_activity_get(uint32_t dv_ind, uint16_t* gfx_activity_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgGfxActivity);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *gfx_activity_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_avg_umc_activity_get(uint32_t dv_ind, uint16_t* umc_activity_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgUmcActivity);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *umc_activity_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_energy_acc_get(uint32_t dv_ind, uint64_t* energy_acc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricEnergyAccumulator);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *energy_acc_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_system_clock_counter_get(uint32_t dv_ind, uint64_t* system_clock_counter_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTSClockCounter);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *system_clock_counter_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_firmware_timestamp_get(uint32_t dv_ind, uint64_t* firmware_timestamp_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTSFirmware);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *firmware_timestamp_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_throttle_status_get(uint32_t dv_ind, uint32_t* throttle_status_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricThrottleStatus);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *throttle_status_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_pcie_link_width_get(uint32_t dv_ind, uint16_t* pcie_link_width_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieLinkWidth);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_link_width_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_pcie_link_speed_get(uint32_t dv_ind, uint16_t* pcie_link_speed_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieLinkSpeed);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_link_speed_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_xgmi_link_width_get(uint32_t dv_ind, uint16_t* xgmi_link_width_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiLinkWidth);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *xgmi_link_width_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_xgmi_link_speed_get(uint32_t dv_ind, uint16_t* xgmi_link_speed_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiLinkSpeed);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *xgmi_link_speed_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_gfxclk_lock_status_get(uint32_t dv_ind, uint32_t* gfxclk_lock_status_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricGfxClkLockStatus);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *gfxclk_lock_status_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_gfx_activity_acc_get(uint32_t dv_ind, uint32_t* gfx_activity_acc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricGfxActivityAccumulator);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *gfx_activity_acc_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_mem_activity_acc_get(uint32_t dv_ind, uint32_t* mem_activity_acc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricMemActivityAccumulator);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *mem_activity_acc_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_pcie_bandwidth_acc_get(uint32_t dv_ind, uint64_t* pcie_bandwidth_acc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieBandwidthAccumulator);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_bandwidth_acc_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_pcie_bandwidth_inst_get(uint32_t dv_ind, uint64_t* pcie_bandwidth_inst_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieBandwidthInst);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_bandwidth_inst_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_curr_uclk_get(uint32_t dv_ind, uint16_t* uclk_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrUClock);
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *uclk_value);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_vcn_activity_get(uint32_t dv_ind, GPUMetricVcnActivityTbl_t* vcn_activity_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricVcnActivityTbl_t tmp_vcn_activity_value;
*vcn_activity_value = tmp_vcn_activity_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_xgmi_read_data_get(uint32_t dv_ind, GPUMetricXgmiAccTbl_t* xgmi_read_data_acc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricXgmiAccTbl_t tmp_xgmi_read_data_acc_value;
*xgmi_read_data_acc_value = tmp_xgmi_read_data_acc_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_xgmi_write_data_get(uint32_t dv_ind, GPUMetricXgmiAccTbl_t* xgmi_write_data_acc_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricXgmiAccTbl_t tmp_xgmi_write_data_acc_value;
*xgmi_write_data_acc_value = tmp_xgmi_write_data_acc_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_curr_gfxclk_get(uint32_t dv_ind, GPUMetricCurrGfxClkTbl_t* current_gfxclk_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricCurrGfxClkTbl_t tmp_current_gfxclk_value;
*current_gfxclk_value = tmp_current_gfxclk_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_curr_socclk_get(uint32_t dv_ind, GPUMetricCurrSocClkTbl_t* current_socclk_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricCurrSocClkTbl_t tmp_current_socclk_value;
*current_socclk_value = tmp_current_socclk_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_curr_vclk0_get(uint32_t dv_ind, GPUMetricCurrVClkTbl_t* current_vclk_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricCurrVClkTbl_t tmp_current_vclk_value;
*current_vclk_value = tmp_current_vclk_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
rsmi_status_t
rsmi_dev_curr_vdlk0_get(uint32_t dv_ind, GPUMetricCurrDClkTbl_t* current_dclk_value)
{
TRY
std::ostringstream ostrstream;
ostrstream << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ostrstream);
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity);
GPUMetricCurrDClkTbl_t tmp_current_dclk_value;
*current_dclk_value = tmp_current_dclk_value;
auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS);
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
LOG_INFO(ostrstream);
return status_code;
CATCH
}
//
// End of: new GPU Metrics related work.
//
// UNDOCUMENTED FUNCTIONS
// This functions are not declared in rocm_smi.h. They are either not fully
// supported, or to be used for test purposes.
@@ -5439,3 +6047,4 @@ rsmi_test_refcount(uint64_t refcnt_type) {
return static_cast<int32_t>(smi.ref_count());
}
Разница между файлами не показана из-за своего большого размера Загрузить разницу