Merge rocmsmi/amd-staging into amd-dev 20231121

Signed-off-by: Maisam Arif <maisarif@amd.com>
Change-Id: I5cc6accced971479583954e0b93cd90c510ca814
Signed-off-by: Maisam Arif <maisarif@amd.com>


[ROCm/amdsmi commit: 02d310e525]
Этот коммит содержится в:
Maisam Arif
2023-11-13 04:10:18 -06:00
родитель b2785f6b7b
Коммит 662eaa6ad3
8 изменённых файлов: 4576 добавлений и 947 удалений
+342 -41
Просмотреть файл
@@ -47,10 +47,12 @@
#include <stdint.h>
#include <unistd.h>
#include <vector>
#include <iostream>
#include <algorithm>
#include <bitset>
#include <iostream>
#include <map>
#include <vector>
#include <type_traits>
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_utils.h"
@@ -166,9 +168,9 @@ void print_function_header_with_rsmi_ret(
}
// Writes a banner for a test section to std::cout: rows of asterisks around
// the caption `str`, followed by a "Device index: <dv_ind>" line.
//   str    - caption printed after the "*** " prefix.
//   dv_ind - device index printed on the last line.
// NOTE(review): both a 32-character and a 42-character asterisk row are
// emitted before and after the caption; this looks like the old and new
// variants of the same line sitting side by side - confirm which one is meant
// to stay.
static void print_test_header(const char *str, uint32_t dv_ind) {
std::cout << "********************************" << "\n";
std::cout << "******************************************" << "\n";
std::cout << "*** " << str << "\n";
std::cout << "********************************" << "\n";
std::cout << "******************************************" << "\n";
std::cout << "Device index: " << dv_ind << "\n";
}
@@ -728,6 +730,40 @@ template<typename T> constexpr float convert_mw_to_w(T mw) {
return static_cast<float>(mw / 1000.0);
}
/**
 * @brief Renders a metric value, or the error that prevented reading it, as text.
 *
 * When @p status_code is RSMI_STATUS_SUCCESS:
 *   - for a built-in array, returns a "num of values" header line followed by
 *     one "[index]: value" line per element;
 *   - for any arithmetic scalar, returns its std::to_string() representation.
 * For every other status, returns a "Status: [code] -> description" string
 * built from amd::smi::getRSMIStatusString().
 *
 * The return type is spelled out (instead of `auto`) and every path returns a
 * value, so a successful call can never run off the end of the function.
 * Unsupported metric types are rejected at compile time.
 *
 * @tparam T           Built-in array or arithmetic scalar type.
 * @param status_code  Status returned by the RSMI call that produced @p metric.
 * @param metric       Value (or array of values) to format on success.
 * @return Formatted value on success, formatted status otherwise.
 */
template <typename T>
std::string print_error_or_value(rsmi_status_t status_code, const T& metric) {
  if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS) {
    return ("\n\t\tStatus: [" + std::to_string(status_code) + "] " + "-> " + amd::smi::getRSMIStatusString(status_code));
  }

  if constexpr (std::is_array_v<T>) {
    // Same narrowing as before so the printed element count is unchanged.
    const auto num_elems = static_cast<uint16_t>(std::extent_v<T>);
    auto str_values = std::string("\n\t\t num of values: " + std::to_string(num_elems) + "\n");
    auto idx = uint16_t(0);
    for (const auto& el : metric) {
      str_values += "\t\t [" + std::to_string(idx) + "]: " + std::to_string(el) + "\n";
      ++idx;
    }
    return str_values;
  } else {
    // Any arithmetic scalar is printable; anything else is a programming error
    // that must not silently reach run time.
    static_assert(std::is_arithmetic_v<T>,
                  "print_error_or_value: metric must be an array or an arithmetic type");
    return std::to_string(metric);
  }
}
// Converts an integer value to its decimal text form after widening it to
// uint64_t. The `| 0` forces integer promotion first, so narrow types such as
// uint8_t are streamed as numbers rather than as characters.
template <typename T>
std::string print_unsigned_int(T value) {
  const auto widened = static_cast<uint64_t>(value | 0);
  std::stringstream text;
  text << widened;
  return text.str();
}
int main() {
rsmi_status_t ret;
@@ -742,7 +778,7 @@ int main() {
rsmi_dev_perf_level_t pfl;
rsmi_frequencies_t f;
uint32_t num_monitor_devs = 0;
rsmi_gpu_metrics_t p;
rsmi_gpu_metrics_t gpu_metrics;
std::string val_str;
RSMI_POWER_TYPE power_type = RSMI_INVALID_POWER;
@@ -809,43 +845,308 @@ int main() {
std::cout << "Not Supported\n";
}
ret = rsmi_dev_gpu_metrics_info_get(i, &p);
print_test_header("GPU METRICS", i);
print_function_header_with_rsmi_ret(ret,
"rsmi_dev_gpu_metrics_info_get(" + std::to_string(i) + ", &p)");
std::cout << "\t**p.common_header.content_revision: " << std::dec
<< p.common_header.content_revision << "\n";
std::cout << "\t**p.common_header.format_revision: " << std::dec
<< p.common_header.format_revision << "\n";
std::cout << "\t**p.average_gfxclk_frequency: " << std::dec
<< p.average_gfxclk_frequency << "\n";
std::cout << "\t**p.average_socclk_frequency: " << std::dec
<< p.average_socclk_frequency << "\n";
std::cout << "\t**p.average_uclk_frequency: " << std::dec
<< p.average_uclk_frequency << "\n";
std::cout << "\t**p.average_vclk0_frequency: " << std::dec
<< p.average_vclk0_frequency << "\n";
std::cout << "\t**p.average_dclk0_frequency: " << std::dec
<< p.average_dclk0_frequency << "\n";
std::cout << "\t**p.average_vclk1_frequency: " << std::dec
<< p.average_vclk1_frequency << "\n";
std::cout << "\t**p.average_dclk1_frequency: " << std::dec
<< p.average_dclk1_frequency << "\n";
//
std::cout << "\n";
print_test_header("GPU METRICS: Using static struct (Backwards Compatibility) ", i);
print_function_header_with_rsmi_ret(ret, "rsmi_dev_gpu_metrics_info_get(" + std::to_string(i) + ", &gpu_metrics)");
rsmi_dev_gpu_metrics_info_get(i, &gpu_metrics);
std::cout << "\t**.common_header.format_revision : "
<< print_unsigned_int(gpu_metrics.common_header.format_revision) << "\n";
std::cout << "\t**.common_header.content_revision : "
<< print_unsigned_int(gpu_metrics.common_header.content_revision) << "\n";
std::cout << "\t**.temperature_edge : " << std::dec
<< gpu_metrics.temperature_edge << "\n";
std::cout << "\t**.temperature_hotspot : " << std::dec
<< gpu_metrics.temperature_hotspot << "\n";
std::cout << "\t**.temperature_mem : " << std::dec
<< gpu_metrics.temperature_mem << "\n";
std::cout << "\t**.temperature_vrgfx : " << std::dec
<< gpu_metrics.temperature_vrgfx << "\n";
std::cout << "\t**.temperature_vrsoc : " << std::dec
<< gpu_metrics.temperature_vrsoc << "\n";
std::cout << "\t**.temperature_vrmem : " << std::dec
<< gpu_metrics.temperature_vrmem << "\n";
std::cout << "\t**.average_gfx_activity : " << std::dec
<< gpu_metrics.average_gfx_activity << "\n";
std::cout << "\t**.average_umc_activity : " << std::dec
<< gpu_metrics.average_umc_activity << "\n";
std::cout << "\t**.average_mm_activity : " << std::dec
<< gpu_metrics.average_mm_activity << "\n";
std::cout << "\t**.average_socket_power : " << std::dec
<< gpu_metrics.average_socket_power << "\n";
std::cout << "\t**.energy_accumulator : " << std::dec
<< gpu_metrics.energy_accumulator << "\n";
std::cout << "\t**.system_clock_counter : " << std::dec
<< gpu_metrics.system_clock_counter << "\n";
std::cout << "\t**.average_gfxclk_frequency : " << std::dec
<< gpu_metrics.average_gfxclk_frequency << "\n";
std::cout << "\t**.average_socclk_frequency : " << std::dec
<< gpu_metrics.average_socclk_frequency << "\n";
std::cout << "\t**.average_uclk_frequency : " << std::dec
<< gpu_metrics.average_uclk_frequency << "\n";
std::cout << "\t**.average_vclk0_frequency : " << std::dec
<< gpu_metrics.average_vclk0_frequency<< "\n";
std::cout << "\t**.average_dclk0_frequency : " << std::dec
<< gpu_metrics.average_dclk0_frequency << "\n";
std::cout << "\t**.average_vclk1_frequency : " << std::dec
<< gpu_metrics.average_vclk1_frequency << "\n";
std::cout << "\t**.average_dclk1_frequency : " << std::dec
<< gpu_metrics.average_dclk1_frequency << "\n";
std::cout << "\t**.current_gfxclk : " << std::dec
<< gpu_metrics.current_gfxclk << "\n";
std::cout << "\t**.current_socclk : " << std::dec
<< gpu_metrics.current_socclk << "\n";
std::cout << "\t**.current_uclk : " << std::dec
<< gpu_metrics.current_uclk << "\n";
std::cout << "\t**.current_vclk0 : " << std::dec
<< gpu_metrics.current_vclk0 << "\n";
std::cout << "\t**.current_dclk0 : " << std::dec
<< gpu_metrics.current_dclk0 << "\n";
std::cout << "\t**.current_vclk1 : " << std::dec
<< gpu_metrics.current_vclk1 << "\n";
std::cout << "\t**.current_dclk1 : " << std::dec
<< gpu_metrics.current_dclk1 << "\n";
std::cout << "\t**.throttle_status : " << std::dec
<< gpu_metrics.throttle_status << "\n";
std::cout << "\t**.current_fan_speed : " << std::dec
<< gpu_metrics.current_fan_speed << "\n";
std::cout << "\t**.pcie_link_width : " << std::dec
<< gpu_metrics.pcie_link_width << "\n";
std::cout << "\t**.pcie_link_speed : " << std::dec
<< gpu_metrics.pcie_link_speed << "\n";
std::cout << "\t**.gfx_activity_acc : " << std::dec
<< gpu_metrics.gfx_activity_acc << "\n";
std::cout << "\t**.mem_activity_acc : " << std::dec
<< gpu_metrics.mem_activity_acc << "\n";
std::cout << "\t**.firmware_timestamp : " << std::dec
<< gpu_metrics.firmware_timestamp << "\n";
std::cout << "\t**.voltage_soc : " << std::dec
<< gpu_metrics.voltage_soc << "\n";
std::cout << "\t**.voltage_gfx : " << std::dec
<< gpu_metrics.voltage_gfx << "\n";
std::cout << "\t**.voltage_mem : " << std::dec
<< gpu_metrics.voltage_mem << "\n";
std::cout << "\t**.indep_throttle_status : " << std::dec
<< gpu_metrics.indep_throttle_status << "\n";
std::cout << "\t**.current_socket_power : " << std::dec
<< gpu_metrics.current_socket_power << "\n";
std::cout << "\t**.gfxclk_lock_status : " << std::dec
<< gpu_metrics.gfxclk_lock_status << "\n";
std::cout << "\t**.xgmi_link_width : " << std::dec
<< gpu_metrics.xgmi_link_width << "\n";
std::cout << "\t**.xgmi_link_speed : " << std::dec
<< gpu_metrics.xgmi_link_speed << "\n";
std::cout << "\t**.pcie_bandwidth_acc : " << std::dec
<< gpu_metrics.pcie_bandwidth_acc << "\n";
std::cout << "\t**.pcie_bandwidth_inst : " << std::dec
<< gpu_metrics.pcie_bandwidth_inst << "\n";
std::cout << "\t**.pcie_l0_to_recov_count_acc : " << std::dec
<< gpu_metrics.pcie_l0_to_recov_count_acc << "\n";
std::cout << "\t**.pcie_replay_count_acc : " << std::dec
<< gpu_metrics.pcie_replay_count_acc << "\n";
std::cout << "\t**.pcie_replay_rover_count_acc : " << std::dec
<< gpu_metrics.pcie_replay_rover_count_acc << "\n";
std::cout << "\t**.temperature_hbm[] : " << std::dec << "\n";
for (const auto& temp : gpu_metrics.temperature_hbm) {
std::cout << "\t -> " << std::dec << temp << "\n";
}
std::cout << "\t**.vcn_activity[] : " << std::dec << "\n";
for (const auto& vcn : gpu_metrics.vcn_activity) {
std::cout << "\t -> " << std::dec << vcn << "\n";
}
std::cout << "\t**.xgmi_read_data_acc[] : " << std::dec << "\n";
for (const auto& read_data : gpu_metrics.xgmi_read_data_acc) {
std::cout << "\t -> " << std::dec << read_data << "\n";
}
std::cout << "\t**.xgmi_write_data_acc[] : " << std::dec << "\n";
for (const auto& write_data : gpu_metrics.xgmi_write_data_acc) {
std::cout << "\t -> " << std::dec << write_data << "\n";
}
std::cout << "\t**.current_gfxclks[] : " << std::dec << "\n";
for (const auto& gfxclk : gpu_metrics.current_gfxclks) {
std::cout << "\t -> " << std::dec << gfxclk << "\n";
}
std::cout << "\t**.current_socclks[] : " << std::dec << "\n";
for (const auto& socclk : gpu_metrics.current_socclks) {
std::cout << "\t -> " << std::dec << socclk << "\n";
}
std::cout << "\t**.current_vclk0s[] : " << std::dec << "\n";
for (const auto& vclk : gpu_metrics.current_vclk0s) {
std::cout << "\t -> " << std::dec << vclk << "\n";
}
std::cout << "\t**.current_dclk0s[] : " << std::dec << "\n";
for (const auto& dclk : gpu_metrics.current_dclk0s) {
std::cout << "\t -> " << std::dec << dclk << "\n";
}
std::cout << " ** Note: Values MAX'ed out (UINTX MAX are unsupported for the version in question) ** " << "\n";
std::cout << "\n\n";
print_test_header("GPU METRICS: Using direct APIs (newer)", i);
metrics_table_header_t header_values;
GPUMetricTempHbm_t hbm_values;
GPUMetricVcnActivity_t vcn_values;
GPUMetricXgmiReadDataAcc_t xgmi_read_values;
GPUMetricXgmiWriteDataAcc_t xgmi_write_values;
GPUMetricCurrGfxClk_t curr_gfxclk_values;
GPUMetricCurrSocClk_t curr_socclk_values;
GPUMetricCurrVClk0_t curr_vclk0_values;
GPUMetricCurrDClk0_t curr_dclk0_values;
ret = rsmi_dev_metrics_header_info_get(i, &header_values);
std::cout << "\t[Metrics Header]" << "\n";
std::cout << "\t -> format_revision : " << print_unsigned_int(header_values.format_revision) << "\n";
std::cout << "\t -> content_revision : " << print_unsigned_int(header_values.content_revision) << "\n";
std::cout << "\t--------------------" << "\n";
std::cout << "\n";
std::cout << "\t[Temperature]" << "\n";
ret = rsmi_dev_metrics_temp_edge_get(i, &val_ui16);
std::cout << "\t -> temp_edge(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_temp_hotspot_get(i, &val_ui16);
std::cout << "\t -> temp_hotspot(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_temp_mem_get(i, &val_ui16);
std::cout << "\t -> temp_mem(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_temp_vrgfx_get(i, &val_ui16);
std::cout << "\t -> temp_vrgfx(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_temp_vrsoc_get(i, &val_ui16);
std::cout << "\t -> temp_vrsoc(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_temp_vrmem_get(i, &val_ui16);
std::cout << "\t -> temp_vrmem(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_temp_hbm_get(i, &hbm_values);
std::cout << "\t -> temp_hbm(): " << print_error_or_value(ret, hbm_values) << "\n";
std::cout << "\n";
std::cout << "\t[Power/Energy]" << "\n";
ret = rsmi_dev_metrics_curr_socket_power_get(i, &val_ui16);
std::cout << "\t -> current_socket_power(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_energy_acc_get(i, &val_ui64);
std::cout << "\t -> energy_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_avg_socket_power_get(i, &val_ui16);
std::cout << "\t -> average_socket_power(): " << print_error_or_value(ret, val_ui16) << "\n";
std::cout << "\n";
std::cout << "\t[Utilization]" << "\n";
ret = rsmi_dev_metrics_avg_gfx_activity_get(i, &val_ui16);
std::cout << "\t -> average_gfx_activity(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_umc_activity_get(i, &val_ui16);
std::cout << "\t -> average_umc_activity(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_mm_activity_get(i, &val_ui16);
std::cout << "\t -> average_mm_activity(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_vcn_activity_get(i, &vcn_values);
std::cout << "\t -> vcn_activity(): " << print_error_or_value(ret, vcn_values) << "\n";
ret = rsmi_dev_metrics_mem_activity_acc_get(i, &val_ui32);
std::cout << "\t -> mem_activity_accum(): " << print_error_or_value(ret, val_ui32) << "\n";
ret = rsmi_dev_metrics_gfx_activity_acc_get(i, &val_ui32);
std::cout << "\t -> gfx_activity_accum(): " << print_error_or_value(ret, val_ui32) << "\n";
std::cout << "\n";
std::cout << "\t[Average Clock]" << "\n";
ret = rsmi_dev_metrics_avg_gfx_clock_frequency_get(i, &val_ui16);
std::cout << "\t -> average_gfx_clock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_soc_clock_frequency_get(i, &val_ui16);
std::cout << "\t -> average_soc_clock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_uclock_frequency_get(i, &val_ui16);
std::cout << "\t -> average_uclock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_vclock0_frequency_get(i, &val_ui16);
std::cout << "\t -> average_vclock0_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_dclock0_frequency_get(i, &val_ui16);
std::cout << "\t -> average_dclock0_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_vclock1_frequency_get(i, &val_ui16);
std::cout << "\t -> average_vclock1_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_avg_dclock1_frequency_get(i, &val_ui16);
std::cout << "\t -> average_dclock1_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
std::cout << "\n";
std::cout << "\t[Current Clock]" << "\n";
ret = rsmi_dev_metrics_curr_vclk1_get(i, &val_ui16);
std::cout << "\t -> current_vclock1(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_curr_dclk1_get(i, &val_ui16);
std::cout << "\t -> current_dclock1(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_curr_uclk_get(i, &val_ui16);
std::cout << "\t -> current_uclock(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_curr_dclk0_get(i, &curr_dclk0_values);
std::cout << "\t -> current_dclk0(): " << print_error_or_value(ret, curr_dclk0_values) << "\n";
ret = rsmi_dev_metrics_curr_gfxclk_get(i, &curr_gfxclk_values);
std::cout << "\t -> current_gfxclk(): " << print_error_or_value(ret, curr_gfxclk_values) << "\n";
ret = rsmi_dev_metrics_curr_socclk_get(i, &curr_socclk_values);
std::cout << "\t -> current_soc_clock(): " << print_error_or_value(ret, curr_socclk_values) << "\n";
ret = rsmi_dev_metrics_curr_vclk0_get(i, &curr_vclk0_values);
std::cout << "\t -> current_vclk0(): " << print_error_or_value(ret, curr_vclk0_values) << "\n";
std::cout << "\n";
std::cout << "\t[Throttle]" << "\n";
ret = rsmi_dev_metrics_indep_throttle_status_get(i, &val_ui64);
std::cout << "\t -> indep_throttle_status(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_throttle_status_get(i, &val_ui32);
std::cout << "\t -> throttle_status(): " << print_error_or_value(ret, val_ui32) << "\n";
std::cout << "\n";
std::cout << "\t[Gfx Clock Lock]" << "\n";
ret = rsmi_dev_metrics_gfxclk_lock_status_get(i, &val_ui32);
std::cout << "\t -> gfxclk_lock_status(): " << print_error_or_value(ret, val_ui32) << "\n";
std::cout << "\n";
std::cout << "\t[Current Fan Speed]" << "\n";
ret = rsmi_dev_metrics_curr_fan_speed_get(i, &val_ui16);
std::cout << "\t -> current_fan_speed(): " << print_error_or_value(ret, val_ui16) << "\n";
std::cout << "\n";
std::cout << "\t[Link/Bandwidth/Speed]" << "\n";
ret = rsmi_dev_metrics_pcie_link_width_get(i, &val_ui16);
std::cout << "\t -> pcie_link_width(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_pcie_link_speed_get(i, &val_ui16);
std::cout << "\t -> pcie_link_speed(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_pcie_bandwidth_acc_get(i, &val_ui64);
std::cout << "\t -> pcie_bandwidth_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_pcie_bandwidth_inst_get(i, &val_ui64);
std::cout << "\t -> pcie_bandwidth_inst(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_pcie_l0_recov_count_acc_get(i, &val_ui64);
std::cout << "\t -> pcie_l0_recov_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_pcie_replay_count_acc_get(i, &val_ui64);
std::cout << "\t -> pcie_replay_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_pcie_replay_rover_count_acc_get(i, &val_ui64);
std::cout << "\t -> pcie_replay_rollover_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_xgmi_link_width_get(i, &val_ui16);
std::cout << "\t -> xgmi_link_width(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_xgmi_link_speed_get(i, &val_ui16);
std::cout << "\t -> xgmi_link_speed(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_xgmi_read_data_get(i, &xgmi_read_values);
std::cout << "\t -> xgmi_read_data(): " << print_error_or_value(ret, xgmi_read_values) << "\n";
ret = rsmi_dev_metrics_xgmi_write_data_get(i, &xgmi_write_values);
std::cout << "\t -> xgmi_write_data(): " << print_error_or_value(ret, xgmi_write_values) << "\n";
std::cout << "\n";
std::cout << "\t[Voltage]" << "\n";
ret = rsmi_dev_metrics_volt_soc_get(i, &val_ui16);
std::cout << "\t -> voltage_soc(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_volt_gfx_get(i, &val_ui16);
std::cout << "\t -> voltage_gfx(): " << print_error_or_value(ret, val_ui16) << "\n";
ret = rsmi_dev_metrics_volt_mem_get(i, &val_ui16);
std::cout << "\t -> voltage_mem(): " << print_error_or_value(ret, val_ui16) << "\n";
std::cout << "\n";
std::cout << "\t[Timestamp]" << "\n";
ret = rsmi_dev_metrics_system_clock_counter_get(i, &val_ui64);
std::cout << "\t -> system_clock_counter(): " << print_error_or_value(ret, val_ui64) << "\n";
ret = rsmi_dev_metrics_firmware_timestamp_get(i, &val_ui64);
std::cout << "\t -> firmware_timestamp(): " << print_error_or_value(ret, val_ui64) << "\n";
std::cout << "\n";
std::cout << "\t[XCD CounterVoltage]" << "\n";
ret = rsmi_dev_metrics_xcd_counter_get(i, &val_ui16);
std::cout << "\t -> xcd_counter(): " << print_error_or_value(ret, val_ui16) << "\n";
std::cout << "\n\n";
std::cout << "\t**p.current_gfxclk: " << std::dec
<< p.current_gfxclk << "\n";
std::cout << "\t**p.current_socclk: " << std::dec
<< p.current_socclk << "\n";
std::cout << "\t**p.current_uclk: " << std::dec
<< p.current_uclk << "\n";
std::cout << "\t**p.current_vclk0: " << std::dec
<< p.current_vclk0 << "\n";
std::cout << "\t**p.current_dclk0: " << std::dec
<< p.current_dclk0 << "\n";
std::cout << "\t**p.current_vclk1: " << std::dec
<< p.current_vclk1 << "\n";
std::cout << "\t**p.current_dclk1: " << std::dec
<< p.current_dclk1 << "\n";
ret = rsmi_dev_perf_level_get(i, &pfl);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+5 -9
Просмотреть файл
@@ -53,6 +53,7 @@
#include <unordered_set>
#include <map>
#include <type_traits>
#include <optional>
#include "rocm_smi/rocm_smi_monitor.h"
#include "rocm_smi/rocm_smi_power_mon.h"
@@ -234,7 +235,7 @@ class Device {
void set_evt_notif_anon_fd(uint32_t fd) {
evt_notif_anon_fd_ = static_cast<int>(fd);}
int evt_notif_anon_fd(void) const {return evt_notif_anon_fd_;}
metrics_table_header_t &gpu_metrics_ver(void) {return gpu_metrics_ver_;}
void fillSupportedFuncs(void);
void DumpSupportedFunctions(void);
bool DeviceAPISupported(std::string name, uint64_t variant,
@@ -244,18 +245,15 @@ class Device {
template <typename T> std::string readBootPartitionState(uint32_t dv_ind);
rsmi_status_t check_amdgpu_property_reinforcement_query(uint32_t dev_idx, AMDGpuVerbTypes_t verb_type);
void dev_set_gpu_metric(GpuMetricsBasePtr gpu_metrics_ptr) { m_gpu_metrics_ptr = gpu_metrics_ptr; };
void dev_set_gpu_metric(GpuMetricsBasePtr gpu_metrics_ptr) { m_gpu_metrics_ptr = std::move(gpu_metrics_ptr); };
GpuMetricsBasePtr& dev_get_gpu_metric() { return m_gpu_metrics_ptr; };
const AMDGpuMetricsHeader_v1_t& dev_get_metrics_header() {return m_gpu_metrics_header; }
rsmi_status_t setup_gpu_metrics_reading();
rsmi_status_t dev_read_gpu_metrics_header_data();
rsmi_status_t dev_read_gpu_metrics_all_data();
rsmi_status_t dev_log_gpu_metrics();
rsmi_status_t run_internal_gpu_metrics_query(AMDGpuMetricsUnitType_t metric_counter, AMDGpuDynamicMetricTblValues_t& values);
template<typename T>
rsmi_status_t dev_run_gpu_metrics_query(AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
rsmi_status_t dev_log_gpu_metrics(std::ostringstream& outstream_metrics);
AMGpuMetricsPublicLatestTupl_t dev_copy_internal_to_external_metrics();
private:
std::shared_ptr<Monitor> monitor_;
@@ -290,8 +288,6 @@ class Device {
int evt_notif_anon_fd_;
FILE *evt_notif_anon_file_ptr_;
struct metrics_table_header_t gpu_metrics_ver_;
GpuMetricsBasePtr m_gpu_metrics_ptr;
AMDGpuMetricsHeader_v1_t m_gpu_metrics_header;
uint64_t m_gpu_metrics_updated_timestamp;
+172 -59
Просмотреть файл
@@ -47,10 +47,16 @@
#include "rocm_smi/rocm_smi_common.h"
#include "rocm_smi/rocm_smi.h"
#include <array>
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <map>
#include <memory>
#include <type_traits>
#include <tuple>
#include <variant>
#include <vector>
@@ -96,15 +102,14 @@ struct AMDGpuMetricsHeader_v1_t
uint8_t m_content_revision;
};
struct AMDGpuMetricsBase_t;
using AMDGpuMetricsBaseRef = AMDGpuMetricsBase_t&;
// Polymorphic base type (aliased by reference as AMDGpuMetricsBaseRef).
// It declares no data members, only a virtual defaulted destructor.
struct AMDGpuMetricsBase_t
{
virtual ~AMDGpuMetricsBase_t() = default;
};
using AMDGpuMetricsBaseRef = AMDGpuMetricsBase_t&;
struct AMDGpuMetrics_v11_t : AMDGpuMetricsBase_t
struct AMDGpuMetrics_v11_t
{
~AMDGpuMetrics_v11_t() = default;
@@ -166,7 +171,7 @@ struct AMDGpuMetrics_v11_t : AMDGpuMetricsBase_t
uint16_t m_temperature_hbm[kRSMI_MAX_NUM_HBM_INSTANCES];
};
struct AMDGpuMetrics_v12_t : AMDGpuMetricsBase_t
struct AMDGpuMetrics_v12_t
{
~AMDGpuMetrics_v12_t() = default;
@@ -226,11 +231,11 @@ struct AMDGpuMetrics_v12_t : AMDGpuMetricsBase_t
uint32_t m_mem_activity_acc; // new in v1
uint16_t m_temperature_hbm[kRSMI_MAX_NUM_HBM_INSTANCES]; // new in v1
// PMFW attached timestamp (10ns resolution)
// PMFW attached timestamp (10ns resolution)
uint64_t m_firmware_timestamp;
};
struct AMDGpuMetrics_v13_t : AMDGpuMetricsBase_t
struct AMDGpuMetrics_v13_t
{
~AMDGpuMetrics_v13_t() = default;
@@ -304,7 +309,7 @@ struct AMDGpuMetrics_v13_t : AMDGpuMetricsBase_t
uint64_t m_indep_throttle_status;
};
struct AMDGpuMetrics_v14_t : AMDGpuMetricsBase_t
struct AMDGpuMetrics_v14_t
{
~AMDGpuMetrics_v14_t() = default;
@@ -316,7 +321,7 @@ struct AMDGpuMetrics_v14_t : AMDGpuMetricsBase_t
uint16_t m_temperature_vrsoc;
// Power (Watts)
uint16_t m_curr_socket_power;
uint16_t m_current_socket_power;
// Utilization (%)
uint16_t m_average_gfx_activity;
@@ -340,8 +345,8 @@ struct AMDGpuMetrics_v14_t : AMDGpuMetricsBase_t
uint16_t m_pcie_link_speed; // in 0.1 GT/s
// XGMI bus width and bitrate (in Gbps)
uint16_t m_xgmi_link_width;
uint16_t m_xgmi_link_speed;
uint16_t m_xgmi_link_width;
uint16_t m_xgmi_link_speed;
// Utilization Accumulated (%)
uint32_t m_gfx_activity_acc;
@@ -353,6 +358,15 @@ struct AMDGpuMetrics_v14_t : AMDGpuMetricsBase_t
// PCIE instantaneous bandwidth (GB/sec)
uint64_t m_pcie_bandwidth_inst;
// PCIE L0 to recovery state transition accumulated count
uint64_t m_pcie_l0_to_recov_count_acc;
// PCIE replay accumulated count
uint64_t m_pcie_replay_count_acc;
// PCIE replay rollover accumulated count
uint64_t m_pcie_replay_rover_count_acc;
// XGMI accumulated data transfer size(KiloBytes)
uint64_t m_xgmi_read_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
uint64_t m_xgmi_write_data_acc[kRSMI_MAX_NUM_XGMI_LINKS];
@@ -371,33 +385,52 @@ struct AMDGpuMetrics_v14_t : AMDGpuMetricsBase_t
};
using AMGpuMetricsLatest_t = AMDGpuMetrics_v14_t;
/**
* This is the GPU metrics version that is exposed for public access.
* It is a single, unified (joined) version of the previous
* versions (1.2 up to the latest, 1.4). Data fields that are not used/relevant
* for the current driver version and GPU metrics version will
* not be populated and will therefore be 0s (zeroes).
*
* If/in case anything new is added to a new version and there is
* a requirement to make it publicly available, into a single static
* table/form/struct, then it should be added here.
*
*/
using AMGpuMetricsPublicLatest_t = rsmi_gpu_metrics_t;
using AMGpuMetricsPublicLatestTupl_t = std::tuple<rsmi_status_t, AMGpuMetricsPublicLatest_t>;
using GpuMetricU16Tbl_t = std::vector<uint16_t>;
using GpuMetricU32Tbl_t = std::vector<uint32_t>;
using GpuMetricU64Tbl_t = std::vector<uint64_t>;
using GPUMetricTempHbm_t = decltype(AMDGpuMetrics_v13_t::m_temperature_hbm);
using GPUMetricTempHbmTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_HBM_INSTANCES>;
using GPUMetricTempHbmTbl_t = GpuMetricU16Tbl_t;
using GPUMetricVcnActivity_t = decltype(AMDGpuMetrics_v14_t::m_vcn_activity);
using GPUMetricVcnActivityTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_VCN>;
using GPUMetricVcnActivityTbl_t = GpuMetricU16Tbl_t;
using GPUMetricXgmiReadDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_read_data_acc);
using GPUMetricXgmiWriteDataAcc_t = decltype(AMDGpuMetrics_v14_t::m_xgmi_write_data_acc);
using GPUMetricXgmiAccTbl_t = std::array<uint64_t, kRSMI_MAX_NUM_XGMI_LINKS>;
using GPUMetricXgmiAccTbl_t = GpuMetricU64Tbl_t;
using GPUMetricCurrGfxClk_t = decltype(AMDGpuMetrics_v14_t::m_current_gfxclk);
using GPUMetricCurrGfxClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_GFX_CLKS>;
using GPUMetricCurrGfxClkTbl_t = GpuMetricU16Tbl_t;
using GPUMetricCurrSocClk_t = decltype(AMDGpuMetrics_v14_t::m_current_socclk);
using GPUMetricCurrSocClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
using GPUMetricCurrSocClkTbl_t = GpuMetricU16Tbl_t;
using GPUMetricCurrVClk0_t = decltype(AMDGpuMetrics_v14_t::m_current_vclk0);
using GPUMetricCurrVClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
using GPUMetricCurrVClkTbl_t = GpuMetricU16Tbl_t;
using GPUMetricCurrDClk0_t = decltype(AMDGpuMetrics_v14_t::m_current_dclk0);
using GPUMetricCurrDClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
using GPUMetricCurrDClkTbl_t = GpuMetricU16Tbl_t;
/*
////
/************************************************************
* When a new metric table is released, we have to update: *
1. Constants related to the new metrics added;
1. Constants related to the new metrics added (if any);
(ie: kRSMI_MAX_NUM_XGMI_LINKS)
2. Constants related to new version:
(ie: kRSMI_GPU_METRICS_API_CONTENT_MAJOR_VER_1)
@@ -411,19 +444,35 @@ using GPUMetricCurrDClkTbl_t = std::array<uint16_t, kRSMI_MAX_NUM_CLKS>;
5. AMGpuMetricsLatest_t -> Newest AMDGpuMetrics_v1x_t
6. AMDGpuMetricVersionFlags_t
(ie: AMDGpuMetricVersionFlags_t::kGpuMetricV14)
7. Create the proper API using granular controls used by
rsmi_dev_gpu_metrics_info_query() (ie: rsmi_dev_temp_hotspot_get())
-> Remember to check/update:
- AMDGpuMetricsUnitType_t
- amdgpu_metrics_unit_type_translation_table
- AMDGpuMetrics_v1X_t structure in question
- populate_metrics_dynamic_tbl()
- copy_internal_to_external_metrics()
- init_max_public_gpu_matrics()
*/
using AMDGpuMetricTypeId_t = uint32_t;
using AMDGpuMetricTypeIdSeq_t = uint32_t;
using AMDGpuMetricVersionFlagId_t = uint32_t;
////
/*
*
* These are used as Metric class, so Metric Units can be properly grouped.
* Each Metric Unit (or a set of them) is related to a Metric class.
*
*/
enum class AMDGpuMetricsClassId_t : AMDGpuMetricTypeId_t
{
kGpuMetricHeader = 0,
kGpuMetricHeader,
kGpuMetricTemperature,
kGpuMetricUtilization,
kGpuMetricPowerEnergy,
kGpuMetricSystemClockCounter,
kGpuMetricAverageClock,
kGpuMetricCurrentClock,
kGpuMetricThrottleStatus,
@@ -435,6 +484,22 @@ enum class AMDGpuMetricsClassId_t : AMDGpuMetricTypeId_t
};
using AMDGpuMetricsClassIdTranslationTbl_t = std::map<AMDGpuMetricsClassId_t, std::string>;
/*
*
* These are the Metric units. Each one represents a specific metric we want
* to either store or retrieve.
*
* This also gives a more granular control over to what exactly is needed,
* helping to generalize metric queries.
*
* Each time a new (not yet existing) metric unit is added, it should be
* added here.
* - Their names match (closely, regardless of their version) the names of
* the data structure members they represent.
*
* All metric units not flagged as v1.4 were either part of the base or
* added/changed up to v1.3
*/
enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
{
// kGpuMetricTemperature counters
@@ -452,7 +517,7 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
kMetricAvgMmActivity,
kMetricGfxActivityAccumulator,
kMetricMemActivityAccumulator,
kMetricVcnActivity,
kMetricVcnActivity, //v1.4
// kGpuMetricAverageClock counters
kMetricAvgGfxClockFrequency,
@@ -464,11 +529,11 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
kMetricAvgDClock1Frequency,
// kGpuMetricCurrentClock counters
kMetricCurrGfxClock,
kMetricCurrSocClock,
kMetricCurrGfxClock, //v1.4: Changed to multi-valued
kMetricCurrSocClock, //v1.4: Changed to multi-valued
kMetricCurrUClock,
kMetricCurrVClock0,
kMetricCurrDClock0,
kMetricCurrVClock0, //v1.4: Changed to multi-valued
kMetricCurrDClock0, //v1.4: Changed to multi-valued
kMetricCurrVClock1,
kMetricCurrDClock1,
@@ -477,7 +542,7 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
kMetricIndepThrottleStatus,
// kGpuMetricGfxClkLockStatus counters
kMetricGfxClkLockStatus,
kMetricGfxClkLockStatus, //v1.4
// kGpuMetricCurrentFanSpeed counters
kMetricCurrFanSpeed,
@@ -485,22 +550,25 @@ enum class AMDGpuMetricsUnitType_t : AMDGpuMetricTypeId_t
// kGpuMetricLinkWidthSpeed counters
kMetricPcieLinkWidth,
kMetricPcieLinkSpeed,
kMetricPcieBandwidthAccumulator,
kMetricPcieBandwidthInst,
kMetricXgmiLinkWidth,
kMetricXgmiLinkSpeed,
kMetricXgmiReadDataAccumulator,
kMetricXgmiWriteDataAccumulator,
kMetricPcieBandwidthAccumulator, //v1.4
kMetricPcieBandwidthInst, //v1.4
kMetricXgmiLinkWidth, //v1.4
kMetricXgmiLinkSpeed, //v1.4
kMetricXgmiReadDataAccumulator, //v1.4
kMetricXgmiWriteDataAccumulator, //v1.4
kMetricPcieL0RecovCountAccumulator, //v1.4
kMetricPcieReplayCountAccumulator, //v1.4
kMetricPcieReplayRollOverCountAccumulator, //v1.4
// kGpuMetricPowerEnergy counters
kMetricAvgSocketPower,
kMetricCurrSocketPower,
kMetricEnergyAccumulator,
kMetricCurrSocketPower, //v1.4
kMetricEnergyAccumulator, //v1.4
// kGpuMetricVoltage counters
kMetricVoltageSoc,
kMetricVoltageGfx,
kMetricVoltageMem,
kMetricVoltageSoc, //v1.3
kMetricVoltageGfx, //v1.3
kMetricVoltageMem, //v1.3
// kGpuMetricTimestamp counters
kMetricTSClockCounter,
@@ -526,8 +594,12 @@ struct AMDGpuDynamicMetricsValue_t
using AMDGpuDynamicMetricTblValues_t = std::vector<AMDGpuDynamicMetricsValue_t>;
using AMDGpuDynamicMetricsTbl_t = std::map<AMDGpuMetricsClassId_t, std::map<AMDGpuMetricsUnitType_t, AMDGpuDynamicMetricTblValues_t>>;
// Note: All supported metric versions are listed here
// If not here, they are not supported
/*
*
* Note: All supported metric versions are listed here, otherwise unsupported
*
*/
enum class AMDGpuMetricVersionFlags_t : AMDGpuMetricVersionFlagId_t
{
kGpuMetricNone = 0x0,
@@ -537,21 +609,19 @@ enum class AMDGpuMetricVersionFlags_t : AMDGpuMetricVersionFlagId_t
kGpuMetricV13 = (0x1 << 3),
kGpuMetricV14 = (0x1 << 4),
};
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint64_t, AMDGpuMetricVersionFlags_t>;
class GpuMetricsBase_t;
using GpuMetricsBasePtr = std::shared_ptr<GpuMetricsBase_t>;
using AMDGpuMetricVersionTranslationTbl_t = std::map<uint16_t, AMDGpuMetricVersionFlags_t>;
using GpuMetricTypePtr_t = std::shared_ptr<void>;
class GpuMetricsBase_t
{
public:
virtual ~GpuMetricsBase_t() = default;
virtual size_t sizeof_metric_table() = 0;
virtual AMDGpuMetricsBaseRef get_metrics_table() = 0;
virtual GpuMetricTypePtr_t get_metrics_table() = 0;
virtual void dump_internal_metrics_table() = 0;
virtual AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() = 0;
virtual rsmi_status_t populate_metrics_dynamic_tbl() = 0;
virtual AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() = 0;
// Returns the dynamic metrics table held in m_metrics_dynamic_tbl.
// The return type is a non-reference AMDGpuDynamicMetricsTbl_t, so the
// caller receives a copy of the table, not a view into this object.
virtual AMDGpuDynamicMetricsTbl_t get_metrics_dynamic_tbl() {
return m_metrics_dynamic_tbl;
}
@@ -561,21 +631,31 @@ class GpuMetricsBase_t
uint64_t m_metrics_timestamp;
};
using GpuMetricsBasePtr = std::shared_ptr<GpuMetricsBase_t>;
using AMDGpuMetricFactories_t = std::map<AMDGpuMetricVersionFlags_t, GpuMetricsBasePtr>;
class GpuMetricsBase_v11_t final : public GpuMetricsBase_t
{
public:
~GpuMetricsBase_v11_t() = default;
virtual ~GpuMetricsBase_v11_t() = default;
// Reports the size, in bytes, of the AMDGpuMetrics_v11_t table type.
size_t sizeof_metric_table() override
{
  constexpr size_t kTableBytes = sizeof(AMDGpuMetrics_v11_t);
  return kTableBytes;
}
AMDGpuMetricsBaseRef get_metrics_table() override
GpuMetricTypePtr_t get_metrics_table() override
{
return m_gpu_metrics_tbl;
if (!m_gpu_metric_ptr) {
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v11_t*){});
}
assert(m_gpu_metric_ptr != nullptr);
return m_gpu_metric_ptr;
}
// This override performs no work: it returns immediately without
// reading or printing any metrics data.
void dump_internal_metrics_table() override {}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
@@ -584,10 +664,12 @@ class GpuMetricsBase_v11_t final : public GpuMetricsBase_t
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
private:
AMDGpuMetrics_v11_t m_gpu_metrics_tbl;
std::shared_ptr<AMDGpuMetrics_v11_t> m_gpu_metric_ptr;
};
@@ -600,9 +682,18 @@ class GpuMetricsBase_v12_t final : public GpuMetricsBase_t
return sizeof(AMDGpuMetrics_v12_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
GpuMetricTypePtr_t get_metrics_table() override
{
return m_gpu_metrics_tbl;
if (!m_gpu_metric_ptr) {
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v12_t*){});
}
assert(m_gpu_metric_ptr != nullptr);
return m_gpu_metric_ptr;
}
// This override performs no work: it returns immediately without
// reading or printing any metrics data.
void dump_internal_metrics_table() override {}
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
@@ -611,10 +702,11 @@ class GpuMetricsBase_v12_t final : public GpuMetricsBase_t
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
private:
AMDGpuMetrics_v12_t m_gpu_metrics_tbl;
std::shared_ptr<AMDGpuMetrics_v12_t> m_gpu_metric_ptr;
};
@@ -627,21 +719,29 @@ class GpuMetricsBase_v13_t final : public GpuMetricsBase_t
return sizeof(AMDGpuMetrics_v13_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
GpuMetricTypePtr_t get_metrics_table() override
{
return m_gpu_metrics_tbl;
if (!m_gpu_metric_ptr) {
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v13_t*){});
}
assert(m_gpu_metric_ptr != nullptr);
return (m_gpu_metric_ptr);
}
void dump_internal_metrics_table() override;
// Reports the metrics-version flag this class stands for; the value is
// always AMDGpuMetricVersionFlags_t::kGpuMetricV13.
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
{
  constexpr auto kVersionFlag = AMDGpuMetricVersionFlags_t::kGpuMetricV13;
  return kVersionFlag;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
private:
AMDGpuMetrics_v13_t m_gpu_metrics_tbl;
std::shared_ptr<AMDGpuMetrics_v13_t> m_gpu_metric_ptr;
};
@@ -654,28 +754,41 @@ class GpuMetricsBase_v14_t final : public GpuMetricsBase_t
return sizeof(AMDGpuMetrics_v14_t);
}
AMDGpuMetricsBaseRef get_metrics_table() override
GpuMetricTypePtr_t get_metrics_table() override
{
return m_gpu_metrics_tbl;
if (!m_gpu_metric_ptr) {
m_gpu_metric_ptr.reset(&m_gpu_metrics_tbl, [](AMDGpuMetrics_v14_t*){});
}
assert(m_gpu_metric_ptr != nullptr);
return m_gpu_metric_ptr;
}
void dump_internal_metrics_table() override;
// Reports the metrics-version flag this class stands for; the value is
// always AMDGpuMetricVersionFlags_t::kGpuMetricV14.
AMDGpuMetricVersionFlags_t get_gpu_metrics_version_used() override
{
  constexpr auto kVersionFlag = AMDGpuMetricVersionFlags_t::kGpuMetricV14;
  return kVersionFlag;
}
rsmi_status_t populate_metrics_dynamic_tbl() override;
AMGpuMetricsPublicLatestTupl_t copy_internal_to_external_metrics() override;
private:
AMDGpuMetrics_v14_t m_gpu_metrics_tbl;
std::shared_ptr<AMDGpuMetrics_v14_t> m_gpu_metric_ptr;
};
// Declaration only; the definition is not visible here.
// Takes a device index (dv_ind), the metric unit to query (metric_counter)
// and a caller-supplied object of type T by non-const reference
// (metric_value); returns an rsmi_status_t.
// NOTE(review): presumably metric_value receives the queried metric and the
// return value reports success/failure -- confirm against the definition.
template<typename T>
rsmi_status_t rsmi_dev_gpu_metrics_info_query(uint32_t dv_ind, AMDGpuMetricsUnitType_t metric_counter, T& metric_value);
} // namespace amd::smi
#endif // ROCM_SMI_ROCM_SMI_GPU_METRICS_H_
// Declaration only; the definition is not visible here.
// Takes a device index (dv_ind) and a metrics_table_header_t by non-const
// reference (header_value); returns an rsmi_status_t.
// NOTE(review): presumably header_value is filled with the device's GPU
// metrics table header -- confirm against the definition.
rsmi_status_t
rsmi_dev_gpu_metrics_header_info_get(uint32_t dv_ind, metrics_table_header_t& header_value);
#endif // ROCM_SMI_ROCM_SMI_GPU_METRICS_H_
+1 -3
Просмотреть файл
@@ -1,6 +1,4 @@
## Synopsis
Radeon Open Compute Platform - System Management Interface - Command Line tool.
## Radeon Open Compute (ROCm) - System Management Interface - Command Line Tool
This tool acts as a command line interface for manipulating
and monitoring the amdgpu kernel, and is intended to replace
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+9 -2
Просмотреть файл
@@ -549,7 +549,7 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
Device::Device(std::string p, RocmSMI_env_vars const *e) :
monitor_(nullptr), path_(p), env_(e), evt_notif_anon_fd_(-1),
gpu_metrics_ver_{0, 0, 0} {
m_gpu_metrics_header{0, 0, 0} {
#ifndef DEBUG
env_ = nullptr;
#endif
@@ -916,7 +916,14 @@ int Device::readDevInfoBinary(DevInfoTypes type, std::size_t b_size,
if ((num*b_size) != b_size) {
ss << "Could not read DevInfoBinary for DevInfoType ("
<< RocmSMI::devInfoTypesStrings.at(type) << ") - SYSFS ("
<< sysfs_path << "), binary size error, "
<< sysfs_path << "), binary size error; "
<< "[buff: "
<< p_binary_data
<< " size: "
<< b_size
<< " read: "
<< num
<< "]"
<< ", returning ENOENT (" << std::strerror(ENOENT) << ")";
LOG_ERROR(ss);
return ENOENT;
Разница между файлами не показана из-за своего большого размера Загрузить разницу