Fix GPU metric tests & clean up test output

- CLI: Added average_power to display if current_power is empty
    - CLI: fixed PCIe current_speed not displaying GT/s
    - ROCm API: GPU metrics 1.3 & 1.4
        -> commented out setting the average clocks to the current clock value
           (left as the max uint value, not re-assigned; these are not the same values)
        -> commented out setting current_socket_power = average_power
           (left as the max uint value, not re-assigned; these are not the same values)
        -> for all non-array clocks, placed the value in the first
           array element (array[0]) to keep outputs consistent
           (helps the XCD calculation)
      - ROCm API: fixed rsmi_dev_metrics_curr_gfxclk_get to count
        XCDs using the backwards-compatible rsmi_dev_gpu_metrics_info_get.
      - The change above fixes the overall XCD count and, for 1.3, assigns
        the current frequency to clock[0].
      - AMD SMI API: amdsmi_get_gpu_metrics_info() now initializes all new
        1.5 metric values for all lower metric tables
      - AMD SMI API: wrapper -> fix is here + now returns the correct
        AMD SMI return status
      - AMD SMI API: wrapper -> now displays the amdsmi return status as
        a string in the logs
      - gpu_metrics_read.cc -> now gives a better overview of the
        backwards-compatible output
      - gpu_metrics_read.cc -> cleaned up the output, added units, and
        now displays all array output

Signed-off-by: Charis Poag <Charis.Poag@amd.com>
Change-Id: Id5b60ded5b0ed2cdf0f96ca72c79e356f0410960


[ROCm/amdsmi commit: 5ff5af0b5a]
Этот коммит содержится в:
Charis Poag
2023-12-19 02:49:52 -06:00
родитель e924266a25
Коммит 601a254f37
6 изменённых файлов: 600 добавлений и 113 удалений
+4 -2
Просмотреть файл
@@ -1173,7 +1173,7 @@ class AMDSMICommands():
power_dict['current_power'] = power_info['current_socket_power']
if power_dict['current_power'] == "N/A":
power_dict['current_power'] = power_info['average_socket_power']
power_dict['average_power'] = power_info['average_socket_power']
power_dict['current_gfx_voltage'] = power_info['gfx_voltage']
power_dict['current_soc_voltage'] = power_info['soc_voltage']
@@ -1365,7 +1365,9 @@ class AMDSMICommands():
if self.logger.is_human_readable_format():
unit = 'GT/s'
pcie_link_status['current_speed'] = f"{pcie_link_status['pcie_speed']} {unit}"
pcie_dict['current_lanes'] = f"{pcie_link_status['pcie_lanes']} lanes"
pcie_dict['current_speed'] = f"{pcie_dict['current_speed']} GT/s"
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get pcie link status for gpu %s | %s", gpu_id, e.get_error_info())
+3 -3
Просмотреть файл
@@ -1332,6 +1332,9 @@ typedef struct {
/*
* v1.5 additions
*/
// JPEG activity % per AID
uint16_t jpeg_activity[AMDSMI_MAX_NUM_JPEG];
// Memory Bandwidth Usage Accumulated (GB/sec)
uint64_t mem_bandwidth_acc;
@@ -1343,9 +1346,6 @@ typedef struct {
// PCIE NAK received accumulated count
uint32_t pcie_nak_rcvd_count_acc;
// JPEG activity % per AID
uint16_t jpeg_activity[AMDSMI_MAX_NUM_JPEG];
/// \endcond
} amdsmi_gpu_metrics_t;
+12 -9
Просмотреть файл
@@ -6800,23 +6800,26 @@ rsmi_dev_metrics_curr_gfxclk_get(uint32_t dv_ind, GPUMetricCurrGfxClk_t* current
}
const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrGfxClock);
amd::smi::GPUMetricCurrGfxClkTbl_t tmp_curr_gfxclk_tbl{};
auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_gfxclk_tbl);
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
const auto max_num_elems =
static_cast<uint16_t>(std::end(*current_gfxclk_value) - std::begin(*current_gfxclk_value));
std::copy_n(std::begin(tmp_curr_gfxclk_tbl), max_num_elems, *current_gfxclk_value);
rsmi_gpu_metrics_t gpu = {};
auto status = rsmi_dev_gpu_metrics_info_get(dv_ind, &gpu);
if (status == rsmi_status_t::RSMI_STATUS_SUCCESS) {
std::copy_n(std::begin(gpu.current_gfxclks),
static_cast<uint16_t>(
sizeof(gpu.current_gfxclks)/sizeof(gpu.current_gfxclks[0])),
*current_gfxclk_value);
}
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | End Result "
<< " | Device #: " << dv_ind
<< " | Metric Type: " << static_cast<AMDGpuMetricTypeId_t>(gpu_metric_unit)
<< " | Metric Size: " << tmp_curr_gfxclk_tbl.size()
<< " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |";
<< " | Metric Size: " << static_cast<uint16_t>(
sizeof(gpu.current_gfxclks)/sizeof(gpu.current_gfxclks[0]))
<< " | Returning = " << status << " "
<< getRSMIStatusString(status) << " |";
LOG_INFO(ostrstream);
return status_code;
return status;
CATCH
}
+44 -26
Просмотреть файл
@@ -836,29 +836,35 @@ rsmi_status_t init_max_public_gpu_matrics(AMGpuMetricsPublicLatest_t& rsmi_gpu_m
rsmi_gpu_metrics.pcie_replay_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_replay_count_acc)>();
rsmi_gpu_metrics.pcie_replay_rover_count_acc = init_max_uint_types<decltype(rsmi_gpu_metrics.pcie_replay_rover_count_acc)>();
std::fill(std::begin(rsmi_gpu_metrics.xgmi_read_data_acc),
std::end(rsmi_gpu_metrics.xgmi_read_data_acc),
init_max_uint_types<std::uint64_t>());
std::fill_n(&rsmi_gpu_metrics.xgmi_read_data_acc[0],
(sizeof(rsmi_gpu_metrics.xgmi_read_data_acc) /
sizeof(rsmi_gpu_metrics.xgmi_read_data_acc[0])),
std::numeric_limits<uint64_t>::max());
std::fill(std::begin(rsmi_gpu_metrics.xgmi_write_data_acc),
std::end(rsmi_gpu_metrics.xgmi_write_data_acc),
init_max_uint_types<std::uint64_t>());
std::fill_n(&rsmi_gpu_metrics.xgmi_write_data_acc[0],
(sizeof(rsmi_gpu_metrics.xgmi_write_data_acc) /
sizeof(rsmi_gpu_metrics.xgmi_write_data_acc[0])),
std::numeric_limits<uint64_t>::max());
std::fill(std::begin(rsmi_gpu_metrics.current_gfxclks),
std::end(rsmi_gpu_metrics.current_gfxclks),
init_max_uint_types<std::uint16_t>());
std::fill_n(&rsmi_gpu_metrics.current_gfxclks[0],
(sizeof(rsmi_gpu_metrics.current_gfxclks) /
sizeof(rsmi_gpu_metrics.current_gfxclks[0])),
std::numeric_limits<uint16_t>::max());
std::fill(std::begin(rsmi_gpu_metrics.current_socclks),
std::end(rsmi_gpu_metrics.current_socclks),
init_max_uint_types<std::uint16_t>());
std::fill_n(&rsmi_gpu_metrics.current_socclks[0],
(sizeof(rsmi_gpu_metrics.current_socclks) /
sizeof(rsmi_gpu_metrics.current_socclks[0])),
std::numeric_limits<uint16_t>::max());
std::fill(std::begin(rsmi_gpu_metrics.current_vclk0s),
std::end(rsmi_gpu_metrics.current_vclk0s),
init_max_uint_types<std::uint16_t>());
std::fill_n(&rsmi_gpu_metrics.current_vclk0s[0],
(sizeof(rsmi_gpu_metrics.current_vclk0s) /
sizeof(rsmi_gpu_metrics.current_vclk0s[0])),
std::numeric_limits<uint16_t>::max());
std::fill(std::begin(rsmi_gpu_metrics.current_dclk0s),
std::end(rsmi_gpu_metrics.current_dclk0s),
init_max_uint_types<std::uint16_t>());
std::fill_n(&rsmi_gpu_metrics.current_dclk0s[0],
(sizeof(rsmi_gpu_metrics.current_dclk0s) /
sizeof(rsmi_gpu_metrics.current_dclk0s[0])),
std::numeric_limits<uint16_t>::max());
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
@@ -1016,22 +1022,22 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v14_t::copy_internal_to_external_m
// Note: Backwards compatibility -> Handling extra/exception cases
// related to earlier versions (1.3)
metrics_public_init.current_gfxclk = metrics_public_init.current_gfxclks[0];
metrics_public_init.average_gfxclk_frequency = metrics_public_init.current_gfxclks[0];
// metrics_public_init.average_gfxclk_frequency = metrics_public_init.current_gfxclks[0];
metrics_public_init.current_socclk = metrics_public_init.current_socclks[0];
metrics_public_init.average_socclk_frequency = metrics_public_init.current_socclks[0];
// metrics_public_init.average_socclk_frequency = metrics_public_init.current_socclks[0];
metrics_public_init.current_vclk0 = metrics_public_init.current_vclk0s[0];
metrics_public_init.average_vclk0_frequency = metrics_public_init.current_vclk0s[0];
// metrics_public_init.average_vclk0_frequency = metrics_public_init.current_vclk0s[0];
metrics_public_init.current_vclk1 = metrics_public_init.current_vclk0s[1];
metrics_public_init.average_vclk1_frequency = metrics_public_init.current_vclk0s[1];
// metrics_public_init.average_vclk1_frequency = metrics_public_init.current_vclk0s[1];
metrics_public_init.current_dclk0 = metrics_public_init.current_dclk0s[0];
metrics_public_init.average_dclk0_frequency = metrics_public_init.current_dclk0s[0];
// metrics_public_init.average_dclk0_frequency = metrics_public_init.current_dclk0s[0];
metrics_public_init.current_dclk1 = metrics_public_init.current_dclk0s[1];
metrics_public_init.average_dclk1_frequency = metrics_public_init.current_dclk0s[1];
// metrics_public_init.average_dclk1_frequency = metrics_public_init.current_dclk0s[1];
return metrics_public_init;
}();
@@ -1407,7 +1413,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m
metrics_public_init.average_mm_activity = m_gpu_metrics_tbl.m_average_mm_activity;
// Power/Energy
metrics_public_init.average_socket_power = m_gpu_metrics_tbl.m_average_socket_power; // 1.3 and 1.4 have the same value
// metrics_public_init.average_socket_power = m_gpu_metrics_tbl.m_average_socket_power; // 1.3 and 1.4 have the same value
metrics_public_init.energy_accumulator = m_gpu_metrics_tbl.m_energy_accumulator;
// Driver attached timestamp (in ns)
@@ -1424,9 +1430,13 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m
// Current clocks
metrics_public_init.current_gfxclk = m_gpu_metrics_tbl.m_current_gfxclk;
metrics_public_init.current_gfxclks[0] = m_gpu_metrics_tbl.m_current_gfxclk;
metrics_public_init.current_socclk = m_gpu_metrics_tbl.m_current_socclk;
metrics_public_init.current_socclks[0] = m_gpu_metrics_tbl.m_current_socclk;
metrics_public_init.current_vclk0 = m_gpu_metrics_tbl.m_current_vclk0;
metrics_public_init.current_vclk0s[0] = m_gpu_metrics_tbl.m_current_vclk0;
metrics_public_init.current_dclk0 = m_gpu_metrics_tbl.m_current_dclk0;
metrics_public_init.current_dclk0s[0] = m_gpu_metrics_tbl.m_current_dclk0;
metrics_public_init.current_uclk = m_gpu_metrics_tbl.m_current_uclk;
metrics_public_init.current_vclk1 = m_gpu_metrics_tbl.m_current_vclk1;
metrics_public_init.current_dclk1 = m_gpu_metrics_tbl.m_current_dclk1;
@@ -1467,7 +1477,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m
//
// Note: Backwards compatibility -> Handling extra/exception cases
// related to earlier versions (1.2)
metrics_public_init.current_socket_power = metrics_public_init.average_socket_power;
// metrics_public_init.current_socket_power = metrics_public_init.average_socket_power;
return metrics_public_init;
}();
@@ -2798,6 +2808,14 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t* smu) {
assert(smu != nullptr);
if (smu == nullptr) {
status_code = rsmi_status_t::RSMI_STATUS_INVALID_ARGS;
ostrstream << __PRETTY_FUNCTION__
<< " | ======= end ======= "
<< " | Fail "
<< " | Device #: " << dv_ind
<< " | Returning = "
<< getRSMIStatusString(status_code)
<< " |";
LOG_ERROR(ostrstream);
return status_code;
}
+21 -1
Просмотреть файл
@@ -116,7 +116,13 @@ amdsmi_status_t rsmi_wrapper(F && f,
uint32_t gpu_index = gpu_device->get_gpu_id();
auto rstatus = std::forward<F>(f)(gpu_index,
std::forward<Args>(args)...);
return amd::smi::rsmi_to_amdsmi_status(rstatus);
r = amd::smi::rsmi_to_amdsmi_status(rstatus);
std::ostringstream ss;
const char *status_string;
amdsmi_status_code_to_string(r, &status_string);
ss << __PRETTY_FUNCTION__ << " | returning status = " << status_string;
LOG_INFO(ss);
return r;
}
amdsmi_status_t
@@ -1098,7 +1104,21 @@ amdsmi_status_t amdsmi_get_gpu_metrics_info(
(sizeof(pgpu_metrics->jpeg_activity) /
sizeof(pgpu_metrics->jpeg_activity[0])),
std::numeric_limits<uint16_t>::max());
pgpu_metrics->mem_bandwidth_acc =
static_cast<uint64_t>(std::numeric_limits<uint64_t>::max());
pgpu_metrics->mem_max_bandwidth =
static_cast<uint32_t>(std::numeric_limits<uint32_t>::max());
pgpu_metrics->pcie_nak_sent_count_acc =
static_cast<uint32_t>(std::numeric_limits<uint32_t>::max());
pgpu_metrics->pcie_nak_rcvd_count_acc =
static_cast<uint32_t>(std::numeric_limits<uint32_t>::max());
}
std::ostringstream ss;
const char *status_string;
amdsmi_status_code_to_string(ret, &status_string);
ss << __PRETTY_FUNCTION__
<< " | END, returning status = " << status_string;
LOG_TRACE(ss);
// END: REMOVE WHATS ABOVE ME
return ret;
}
+516 -72
Просмотреть файл
@@ -98,12 +98,17 @@ void TestGpuMetricsRead::Run(void) {
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(processor_handles_[i]);
std::cout << "Device #" << std::to_string(i) << "\n";
IF_VERB(STANDARD) {
std::cout << "\t**GPU METRICS: Using static struct (Backwards Compatibility):\n";
}
amdsmi_gpu_metrics_t smu;
err = amdsmi_get_gpu_metrics_info(processor_handles_[i], &smu);
const char *status_string;
amdsmi_status_code_to_string(err, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_info(): " << status_string
<< "\n";
if (err != AMDSMI_STATUS_SUCCESS) {
if (err == AMDSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
@@ -115,8 +120,21 @@ void TestGpuMetricsRead::Run(void) {
} else {
CHK_ERR_ASRT(err);
IF_VERB(STANDARD) {
std::cout << "METRIC TABLE HEADER:\n";
std::cout << "structure_size=" << std::dec
<< static_cast<int>(smu.common_header.structure_size) << '\n';
std::cout << "format_revision=" << std::dec
<< static_cast<int>(smu.common_header.format_revision) << '\n';
std::cout << "content_revision=" << std::dec
<< static_cast<int>(smu.common_header.content_revision) << '\n';
std::cout << "\n";
std::cout << "TIME STAMPS (ns):\n";
std::cout << std::dec << "system_clock_counter="
<< smu.system_clock_counter << '\n';
std::cout << "firmware_timestamp (10ns resolution)=" << std::dec
<< smu.firmware_timestamp << '\n';
std::cout << "\n";
std::cout << "TEMPERATURES (C):\n";
std::cout << std::dec << "temperature_edge="
<< smu.temperature_edge << '\n';
std::cout << std::dec << "temperature_hotspot="
@@ -129,16 +147,39 @@ void TestGpuMetricsRead::Run(void) {
<< smu.temperature_vrsoc << '\n';
std::cout << std::dec << "temperature_vrmem="
<< smu.temperature_vrmem << '\n';
for (int i = 0; i < AMDSMI_NUM_HBM_INSTANCES; ++i) {
std::cout << "temperature_hbm[" << i << "]=" << std::dec <<
smu.temperature_hbm[i] << '\n';
}
std::cout << "\n";
std::cout << "UTILIZATION (%):\n";
std::cout << std::dec << "average_gfx_activity="
<< smu.average_gfx_activity << '\n';
std::cout << std::dec << "average_umc_activity="
<< smu.average_umc_activity << '\n';
std::cout << std::dec << "average_mm_activity="
<< smu.average_mm_activity << '\n';
std::cout << std::dec << "jpeg_activity= [";
uint16_t size = static_cast<uint16_t>(
sizeof(smu.jpeg_activity)/sizeof(smu.jpeg_activity[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.jpeg_activity[i] << ", ";
} else {
std::cout << std::dec << smu.jpeg_activity[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\n";
std::cout << "POWER (W)/ENERGY (15.259uJ per 1ns):\n";
std::cout << std::dec << "average_socket_power="
<< smu.average_socket_power << '\n';
std::cout << std::dec << "current_socket_power="
<< smu.current_socket_power << '\n';
std::cout << std::dec << "energy_accumulator="
<< smu.energy_accumulator << '\n';
std::cout << "\n";
std::cout << "AVG CLOCKS (MHz):\n";
std::cout << std::dec << "average_gfxclk_frequency="
<< smu.average_gfxclk_frequency << '\n';
std::cout << std::dec << "average_gfxclk_frequency="
@@ -153,417 +194,820 @@ void TestGpuMetricsRead::Run(void) {
<< smu.average_vclk1_frequency << '\n';
std::cout << std::dec << "average_dclk1_frequency="
<< smu.average_dclk1_frequency << '\n';
std::cout << "\n";
std::cout << "CURRENT CLOCKS (MHz):\n";
std::cout << std::dec << "current_gfxclk="
<< smu.current_gfxclk << '\n';
std::cout << std::dec << "current_gfxclks= [";
size = static_cast<uint16_t>(
sizeof(smu.current_gfxclks)/sizeof(smu.current_gfxclks[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.current_gfxclks[i] << ", ";
} else {
std::cout << std::dec << smu.current_gfxclks[i];
}
}
std::cout << std::dec << "]\n";
std::cout << std::dec << "current_socclk="
<< smu.current_socclk << '\n';
std::cout << std::dec << "current_socclks= [";
size = static_cast<uint16_t>(
sizeof(smu.current_socclks)/sizeof(smu.current_socclks[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.current_socclks[i] << ", ";
} else {
std::cout << std::dec << smu.current_socclks[i];
}
}
std::cout << std::dec << "]\n";
std::cout << std::dec << "current_uclk="
<< smu.current_uclk << '\n';
std::cout << std::dec << "current_vclk0="
<< smu.current_vclk0 << '\n';
std::cout << std::dec << "current_vclk0s= [";
size = static_cast<uint16_t>(
sizeof(smu.current_vclk0s)/sizeof(smu.current_vclk0s[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.current_vclk0s[i] << ", ";
} else {
std::cout << std::dec << smu.current_vclk0s[i];
}
}
std::cout << std::dec << "]\n";
std::cout << std::dec << "current_dclk0="
<< smu.current_dclk0 << '\n';
std::cout << std::dec << "current_dclk0s= [";
size = static_cast<uint16_t>(
sizeof(smu.current_dclk0s)/sizeof(smu.current_dclk0s[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.current_dclk0s[i] << ", ";
} else {
std::cout << std::dec << smu.current_dclk0s[i];
}
}
std::cout << std::dec << "]\n";
std::cout << std::dec << "current_vclk1="
<< smu.current_vclk1 << '\n';
std::cout << std::dec << "current_dclk1="
<< smu.current_dclk1 << '\n';
std::cout << "\n";
std::cout << "TROTTLE STATUS:\n";
std::cout << std::dec << "throttle_status="
<< smu.throttle_status << '\n';
std::cout << "\n";
std::cout << "FAN SPEED:\n";
std::cout << std::dec << "current_fan_speed="
<< smu.current_fan_speed << '\n';
std::cout << "\n";
std::cout << "LINK WIDTH (number of lanes) /SPEED (0.1 GT/s):\n";
std::cout << "pcie_link_width="
<< std::to_string(smu.pcie_link_width) << '\n';
std::cout << "pcie_link_width="
std::cout << "pcie_link_speed="
<< std::to_string(smu.pcie_link_speed) << '\n';
std::cout << "xgmi_link_width="
<< std::to_string(smu.xgmi_link_width) << '\n';
std::cout << "xgmi_link_speed="
<< std::to_string(smu.xgmi_link_speed) << '\n';
std::cout << "\n";
std::cout << "Utilization Accumulated(%):\n";
std::cout << "gfx_activity_acc="
<< std::dec << smu.gfx_activity_acc << '\n';
std::cout << "mem_activity_acc="
<< std::dec << smu.mem_activity_acc << '\n';
for (int i = 0; i < AMDSMI_NUM_HBM_INSTANCES; ++i) {
std::cout << "temperature_hbm[" << i << "]=" << std::dec <<
smu.temperature_hbm[i] << '\n';
std::cout << "\n";
std::cout << "XGMI ACCUMULATED DATA TRANSFER SIZE (KB):\n";
std::cout << std::dec << "xgmi_read_data_acc= [";
size = static_cast<uint16_t>(
sizeof(smu.xgmi_read_data_acc)/sizeof(smu.xgmi_read_data_acc[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.xgmi_read_data_acc[i] << ", ";
} else {
std::cout << std::dec << smu.xgmi_read_data_acc[i];
}
}
std::cout << std::dec << "]\n";
std::cout << std::dec << "xgmi_write_data_acc= [";
size = static_cast<uint16_t>(
sizeof(smu.xgmi_write_data_acc)/sizeof(smu.xgmi_write_data_acc[0]));
for (uint16_t i= 0; i < size; i++) {
if (i+1 < size) {
std::cout << std::dec << smu.xgmi_write_data_acc[i] << ", ";
} else {
std::cout << std::dec << smu.xgmi_write_data_acc[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "mem_bandwidth_acc=" << std::dec
<< smu.mem_bandwidth_acc << "\n";
std::cout << "mem_max_bandwidth=" << std::dec
<< smu.mem_max_bandwidth << "\n";
std::cout << "pcie_nak_sent_count_acc=" << std::dec
<< smu.pcie_nak_sent_count_acc << "\n";
std::cout << "pcie_nak_rcvd_count_acc=" << std::dec
<< smu.pcie_nak_rcvd_count_acc << "\n";
}
}
// Verify api support checking functionality is working
err = amdsmi_get_gpu_metrics_info(processor_handles_[i], nullptr);
DISPLAY_AMDSMI_ERR(err);
ASSERT_EQ(err, AMDSMI_STATUS_INVAL);
}
//
auto val_ui16 = uint16_t(0);
auto val_ui32 = uint32_t(0);
auto val_ui64 = uint64_t(0);
auto status_code(amdsmi_status_t::AMDSMI_STATUS_SUCCESS);
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(processor_handles_[i]);
std::cout << "Device #" << std::to_string(i) << "\n";
auto temp_edge_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_temp_edge(processor_handles_[i], &temp_edge_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_edge(): " << status_string << "\n";
}
auto temp_hotspot_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_temp_hotspot(processor_handles_[i], &temp_hotspot_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_hotspot(): " << status_string << "\n";
}
auto temp_mem_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_temp_mem(processor_handles_[i], &temp_mem_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_mem(): " << status_string << "\n";
}
auto temp_vrgfx_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_temp_vrgfx(processor_handles_[i], &temp_vrgfx_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_vrgfx(): " << status_string << "\n";
}
auto temp_vrsoc_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_temp_vrsoc(processor_handles_[i], &temp_vrsoc_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_vrsoc(): " << status_string << "\n";
}
auto temp_vrmem_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_temp_vrmem(processor_handles_[i], &temp_vrmem_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_vrmem(): " << status_string << "\n";
}
gpu_metric_temp_hbm_t temp_hbm_values;
status_code = amdsmi_get_gpu_metrics_temp_hbm(processor_handles_[i], &temp_hbm_values);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_hbm(): " << status_string << "\n";
}
auto temp_curr_socket_power_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_curr_socket_power(processor_handles_[i], &temp_curr_socket_power_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_socket_power(): " << status_string << "\n";
}
auto temp_energy_accum_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_energy_acc(processor_handles_[i], &temp_energy_accum_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_energy_acc(): " << status_string << "\n";
}
auto temp_avg_socket_power_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_socket_power(processor_handles_[i], &temp_avg_socket_power_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_edge(): " << status_string << "\n";
}
auto temp_avg_gfx_activity_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_gfx_activity(processor_handles_[i], &temp_avg_gfx_activity_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_gfx_activity(): " << status_string << "\n";
}
auto temp_avg_umc_activity_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_umc_activity(processor_handles_[i], &temp_avg_umc_activity_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_umc_activity(): " << status_string << "\n";
}
auto temp_avg_mm_activity_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_mm_activity(processor_handles_[i], &temp_avg_mm_activity_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_mm_activity(): " << status_string << "\n";
}
gpu_metric_vcn_activity_t temp_vcn_values;
status_code = amdsmi_get_gpu_metrics_vcn_activity(processor_handles_[i], &temp_vcn_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_vcn_activity(): " << status_string << "\n";
}
auto temp_mem_activity_accum_value = val_ui32;
status_code = amdsmi_get_gpu_metrics_mem_activity_acc(processor_handles_[i], &temp_mem_activity_accum_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_mem_activity_acc(): " << status_string << "\n";
}
auto temp_gfx_activity_accum_value = val_ui32;
status_code = amdsmi_get_gpu_metrics_gfx_activity_acc(processor_handles_[i], &temp_gfx_activity_accum_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_gfx_activity_acc(): " << status_string << "\n";
}
auto temp_avg_gfx_clock_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_gfx_clock_frequency(processor_handles_[i], &temp_avg_gfx_clock_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_gfx_clock_frequency(): " << status_string << "\n";
}
auto temp_avg_soc_clock_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_soc_clock_frequency(processor_handles_[i], &temp_avg_soc_clock_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_soc_clock_frequency(): " << status_string << "\n";
}
auto temp_avg_uclock_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_uclock_frequency(processor_handles_[i], &temp_avg_uclock_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_uclock_frequency(): " << status_string << "\n";
}
auto temp_avg_vclock0_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_vclock0_frequency(processor_handles_[i], &temp_avg_vclock0_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_vclock0_frequency(): " << status_string << "\n";
}
auto temp_avg_dclock0_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_dclock0_frequency(processor_handles_[i], &temp_avg_dclock0_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_dclock0_frequency(): " << status_string << "\n";
}
auto temp_avg_vclock1_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_vclock1_frequency(processor_handles_[i], &temp_avg_vclock1_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_vclock1_frequency(): " << status_string << "\n";
}
auto temp_avg_dclock1_freq_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_avg_dclock1_frequency(processor_handles_[i], &temp_avg_dclock1_freq_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_dclock1_frequency(): " << status_string << "\n";
}
auto temp_curr_vclk1_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_curr_vclk1(processor_handles_[i], &temp_curr_vclk1_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_vclk1(): " << status_string << "\n";
}
auto temp_curr_dclk1_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_curr_dclk1(processor_handles_[i], &temp_curr_dclk1_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_dclk1(): " << status_string << "\n";
}
auto temp_curr_uclk_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_curr_uclk(processor_handles_[i], &temp_curr_uclk_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_uclk(): " << status_string << "\n";
}
gpu_metric_curr_dclk0_t temp_curr_dclk0_values;
status_code = amdsmi_get_gpu_metrics_curr_dclk0(processor_handles_[i], &temp_curr_dclk0_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_dclk0(): " << status_string << "\n";
}
gpu_metric_curr_gfxclk_t temp_curr_gfxclk_values;
status_code = amdsmi_get_gpu_metrics_curr_gfxclk(processor_handles_[i], &temp_curr_gfxclk_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_gfxclk(): " << status_string << "\n";
}
gpu_metric_curr_socclk_t temp_curr_socclk_values;
status_code = amdsmi_get_gpu_metrics_curr_socclk(processor_handles_[i], &temp_curr_socclk_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_socclk(): " << status_string << "\n";
}
gpu_metric_curr_vclk0_t temp_curr_vclk0_values;
status_code = amdsmi_get_gpu_metrics_curr_vclk0(processor_handles_[i], &temp_curr_vclk0_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_vclk0(): " << status_string << "\n";
}
auto temp_indep_throttle_status_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_indep_throttle_status(processor_handles_[i], &temp_indep_throttle_status_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_indep_throttle_status(): " << status_string << "\n";
}
auto temp_throttle_status_value = val_ui32;
status_code = amdsmi_get_gpu_metrics_throttle_status(processor_handles_[i], &temp_throttle_status_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_throttle_status(): " << status_string << "\n";
}
auto temp_gfxclk_lock_status_value = val_ui32;
status_code = amdsmi_get_gpu_metrics_gfxclk_lock_status(processor_handles_[i], &temp_gfxclk_lock_status_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_gfxclk_lock_status(): " << status_string << "\n";
}
auto temp_curr_fan_speed_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_curr_fan_speed(processor_handles_[i], &temp_curr_fan_speed_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_fan_speed(): " << status_string << "\n";
}
auto temp_pcie_link_width_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_pcie_link_width(processor_handles_[i], &temp_pcie_link_width_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_link_width(): " << status_string << "\n";
}
auto temp_pcie_link_speed_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_pcie_link_speed(processor_handles_[i], &temp_pcie_link_speed_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_link_speed(): " << status_string << "\n";
}
auto temp_pcie_bandwidth_accum_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_pcie_bandwidth_acc(processor_handles_[i], &temp_pcie_bandwidth_accum_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_bandwidth_acc(): " << status_string << "\n";
}
auto temp_pcie_bandwidth_inst_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_pcie_bandwidth_inst(processor_handles_[i], &temp_pcie_bandwidth_inst_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_bandwidth_inst(): " << status_string << "\n";
}
auto temp_pcie_l0_recov_count_accum_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_pcie_l0_recov_count_acc(processor_handles_[i], &temp_pcie_l0_recov_count_accum_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_l0_recov_count_acc(): " << status_string << "\n";
}
auto temp_pcie_replay_count_accum_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_pcie_replay_count_acc(processor_handles_[i], &temp_pcie_replay_count_accum_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_replay_count_acc(): " << status_string << "\n";
}
auto temp_pcie_replay_rover_count_accum_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_pcie_replay_rover_count_acc(processor_handles_[i], &temp_pcie_replay_rover_count_accum_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_replay_rover_count_acc(): " << status_string << "\n";
}
auto temp_xgmi_link_width_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_xgmi_link_width(processor_handles_[i], &temp_xgmi_link_width_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_link_width(): " << status_string << "\n";
}
auto temp_xgmi_link_speed_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_xgmi_link_speed(processor_handles_[i], &temp_xgmi_link_speed_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_link_speed(): " << status_string << "\n";
}
gpu_metric_xgmi_read_data_acc_t temp_xgmi_read_values;
status_code = amdsmi_get_gpu_metrics_xgmi_read_data(processor_handles_[i], &temp_xgmi_read_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_read_data(): " << status_string << "\n";
}
gpu_metric_xgmi_write_data_acc_t temp_xgmi_write_values;
status_code = amdsmi_get_gpu_metrics_xgmi_write_data(processor_handles_[i], &temp_xgmi_write_values);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_write_data(): " << status_string << "\n";
}
auto temp_voltage_soc_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_volt_soc(processor_handles_[i], &temp_voltage_soc_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_volt_soc(): " << status_string << "\n";
}
auto temp_voltage_gfx_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_volt_gfx(processor_handles_[i], &temp_voltage_gfx_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_volt_gfx(): " << status_string << "\n";
}
auto temp_voltage_mem_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_volt_mem(processor_handles_[i], &temp_voltage_mem_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_volt_mem(): " << status_string << "\n";
}
auto temp_system_clock_counter_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_system_clock_counter(processor_handles_[i], &temp_system_clock_counter_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_system_clock_counter(): " << status_string << "\n";
}
auto temp_firmware_timestamp_value = val_ui64;
status_code = amdsmi_get_gpu_metrics_firmware_timestamp(processor_handles_[i], &temp_firmware_timestamp_value);
CHK_ERR_ASRT(status_code);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_firmware_timestamp(): " << status_string << "\n";
}
auto temp_xcd_counter_value = val_ui16;
status_code = amdsmi_get_gpu_metrics_xcd_counter(processor_handles_[i], &temp_xcd_counter_value);
if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) {
CHK_ERR_ASRT(status_code);
} else {
const char *status_string;
amdsmi_status_code_to_string(status_code, &status_string);
std::cout << "\t\t** amdsmi_get_gpu_metrics_xcd_counter(): " << status_string << "\n";
}
IF_VERB(STANDARD) {
std::cout << "\n";
std::cout << "\t[Temperature]" << "\n";
std::cout << "\t -> temp_edge(): " << temp_edge_value << "\n";
std::cout << "\t -> temp_hotspot(): " << temp_hotspot_value << "\n";
std::cout << "\t -> temp_mem(): " << temp_mem_value << "\n";
std::cout << "\t -> temp_vrgfx(): " << temp_vrgfx_value << "\n";
std::cout << "\t -> temp_vrsoc(): " << temp_vrsoc_value << "\n";
std::cout << "\t -> temp_vrmem(): " << temp_vrmem_value << "\n";
std::cout << "\t -> temp_hbm(): " << temp_hbm_values << "\n";
std::cout << "\t -> temp_edge(): " << std::dec << temp_edge_value << "\n";
std::cout << "\t -> temp_hotspot(): " << std::dec << temp_hotspot_value << "\n";
std::cout << "\t -> temp_mem(): " << std::dec << temp_mem_value << "\n";
std::cout << "\t -> temp_vrgfx(): " << std::dec << temp_vrgfx_value << "\n";
std::cout << "\t -> temp_vrsoc(): " << std::dec << temp_vrsoc_value << "\n";
std::cout << "\t -> temp_vrmem(): " << std::dec << temp_vrmem_value << "\n";
std::cout << "\t -> temp_hbm(temp_hbm_values): [";
uint16_t size = static_cast<uint16_t>(
sizeof(temp_hbm_values) / sizeof(temp_hbm_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_hbm_values[i] << ", ";
} else {
std::cout << std::dec << temp_hbm_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\n";
std::cout << "\t[Power/Energy]" << "\n";
std::cout << "\t -> current_socket_power(): " << temp_curr_socket_power_value << "\n";
std::cout << "\t -> energy_accum(): " << temp_energy_accum_value << "\n";
std::cout << "\t -> average_socket_power(): " << temp_avg_socket_power_value << "\n";
std::cout << "\t -> current_socket_power(): " << std::dec << temp_curr_socket_power_value << "\n";
std::cout << "\t -> energy_accum(): " << std::dec << temp_energy_accum_value << "\n";
std::cout << "\t -> average_socket_power(): " << std::dec << temp_avg_socket_power_value << "\n";
std::cout << "\n";
std::cout << "\t[Utilization]" << "\n";
std::cout << "\t -> average_gfx_activity(): " << temp_avg_gfx_activity_value << "\n";
std::cout << "\t -> average_umc_activity(): " << temp_avg_umc_activity_value << "\n";
std::cout << "\t -> average_mm_activity(): " << temp_avg_mm_activity_value << "\n";
std::cout << "\t -> vcn_activity(): " << temp_vcn_values << "\n";
std::cout << "\t -> mem_activity_accum(): " << temp_mem_activity_accum_value << "\n";
std::cout << "\t -> gfx_activity_accum(): " << temp_gfx_activity_accum_value << "\n";
std::cout << "\t -> average_gfx_activity(): " << std::dec << temp_avg_gfx_activity_value << "\n";
std::cout << "\t -> average_umc_activity(): " << std::dec << temp_avg_umc_activity_value << "\n";
std::cout << "\t -> average_mm_activity(): " << std::dec << temp_avg_mm_activity_value << "\n";
std::cout << "\t -> vcn_activity(temp_vcn_values): [";
size = static_cast<uint16_t>(
sizeof(temp_vcn_values) / sizeof(temp_vcn_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_vcn_values[i] << ", ";
} else {
std::cout << std::dec << temp_vcn_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\t -> mem_activity_accum(): " << std::dec << temp_mem_activity_accum_value << "\n";
std::cout << "\t -> gfx_activity_accum(): " << std::dec << temp_gfx_activity_accum_value << "\n";
std::cout << "\n";
std::cout << "\t[Average Clock]" << "\n";
std::cout << "\t -> average_gfx_clock_frequency(): " << temp_avg_gfx_clock_freq_value << "\n";
std::cout << "\t -> average_soc_clock_frequency(): " << temp_avg_soc_clock_freq_value << "\n";
std::cout << "\t -> average_uclock_frequency(): " << temp_avg_uclock_freq_value << "\n";
std::cout << "\t -> average_vclock0_frequency(): " << temp_avg_vclock0_freq_value << "\n";
std::cout << "\t -> average_dclock0_frequency(): " << temp_avg_dclock0_freq_value << "\n";
std::cout << "\t -> average_vclock1_frequency(): " << temp_avg_vclock1_freq_value << "\n";
std::cout << "\t -> average_dclock1_frequency(): " << temp_avg_dclock1_freq_value << "\n";
std::cout << "\t -> average_gfx_clock_frequency(): " << std::dec << temp_avg_gfx_clock_freq_value << "\n";
std::cout << "\t -> average_soc_clock_frequency(): " << std::dec << temp_avg_soc_clock_freq_value << "\n";
std::cout << "\t -> average_uclock_frequency(): " << std::dec << temp_avg_uclock_freq_value << "\n";
std::cout << "\t -> average_vclock0_frequency(): " << std::dec << std::dec << temp_avg_vclock0_freq_value << "\n";
std::cout << "\t -> average_dclock0_frequency(): " << std::dec << temp_avg_dclock0_freq_value << "\n";
std::cout << "\t -> average_vclock1_frequency(): " << std::dec << temp_avg_vclock1_freq_value << "\n";
std::cout << "\t -> average_dclock1_frequency(): " << std::dec << temp_avg_dclock1_freq_value << "\n";
std::cout << "\n";
std::cout << "\t[Current Clock]" << "\n";
std::cout << "\t -> current_vclock1(): " << temp_curr_vclk1_value << "\n";
std::cout << "\t -> current_dclock1(): " << temp_curr_dclk1_value << "\n";
std::cout << "\t -> current_uclock(): " << temp_curr_uclk_value << "\n";
std::cout << "\t -> current_dclk0(): " << temp_curr_dclk0_values << "\n";
std::cout << "\t -> current_gfxclk(): " << temp_curr_gfxclk_values << "\n";
std::cout << "\t -> current_soc_clock(): " << temp_curr_socclk_values << "\n";
std::cout << "\t -> current_vclk0(): " << temp_curr_vclk0_values << "\n";
std::cout << "\t -> current_vclock1(): " << std::dec << temp_curr_vclk1_value << "\n";
std::cout << "\t -> current_dclock1(): " << std::dec << temp_curr_dclk1_value << "\n";
std::cout << "\t -> current_uclock(): " << std::dec << temp_curr_uclk_value << "\n";
std::cout << "\t -> current_dclk0(temp_curr_dclk0_values): [";
size = static_cast<uint16_t>(
sizeof(temp_curr_dclk0_values) / sizeof(temp_curr_dclk0_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_curr_dclk0_values[i] << ", ";
} else {
std::cout << std::dec << temp_curr_dclk0_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\t -> current_gfxclk(temp_curr_gfxclk_values): [";
size = static_cast<uint16_t>(
sizeof(temp_curr_gfxclk_values) / sizeof(temp_curr_gfxclk_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_curr_gfxclk_values[i] << ", ";
} else {
std::cout << std::dec << temp_curr_gfxclk_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\t -> current_soc_clock(temp_curr_socclk_values): [";
size = static_cast<uint16_t>(
sizeof(temp_curr_socclk_values) / sizeof(temp_curr_socclk_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_curr_socclk_values[i] << ", ";
} else {
std::cout << std::dec << temp_curr_socclk_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\t -> current_vclk0(temp_curr_vclk0_values): [";
size = static_cast<uint16_t>(
sizeof(temp_curr_vclk0_values) / sizeof(temp_curr_vclk0_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_curr_vclk0_values[i] << ", ";
} else {
std::cout << std::dec << temp_curr_vclk0_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\n";
std::cout << "\t[Throttle]" << "\n";
std::cout << "\t -> indep_throttle_status(): " << temp_indep_throttle_status_value << "\n";
std::cout << "\t -> throttle_status(): " << temp_throttle_status_value << "\n";
std::cout << "\t -> indep_throttle_status(): " << std::dec << temp_indep_throttle_status_value << "\n";
std::cout << "\t -> throttle_status(): " << std::dec << temp_throttle_status_value << "\n";
std::cout << "\n";
std::cout << "\t[Gfx Clock Lock]" << "\n";
std::cout << "\t -> gfxclk_lock_status(): " << temp_gfxclk_lock_status_value << "\n";
std::cout << "\t -> gfxclk_lock_status(): " << std::dec << temp_gfxclk_lock_status_value << "\n";
std::cout << "\n";
std::cout << "\t[Current Fan Speed]" << "\n";
std::cout << "\t -> current_fan_speed(): " << temp_curr_fan_speed_value << "\n";
std::cout << "\t -> current_fan_speed(): " << std::dec << temp_curr_fan_speed_value << "\n";
std::cout << "\n";
std::cout << "\t[Link/Bandwidth/Speed]" << "\n";
std::cout << "\t -> pcie_link_width(): " << temp_pcie_link_width_value << "\n";
std::cout << "\t -> pcie_link_speed(): " << temp_pcie_link_speed_value << "\n";
std::cout << "\t -> pcie_bandwidth_accum(): " << temp_pcie_bandwidth_accum_value << "\n";
std::cout << "\t -> pcie_bandwidth_inst(): " << temp_pcie_bandwidth_inst_value << "\n";
std::cout << "\t -> pcie_l0_recov_count_accum(): " << temp_pcie_l0_recov_count_accum_value << "\n";
std::cout << "\t -> pcie_replay_count_accum(): " << temp_pcie_replay_count_accum_value << "\n";
std::cout << "\t -> pcie_replay_rollover_count_accum(): " << temp_pcie_replay_rover_count_accum_value << "\n";
std::cout << "\t -> xgmi_link_width(): " << temp_xgmi_link_width_value << "\n";
std::cout << "\t -> xgmi_link_speed(): " << temp_xgmi_link_speed_value << "\n";
std::cout << "\t -> xgmi_read_data(): " << temp_xgmi_read_values << "\n";
std::cout << "\t -> xgmi_write_data(): " << temp_xgmi_write_values << "\n";
std::cout << "\t -> pcie_link_width(): " << std::dec << temp_pcie_link_width_value << "\n";
std::cout << "\t -> pcie_link_speed(): " << std::dec << temp_pcie_link_speed_value << "\n";
std::cout << "\t -> pcie_bandwidth_accum(): " << std::dec << std::dec << temp_pcie_bandwidth_accum_value << "\n";
std::cout << "\t -> pcie_bandwidth_inst(): " << std::dec << temp_pcie_bandwidth_inst_value << "\n";
std::cout << "\t -> pcie_l0_recov_count_accum(): " << std::dec << std::dec << temp_pcie_l0_recov_count_accum_value << "\n";
std::cout << "\t -> pcie_replay_count_accum(): " << std::dec << temp_pcie_replay_count_accum_value << "\n";
std::cout << "\t -> pcie_replay_rollover_count_accum(): " << std::dec << temp_pcie_replay_rover_count_accum_value << "\n";
std::cout << "\t -> xgmi_link_width(): " << std::dec << temp_xgmi_link_width_value << "\n";
std::cout << "\t -> xgmi_link_speed(): " << std::dec << std::dec << temp_xgmi_link_speed_value << "\n";
std::cout << "\t -> xgmi_read_data(temp_xgmi_read_values): ";
size = static_cast<uint16_t>(
sizeof(temp_xgmi_read_values) / sizeof(temp_xgmi_read_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_xgmi_read_values[i] << ", ";
} else {
std::cout << std::dec << temp_xgmi_read_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\t -> xgmi_write_data(temp_xgmi_write_values): [";
size = static_cast<uint16_t>(
sizeof(temp_xgmi_write_values) / sizeof(temp_xgmi_write_values[0]));
for (uint16_t i = 0; i < size; i++) {
if (i + 1 < size) {
std::cout << std::dec << temp_xgmi_write_values[i] << ", ";
} else {
std::cout << std::dec << temp_xgmi_write_values[i];
}
}
std::cout << std::dec << "]\n";
std::cout << "\n";
std::cout << "\t[Voltage]" << "\n";
std::cout << "\t -> voltage_soc(): " << temp_voltage_soc_value << "\n";
std::cout << "\t -> voltage_gfx(): " << temp_voltage_gfx_value << "\n";
std::cout << "\t -> voltage_mem(): " << temp_voltage_mem_value << "\n";
std::cout << "\t -> voltage_soc(): " << std::dec << temp_voltage_soc_value << "\n";
std::cout << "\t -> voltage_gfx(): " << std::dec << temp_voltage_gfx_value << "\n";
std::cout << "\t -> voltage_mem(): " << std::dec << temp_voltage_mem_value << "\n";
std::cout << "\n";
std::cout << "\t[Timestamp]" << "\n";
std::cout << "\t -> system_clock_counter(): " << temp_system_clock_counter_value << "\n";
std::cout << "\t -> firmware_timestamp(): " << temp_firmware_timestamp_value << "\n";
std::cout << "\t -> system_clock_counter(): " << std::dec << temp_system_clock_counter_value << "\n";
std::cout << "\t -> firmware_timestamp(): " << std::dec << temp_firmware_timestamp_value << "\n";
std::cout << "\n";
std::cout << "\t[XCD CounterVoltage]" << "\n";
std::cout << "\t -> xcd_counter(): " << temp_xcd_counter_value << "\n";
std::cout << "\t[XCD Counter]" << "\n";
std::cout << "\t -> xcd_counter(): " << std::dec << temp_xcd_counter_value << "\n";
std::cout << "\n\n";
}
}