diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index a38adcfde4..b4f6bea6d2 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -1173,7 +1173,7 @@ class AMDSMICommands(): power_dict['current_power'] = power_info['current_socket_power'] if power_dict['current_power'] == "N/A": - power_dict['current_power'] = power_info['average_socket_power'] + power_dict['average_power'] = power_info['average_socket_power'] power_dict['current_gfx_voltage'] = power_info['gfx_voltage'] power_dict['current_soc_voltage'] = power_info['soc_voltage'] @@ -1365,7 +1365,9 @@ class AMDSMICommands(): if self.logger.is_human_readable_format(): unit = 'GT/s' - pcie_link_status['current_speed'] = f"{pcie_link_status['pcie_speed']} {unit}" + pcie_dict['current_lanes'] = f"{pcie_link_status['pcie_lanes']} lanes" + pcie_dict['current_speed'] = f"{pcie_dict['current_speed']} GT/s" + except amdsmi_exception.AmdSmiLibraryException as e: logging.debug("Failed to get pcie link status for gpu %s | %s", gpu_id, e.get_error_info()) diff --git a/projects/amdsmi/include/amd_smi/amdsmi.h b/projects/amdsmi/include/amd_smi/amdsmi.h index 6946663574..aaf7eabc5f 100644 --- a/projects/amdsmi/include/amd_smi/amdsmi.h +++ b/projects/amdsmi/include/amd_smi/amdsmi.h @@ -1332,6 +1332,9 @@ typedef struct { /* * v1.5 additions */ + // JPEG activity % per AID + uint16_t jpeg_activity[AMDSMI_MAX_NUM_JPEG]; + // Memory Bandwidth Usage Accumulated (GB/sec) uint64_t mem_bandwidth_acc; @@ -1343,9 +1346,6 @@ typedef struct { // PCIE NAK received accumulated count uint32_t pcie_nak_rcvd_count_acc; - - // JPEG activity % per AID - uint16_t jpeg_activity[AMDSMI_MAX_NUM_JPEG]; /// \endcond } amdsmi_gpu_metrics_t; diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi.cc b/projects/amdsmi/rocm_smi/src/rocm_smi.cc index 988708675d..88fda5dd74 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi.cc +++ 
b/projects/amdsmi/rocm_smi/src/rocm_smi.cc @@ -6800,23 +6800,26 @@ rsmi_dev_metrics_curr_gfxclk_get(uint32_t dv_ind, GPUMetricCurrGfxClk_t* current } const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrGfxClock); - amd::smi::GPUMetricCurrGfxClkTbl_t tmp_curr_gfxclk_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_gfxclk_tbl); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - const auto max_num_elems = - static_cast(std::end(*current_gfxclk_value) - std::begin(*current_gfxclk_value)); - std::copy_n(std::begin(tmp_curr_gfxclk_tbl), max_num_elems, *current_gfxclk_value); + rsmi_gpu_metrics_t gpu = {}; + auto status = rsmi_dev_gpu_metrics_info_get(dv_ind, &gpu); + if (status == rsmi_status_t::RSMI_STATUS_SUCCESS) { + std::copy_n(std::begin(gpu.current_gfxclks), + static_cast( + sizeof(gpu.current_gfxclks)/sizeof(gpu.current_gfxclks[0])), + *current_gfxclk_value); } ostrstream << __PRETTY_FUNCTION__ << " | ======= end ======= " << " | End Result " << " | Device #: " << dv_ind << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Metric Size: " << tmp_curr_gfxclk_tbl.size() - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; + << " | Metric Size: " << static_cast( + sizeof(gpu.current_gfxclks)/sizeof(gpu.current_gfxclks[0])) + << " | Returning = " << status << " " + << getRSMIStatusString(status) << " |"; LOG_INFO(ostrstream); - return status_code; + return status; CATCH } diff --git a/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc b/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc index 11d2871ca9..2887d3021f 100755 --- a/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc +++ b/projects/amdsmi/rocm_smi/src/rocm_smi_gpu_metrics.cc @@ -836,29 +836,35 @@ rsmi_status_t init_max_public_gpu_matrics(AMGpuMetricsPublicLatest_t& rsmi_gpu_m rsmi_gpu_metrics.pcie_replay_count_acc = init_max_uint_types(); rsmi_gpu_metrics.pcie_replay_rover_count_acc = 
init_max_uint_types(); - std::fill(std::begin(rsmi_gpu_metrics.xgmi_read_data_acc), - std::end(rsmi_gpu_metrics.xgmi_read_data_acc), - init_max_uint_types()); + std::fill_n(&rsmi_gpu_metrics.xgmi_read_data_acc[0], + (sizeof(rsmi_gpu_metrics.xgmi_read_data_acc) / + sizeof(rsmi_gpu_metrics.xgmi_read_data_acc[0])), + std::numeric_limits::max()); - std::fill(std::begin(rsmi_gpu_metrics.xgmi_write_data_acc), - std::end(rsmi_gpu_metrics.xgmi_write_data_acc), - init_max_uint_types()); + std::fill_n(&rsmi_gpu_metrics.xgmi_write_data_acc[0], + (sizeof(rsmi_gpu_metrics.xgmi_write_data_acc) / + sizeof(rsmi_gpu_metrics.xgmi_write_data_acc[0])), + std::numeric_limits::max()); - std::fill(std::begin(rsmi_gpu_metrics.current_gfxclks), - std::end(rsmi_gpu_metrics.current_gfxclks), - init_max_uint_types()); + std::fill_n(&rsmi_gpu_metrics.current_gfxclks[0], + (sizeof(rsmi_gpu_metrics.current_gfxclks) / + sizeof(rsmi_gpu_metrics.current_gfxclks[0])), + std::numeric_limits::max()); - std::fill(std::begin(rsmi_gpu_metrics.current_socclks), - std::end(rsmi_gpu_metrics.current_socclks), - init_max_uint_types()); + std::fill_n(&rsmi_gpu_metrics.current_socclks[0], + (sizeof(rsmi_gpu_metrics.current_socclks) / + sizeof(rsmi_gpu_metrics.current_socclks[0])), + std::numeric_limits::max()); - std::fill(std::begin(rsmi_gpu_metrics.current_vclk0s), - std::end(rsmi_gpu_metrics.current_vclk0s), - init_max_uint_types()); + std::fill_n(&rsmi_gpu_metrics.current_vclk0s[0], + (sizeof(rsmi_gpu_metrics.current_vclk0s) / + sizeof(rsmi_gpu_metrics.current_vclk0s[0])), + std::numeric_limits::max()); - std::fill(std::begin(rsmi_gpu_metrics.current_dclk0s), - std::end(rsmi_gpu_metrics.current_dclk0s), - init_max_uint_types()); + std::fill_n(&rsmi_gpu_metrics.current_dclk0s[0], + (sizeof(rsmi_gpu_metrics.current_dclk0s) / + sizeof(rsmi_gpu_metrics.current_dclk0s[0])), + std::numeric_limits::max()); ostrstream << __PRETTY_FUNCTION__ << " | ======= end ======= " @@ -1016,22 +1022,22 @@ 
AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v14_t::copy_internal_to_external_m // Note: Backwards compatibility -> Handling extra/exception cases // related to earlier versions (1.3) metrics_public_init.current_gfxclk = metrics_public_init.current_gfxclks[0]; - metrics_public_init.average_gfxclk_frequency = metrics_public_init.current_gfxclks[0]; + // metrics_public_init.average_gfxclk_frequency = metrics_public_init.current_gfxclks[0]; metrics_public_init.current_socclk = metrics_public_init.current_socclks[0]; - metrics_public_init.average_socclk_frequency = metrics_public_init.current_socclks[0]; + // metrics_public_init.average_socclk_frequency = metrics_public_init.current_socclks[0]; metrics_public_init.current_vclk0 = metrics_public_init.current_vclk0s[0]; - metrics_public_init.average_vclk0_frequency = metrics_public_init.current_vclk0s[0]; + // metrics_public_init.average_vclk0_frequency = metrics_public_init.current_vclk0s[0]; metrics_public_init.current_vclk1 = metrics_public_init.current_vclk0s[1]; - metrics_public_init.average_vclk1_frequency = metrics_public_init.current_vclk0s[1]; + // metrics_public_init.average_vclk1_frequency = metrics_public_init.current_vclk0s[1]; metrics_public_init.current_dclk0 = metrics_public_init.current_dclk0s[0]; - metrics_public_init.average_dclk0_frequency = metrics_public_init.current_dclk0s[0]; + // metrics_public_init.average_dclk0_frequency = metrics_public_init.current_dclk0s[0]; metrics_public_init.current_dclk1 = metrics_public_init.current_dclk0s[1]; - metrics_public_init.average_dclk1_frequency = metrics_public_init.current_dclk0s[1]; + // metrics_public_init.average_dclk1_frequency = metrics_public_init.current_dclk0s[1]; return metrics_public_init; }(); @@ -1407,7 +1413,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m metrics_public_init.average_mm_activity = m_gpu_metrics_tbl.m_average_mm_activity; // Power/Energy - metrics_public_init.average_socket_power = 
m_gpu_metrics_tbl.m_average_socket_power; // 1.3 and 1.4 have the same value + // metrics_public_init.average_socket_power = m_gpu_metrics_tbl.m_average_socket_power; // 1.3 and 1.4 have the same value metrics_public_init.energy_accumulator = m_gpu_metrics_tbl.m_energy_accumulator; // Driver attached timestamp (in ns) @@ -1424,9 +1430,13 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m // Current clocks metrics_public_init.current_gfxclk = m_gpu_metrics_tbl.m_current_gfxclk; + metrics_public_init.current_gfxclks[0] = m_gpu_metrics_tbl.m_current_gfxclk; metrics_public_init.current_socclk = m_gpu_metrics_tbl.m_current_socclk; + metrics_public_init.current_socclks[0] = m_gpu_metrics_tbl.m_current_socclk; metrics_public_init.current_vclk0 = m_gpu_metrics_tbl.m_current_vclk0; + metrics_public_init.current_vclk0s[0] = m_gpu_metrics_tbl.m_current_vclk0; metrics_public_init.current_dclk0 = m_gpu_metrics_tbl.m_current_dclk0; + metrics_public_init.current_dclk0s[0] = m_gpu_metrics_tbl.m_current_dclk0; metrics_public_init.current_uclk = m_gpu_metrics_tbl.m_current_uclk; metrics_public_init.current_vclk1 = m_gpu_metrics_tbl.m_current_vclk1; metrics_public_init.current_dclk1 = m_gpu_metrics_tbl.m_current_dclk1; @@ -1467,7 +1477,7 @@ AMGpuMetricsPublicLatestTupl_t GpuMetricsBase_v13_t::copy_internal_to_external_m // // Note: Backwards compatibility -> Handling extra/exception cases // related to earlier versions (1.2) - metrics_public_init.current_socket_power = metrics_public_init.average_socket_power; + // metrics_public_init.current_socket_power = metrics_public_init.average_socket_power; return metrics_public_init; }(); @@ -2798,6 +2808,14 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t* smu) { assert(smu != nullptr); if (smu == nullptr) { status_code = rsmi_status_t::RSMI_STATUS_INVALID_ARGS; + ostrstream << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Fail " + << " | Device #: " << dv_ind + << " | 
Returning = " + << getRSMIStatusString(status_code) + << " |"; + LOG_ERROR(ostrstream); return status_code; } diff --git a/projects/amdsmi/src/amd_smi/amd_smi.cc b/projects/amdsmi/src/amd_smi/amd_smi.cc index 30faf129ec..ae4b5af69b 100644 --- a/projects/amdsmi/src/amd_smi/amd_smi.cc +++ b/projects/amdsmi/src/amd_smi/amd_smi.cc @@ -116,7 +116,13 @@ amdsmi_status_t rsmi_wrapper(F && f, uint32_t gpu_index = gpu_device->get_gpu_id(); auto rstatus = std::forward(f)(gpu_index, std::forward(args)...); - return amd::smi::rsmi_to_amdsmi_status(rstatus); + r = amd::smi::rsmi_to_amdsmi_status(rstatus); + std::ostringstream ss; + const char *status_string; + amdsmi_status_code_to_string(r, &status_string); + ss << __PRETTY_FUNCTION__ << " | returning status = " << status_string; + LOG_INFO(ss); + return r; } amdsmi_status_t @@ -1098,7 +1104,21 @@ amdsmi_status_t amdsmi_get_gpu_metrics_info( (sizeof(pgpu_metrics->jpeg_activity) / sizeof(pgpu_metrics->jpeg_activity[0])), std::numeric_limits::max()); + pgpu_metrics->mem_bandwidth_acc = + static_cast(std::numeric_limits::max()); + pgpu_metrics->mem_max_bandwidth = + static_cast(std::numeric_limits::max()); + pgpu_metrics->pcie_nak_sent_count_acc = + static_cast(std::numeric_limits::max()); + pgpu_metrics->pcie_nak_rcvd_count_acc = + static_cast(std::numeric_limits::max()); } + std::ostringstream ss; + const char *status_string; + amdsmi_status_code_to_string(ret, &status_string); + ss << __PRETTY_FUNCTION__ + << " | END, returning status = " << status_string; + LOG_TRACE(ss); // END: REMOVE WHATS ABOVE ME return ret; } diff --git a/projects/amdsmi/tests/amd_smi_test/functional/gpu_metrics_read.cc b/projects/amdsmi/tests/amd_smi_test/functional/gpu_metrics_read.cc index 571bb30585..33af655534 100644 --- a/projects/amdsmi/tests/amd_smi_test/functional/gpu_metrics_read.cc +++ b/projects/amdsmi/tests/amd_smi_test/functional/gpu_metrics_read.cc @@ -98,12 +98,17 @@ void TestGpuMetricsRead::Run(void) { for (uint32_t i = 0; i < 
num_monitor_devs(); ++i) { PrintDeviceHeader(processor_handles_[i]); + std::cout << "Device #" << std::to_string(i) << "\n"; IF_VERB(STANDARD) { std::cout << "\t**GPU METRICS: Using static struct (Backwards Compatibility):\n"; } amdsmi_gpu_metrics_t smu; err = amdsmi_get_gpu_metrics_info(processor_handles_[i], &smu); + const char *status_string; + amdsmi_status_code_to_string(err, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_info(): " << status_string + << "\n"; if (err != AMDSMI_STATUS_SUCCESS) { if (err == AMDSMI_STATUS_NOT_SUPPORTED) { IF_VERB(STANDARD) { @@ -115,8 +120,21 @@ void TestGpuMetricsRead::Run(void) { } else { CHK_ERR_ASRT(err); IF_VERB(STANDARD) { + std::cout << "METRIC TABLE HEADER:\n"; + std::cout << "structure_size=" << std::dec + << static_cast(smu.common_header.structure_size) << '\n'; + std::cout << "format_revision=" << std::dec + << static_cast(smu.common_header.format_revision) << '\n'; + std::cout << "content_revision=" << std::dec + << static_cast(smu.common_header.content_revision) << '\n'; + std::cout << "\n"; + std::cout << "TIME STAMPS (ns):\n"; std::cout << std::dec << "system_clock_counter=" << smu.system_clock_counter << '\n'; + std::cout << "firmware_timestamp (10ns resolution)=" << std::dec + << smu.firmware_timestamp << '\n'; + std::cout << "\n"; + std::cout << "TEMPERATURES (C):\n"; std::cout << std::dec << "temperature_edge=" << smu.temperature_edge << '\n'; std::cout << std::dec << "temperature_hotspot=" @@ -129,16 +147,39 @@ void TestGpuMetricsRead::Run(void) { << smu.temperature_vrsoc << '\n'; std::cout << std::dec << "temperature_vrmem=" << smu.temperature_vrmem << '\n'; + for (int i = 0; i < AMDSMI_NUM_HBM_INSTANCES; ++i) { + std::cout << "temperature_hbm[" << i << "]=" << std::dec << + smu.temperature_hbm[i] << '\n'; + } + std::cout << "\n"; + std::cout << "UTILIZATION (%):\n"; std::cout << std::dec << "average_gfx_activity=" << smu.average_gfx_activity << '\n'; std::cout << std::dec << 
"average_umc_activity=" << smu.average_umc_activity << '\n'; std::cout << std::dec << "average_mm_activity=" << smu.average_mm_activity << '\n'; + std::cout << std::dec << "jpeg_activity= ["; + uint16_t size = static_cast( + sizeof(smu.jpeg_activity)/sizeof(smu.jpeg_activity[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + std::cout << std::dec << smu.jpeg_activity[i] << ", "; + } else { + std::cout << std::dec << smu.jpeg_activity[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "\n"; + std::cout << "POWER (W)/ENERGY (15.259uJ per 1ns):\n"; std::cout << std::dec << "average_socket_power=" << smu.average_socket_power << '\n'; + std::cout << std::dec << "current_socket_power=" + << smu.current_socket_power << '\n'; std::cout << std::dec << "energy_accumulator=" << smu.energy_accumulator << '\n'; + std::cout << "\n"; + std::cout << "AVG CLOCKS (MHz):\n"; std::cout << std::dec << "average_gfxclk_frequency=" << smu.average_gfxclk_frequency << '\n'; std::cout << std::dec << "average_gfxclk_frequency=" @@ -153,417 +194,820 @@ void TestGpuMetricsRead::Run(void) { << smu.average_vclk1_frequency << '\n'; std::cout << std::dec << "average_dclk1_frequency=" << smu.average_dclk1_frequency << '\n'; + std::cout << "\n"; + std::cout << "CURRENT CLOCKS (MHz):\n"; std::cout << std::dec << "current_gfxclk=" << smu.current_gfxclk << '\n'; + std::cout << std::dec << "current_gfxclks= ["; + size = static_cast( + sizeof(smu.current_gfxclks)/sizeof(smu.current_gfxclks[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + std::cout << std::dec << smu.current_gfxclks[i] << ", "; + } else { + std::cout << std::dec << smu.current_gfxclks[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << std::dec << "current_socclk=" << smu.current_socclk << '\n'; + std::cout << std::dec << "current_socclks= ["; + size = static_cast( + sizeof(smu.current_socclks)/sizeof(smu.current_socclks[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + 
std::cout << std::dec << smu.current_socclks[i] << ", "; + } else { + std::cout << std::dec << smu.current_socclks[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << std::dec << "current_uclk=" << smu.current_uclk << '\n'; std::cout << std::dec << "current_vclk0=" << smu.current_vclk0 << '\n'; + std::cout << std::dec << "current_vclk0s= ["; + size = static_cast( + sizeof(smu.current_vclk0s)/sizeof(smu.current_vclk0s[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + std::cout << std::dec << smu.current_vclk0s[i] << ", "; + } else { + std::cout << std::dec << smu.current_vclk0s[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << std::dec << "current_dclk0=" << smu.current_dclk0 << '\n'; + std::cout << std::dec << "current_dclk0s= ["; + size = static_cast( + sizeof(smu.current_dclk0s)/sizeof(smu.current_dclk0s[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + std::cout << std::dec << smu.current_dclk0s[i] << ", "; + } else { + std::cout << std::dec << smu.current_dclk0s[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << std::dec << "current_vclk1=" << smu.current_vclk1 << '\n'; std::cout << std::dec << "current_dclk1=" << smu.current_dclk1 << '\n'; + std::cout << "\n"; + std::cout << "THROTTLE STATUS:\n"; std::cout << std::dec << "throttle_status=" << smu.throttle_status << '\n'; + std::cout << "\n"; + std::cout << "FAN SPEED:\n"; std::cout << std::dec << "current_fan_speed=" << smu.current_fan_speed << '\n'; + std::cout << "\n"; + std::cout << "LINK WIDTH (number of lanes) /SPEED (0.1 GT/s):\n"; std::cout << "pcie_link_width=" << std::to_string(smu.pcie_link_width) << '\n'; - std::cout << "pcie_link_width=" + std::cout << "pcie_link_speed=" << std::to_string(smu.pcie_link_speed) << '\n'; + std::cout << "xgmi_link_width=" + << std::to_string(smu.xgmi_link_width) << '\n'; + std::cout << "xgmi_link_speed=" + << std::to_string(smu.xgmi_link_speed) << '\n'; + + std::cout << "\n"; + std::cout << "Utilization
Accumulated(%):\n"; std::cout << "gfx_activity_acc=" << std::dec << smu.gfx_activity_acc << '\n'; std::cout << "mem_activity_acc=" << std::dec << smu.mem_activity_acc << '\n'; - for (int i = 0; i < AMDSMI_NUM_HBM_INSTANCES; ++i) { - std::cout << "temperature_hbm[" << i << "]=" << std::dec << - smu.temperature_hbm[i] << '\n'; + std::cout << "\n"; + std::cout << "XGMI ACCUMULATED DATA TRANSFER SIZE (KB):\n"; + std::cout << std::dec << "xgmi_read_data_acc= ["; + size = static_cast( + sizeof(smu.xgmi_read_data_acc)/sizeof(smu.xgmi_read_data_acc[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + std::cout << std::dec << smu.xgmi_read_data_acc[i] << ", "; + } else { + std::cout << std::dec << smu.xgmi_read_data_acc[i]; + } } + std::cout << std::dec << "]\n"; + std::cout << std::dec << "xgmi_write_data_acc= ["; + size = static_cast( + sizeof(smu.xgmi_write_data_acc)/sizeof(smu.xgmi_write_data_acc[0])); + for (uint16_t i= 0; i < size; i++) { + if (i+1 < size) { + std::cout << std::dec << smu.xgmi_write_data_acc[i] << ", "; + } else { + std::cout << std::dec << smu.xgmi_write_data_acc[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "mem_bandwidth_acc=" << std::dec + << smu.mem_bandwidth_acc << "\n"; + std::cout << "mem_max_bandwidth=" << std::dec + << smu.mem_max_bandwidth << "\n"; + std::cout << "pcie_nak_sent_count_acc=" << std::dec + << smu.pcie_nak_sent_count_acc << "\n"; + std::cout << "pcie_nak_rcvd_count_acc=" << std::dec + << smu.pcie_nak_rcvd_count_acc << "\n"; } } // Verify api support checking functionality is working err = amdsmi_get_gpu_metrics_info(processor_handles_[i], nullptr); + DISPLAY_AMDSMI_ERR(err); ASSERT_EQ(err, AMDSMI_STATUS_INVAL); } - // + auto val_ui16 = uint16_t(0); auto val_ui32 = uint32_t(0); auto val_ui64 = uint64_t(0); auto status_code(amdsmi_status_t::AMDSMI_STATUS_SUCCESS); for (uint32_t i = 0; i < num_monitor_devs(); ++i) { PrintDeviceHeader(processor_handles_[i]); + std::cout << "Device #" << 
std::to_string(i) << "\n"; auto temp_edge_value = val_ui16; status_code = amdsmi_get_gpu_metrics_temp_edge(processor_handles_[i], &temp_edge_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_edge(): " << status_string << "\n"; } auto temp_hotspot_value = val_ui16; status_code = amdsmi_get_gpu_metrics_temp_hotspot(processor_handles_[i], &temp_hotspot_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_hotspot(): " << status_string << "\n"; + } auto temp_mem_value = val_ui16; status_code = amdsmi_get_gpu_metrics_temp_mem(processor_handles_[i], &temp_mem_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_mem(): " << status_string << "\n"; + } auto temp_vrgfx_value = val_ui16; status_code = amdsmi_get_gpu_metrics_temp_vrgfx(processor_handles_[i], &temp_vrgfx_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_vrgfx(): " << status_string << "\n"; } auto temp_vrsoc_value = val_ui16; status_code = amdsmi_get_gpu_metrics_temp_vrsoc(processor_handles_[i], &temp_vrsoc_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, 
&status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_vrsoc(): " << status_string << "\n"; + } auto temp_vrmem_value = val_ui16; status_code = amdsmi_get_gpu_metrics_temp_vrmem(processor_handles_[i], &temp_vrmem_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_vrmem(): " << status_string << "\n"; } gpu_metric_temp_hbm_t temp_hbm_values; status_code = amdsmi_get_gpu_metrics_temp_hbm(processor_handles_[i], &temp_hbm_values); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_temp_hbm(): " << status_string << "\n"; + } auto temp_curr_socket_power_value = val_ui16; status_code = amdsmi_get_gpu_metrics_curr_socket_power(processor_handles_[i], &temp_curr_socket_power_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_socket_power(): " << status_string << "\n"; } auto temp_energy_accum_value = val_ui64; status_code = amdsmi_get_gpu_metrics_energy_acc(processor_handles_[i], &temp_energy_accum_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_energy_acc(): " << status_string << "\n"; + } auto temp_avg_socket_power_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_socket_power(processor_handles_[i], &temp_avg_socket_power_value); if (status_code != 
AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_socket_power(): " << status_string << "\n"; } auto temp_avg_gfx_activity_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_gfx_activity(processor_handles_[i], &temp_avg_gfx_activity_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_gfx_activity(): " << status_string << "\n"; + } auto temp_avg_umc_activity_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_umc_activity(processor_handles_[i], &temp_avg_umc_activity_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_umc_activity(): " << status_string << "\n"; + } auto temp_avg_mm_activity_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_mm_activity(processor_handles_[i], &temp_avg_mm_activity_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_mm_activity(): " << status_string << "\n"; } gpu_metric_vcn_activity_t temp_vcn_values; status_code = amdsmi_get_gpu_metrics_vcn_activity(processor_handles_[i], &temp_vcn_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_vcn_activity(): " << status_string << "\n"; }
auto temp_mem_activity_accum_value = val_ui32; status_code = amdsmi_get_gpu_metrics_mem_activity_acc(processor_handles_[i], &temp_mem_activity_accum_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_mem_activity_acc(): " << status_string << "\n"; + } auto temp_gfx_activity_accum_value = val_ui32; status_code = amdsmi_get_gpu_metrics_gfx_activity_acc(processor_handles_[i], &temp_gfx_activity_accum_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_gfx_activity_acc(): " << status_string << "\n"; + } auto temp_avg_gfx_clock_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_gfx_clock_frequency(processor_handles_[i], &temp_avg_gfx_clock_freq_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_gfx_clock_frequency(): " << status_string << "\n"; } auto temp_avg_soc_clock_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_soc_clock_frequency(processor_handles_[i], &temp_avg_soc_clock_freq_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_soc_clock_frequency(): " << status_string << "\n"; } auto temp_avg_uclock_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_uclock_frequency(processor_handles_[i], &temp_avg_uclock_freq_value); if (status_code != 
AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_uclock_frequency(): " << status_string << "\n"; } auto temp_avg_vclock0_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_vclock0_frequency(processor_handles_[i], &temp_avg_vclock0_freq_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_vclock0_frequency(): " << status_string << "\n"; } auto temp_avg_dclock0_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_dclock0_frequency(processor_handles_[i], &temp_avg_dclock0_freq_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_dclock0_frequency(): " << status_string << "\n"; } auto temp_avg_vclock1_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_vclock1_frequency(processor_handles_[i], &temp_avg_vclock1_freq_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_vclock1_frequency(): " << status_string << "\n"; } auto temp_avg_dclock1_freq_value = val_ui16; status_code = amdsmi_get_gpu_metrics_avg_dclock1_frequency(processor_handles_[i], &temp_avg_dclock1_freq_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_avg_dclock1_frequency(): " << status_string << 
"\n"; } auto temp_curr_vclk1_value = val_ui16; status_code = amdsmi_get_gpu_metrics_curr_vclk1(processor_handles_[i], &temp_curr_vclk1_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_vclk1(): " << status_string << "\n"; } auto temp_curr_dclk1_value = val_ui16; status_code = amdsmi_get_gpu_metrics_curr_dclk1(processor_handles_[i], &temp_curr_dclk1_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_dclk1(): " << status_string << "\n"; } auto temp_curr_uclk_value = val_ui16; status_code = amdsmi_get_gpu_metrics_curr_uclk(processor_handles_[i], &temp_curr_uclk_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_uclk(): " << status_string << "\n"; + } gpu_metric_curr_dclk0_t temp_curr_dclk0_values; status_code = amdsmi_get_gpu_metrics_curr_dclk0(processor_handles_[i], &temp_curr_dclk0_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_dclk0(): " << status_string << "\n"; } gpu_metric_curr_gfxclk_t temp_curr_gfxclk_values; status_code = amdsmi_get_gpu_metrics_curr_gfxclk(processor_handles_[i], &temp_curr_gfxclk_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout 
<< "\t\t** amdsmi_get_gpu_metrics_curr_gfxclk(): " << status_string << "\n"; } gpu_metric_curr_socclk_t temp_curr_socclk_values; status_code = amdsmi_get_gpu_metrics_curr_socclk(processor_handles_[i], &temp_curr_socclk_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_socclk(): " << status_string << "\n"; } gpu_metric_curr_vclk0_t temp_curr_vclk0_values; status_code = amdsmi_get_gpu_metrics_curr_vclk0(processor_handles_[i], &temp_curr_vclk0_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_vclk0(): " << status_string << "\n"; } auto temp_indep_throttle_status_value = val_ui64; status_code = amdsmi_get_gpu_metrics_indep_throttle_status(processor_handles_[i], &temp_indep_throttle_status_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_indep_throttle_status(): " << status_string << "\n"; } auto temp_throttle_status_value = val_ui32; status_code = amdsmi_get_gpu_metrics_throttle_status(processor_handles_[i], &temp_throttle_status_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_throttle_status(): " << status_string << "\n"; + } auto temp_gfxclk_lock_status_value = val_ui32; status_code = amdsmi_get_gpu_metrics_gfxclk_lock_status(processor_handles_[i], &temp_gfxclk_lock_status_value); if (status_code != 
AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_gfxclk_lock_status(): " << status_string << "\n"; } auto temp_curr_fan_speed_value = val_ui16; status_code = amdsmi_get_gpu_metrics_curr_fan_speed(processor_handles_[i], &temp_curr_fan_speed_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_curr_fan_speed(): " << status_string << "\n"; } auto temp_pcie_link_width_value = val_ui16; status_code = amdsmi_get_gpu_metrics_pcie_link_width(processor_handles_[i], &temp_pcie_link_width_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_link_width(): " << status_string << "\n"; + } auto temp_pcie_link_speed_value = val_ui16; status_code = amdsmi_get_gpu_metrics_pcie_link_speed(processor_handles_[i], &temp_pcie_link_speed_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_link_speed(): " << status_string << "\n"; + } auto temp_pcie_bandwidth_accum_value = val_ui64; status_code = amdsmi_get_gpu_metrics_pcie_bandwidth_acc(processor_handles_[i], &temp_pcie_bandwidth_accum_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** 
amdsmi_get_gpu_metrics_pcie_bandwidth_acc(): " << status_string << "\n"; } auto temp_pcie_bandwidth_inst_value = val_ui64; status_code = amdsmi_get_gpu_metrics_pcie_bandwidth_inst(processor_handles_[i], &temp_pcie_bandwidth_inst_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_bandwidth_inst(): " << status_string << "\n"; } auto temp_pcie_l0_recov_count_accum_value = val_ui64; status_code = amdsmi_get_gpu_metrics_pcie_l0_recov_count_acc(processor_handles_[i], &temp_pcie_l0_recov_count_accum_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_l0_recov_count_acc(): " << status_string << "\n"; } auto temp_pcie_replay_count_accum_value = val_ui64; status_code = amdsmi_get_gpu_metrics_pcie_replay_count_acc(processor_handles_[i], &temp_pcie_replay_count_accum_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_replay_count_acc(): " << status_string << "\n"; } auto temp_pcie_replay_rover_count_accum_value = val_ui64; status_code = amdsmi_get_gpu_metrics_pcie_replay_rover_count_acc(processor_handles_[i], &temp_pcie_replay_rover_count_accum_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_pcie_replay_rover_count_acc(): " << status_string << "\n"; } auto temp_xgmi_link_width_value = val_ui16; status_code = 
amdsmi_get_gpu_metrics_xgmi_link_width(processor_handles_[i], &temp_xgmi_link_width_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_link_width(): " << status_string << "\n"; } auto temp_xgmi_link_speed_value = val_ui16; status_code = amdsmi_get_gpu_metrics_xgmi_link_speed(processor_handles_[i], &temp_xgmi_link_speed_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_link_speed(): " << status_string << "\n"; } gpu_metric_xgmi_read_data_acc_t temp_xgmi_read_values; status_code = amdsmi_get_gpu_metrics_xgmi_read_data(processor_handles_[i], &temp_xgmi_read_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_read_data(): " << status_string << "\n"; } gpu_metric_xgmi_write_data_acc_t temp_xgmi_write_values; status_code = amdsmi_get_gpu_metrics_xgmi_write_data(processor_handles_[i], &temp_xgmi_write_values); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_xgmi_write_data(): " << status_string << "\n"; } auto temp_voltage_soc_value = val_ui16; status_code = amdsmi_get_gpu_metrics_volt_soc(processor_handles_[i], &temp_voltage_soc_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** 
amdsmi_get_gpu_metrics_volt_soc(): " << status_string << "\n"; } auto temp_voltage_gfx_value = val_ui16; status_code = amdsmi_get_gpu_metrics_volt_gfx(processor_handles_[i], &temp_voltage_gfx_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_volt_gfx(): " << status_string << "\n"; } auto temp_voltage_mem_value = val_ui16; status_code = amdsmi_get_gpu_metrics_volt_mem(processor_handles_[i], &temp_voltage_mem_value); if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_volt_mem(): " << status_string << "\n"; } auto temp_system_clock_counter_value = val_ui64; status_code = amdsmi_get_gpu_metrics_system_clock_counter(processor_handles_[i], &temp_system_clock_counter_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_system_clock_counter(): " << status_string << "\n"; + } auto temp_firmware_timestamp_value = val_ui64; status_code = amdsmi_get_gpu_metrics_firmware_timestamp(processor_handles_[i], &temp_firmware_timestamp_value); - CHK_ERR_ASRT(status_code); + if (status_code != AMDSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_firmware_timestamp(): " << status_string << "\n"; + } auto temp_xcd_counter_value = val_ui16; status_code = amdsmi_get_gpu_metrics_xcd_counter(processor_handles_[i], &temp_xcd_counter_value); if (status_code != 
AMDSMI_STATUS_NOT_SUPPORTED) { CHK_ERR_ASRT(status_code); + } else { + const char *status_string; + amdsmi_status_code_to_string(status_code, &status_string); + std::cout << "\t\t** amdsmi_get_gpu_metrics_xcd_counter(): " << status_string << "\n"; } IF_VERB(STANDARD) { std::cout << "\n"; std::cout << "\t[Temperature]" << "\n"; - std::cout << "\t -> temp_edge(): " << temp_edge_value << "\n"; - std::cout << "\t -> temp_hotspot(): " << temp_hotspot_value << "\n"; - std::cout << "\t -> temp_mem(): " << temp_mem_value << "\n"; - std::cout << "\t -> temp_vrgfx(): " << temp_vrgfx_value << "\n"; - std::cout << "\t -> temp_vrsoc(): " << temp_vrsoc_value << "\n"; - std::cout << "\t -> temp_vrmem(): " << temp_vrmem_value << "\n"; - std::cout << "\t -> temp_hbm(): " << temp_hbm_values << "\n"; + std::cout << "\t -> temp_edge(): " << std::dec << temp_edge_value << "\n"; + std::cout << "\t -> temp_hotspot(): " << std::dec << temp_hotspot_value << "\n"; + std::cout << "\t -> temp_mem(): " << std::dec << temp_mem_value << "\n"; + std::cout << "\t -> temp_vrgfx(): " << std::dec << temp_vrgfx_value << "\n"; + std::cout << "\t -> temp_vrsoc(): " << std::dec << temp_vrsoc_value << "\n"; + std::cout << "\t -> temp_vrmem(): " << std::dec << temp_vrmem_value << "\n"; + std::cout << "\t -> temp_hbm(temp_hbm_values): ["; + uint16_t size = static_cast( + sizeof(temp_hbm_values) / sizeof(temp_hbm_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_hbm_values[i] << ", "; + } else { + std::cout << std::dec << temp_hbm_values[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << "\n"; std::cout << "\t[Power/Energy]" << "\n"; - std::cout << "\t -> current_socket_power(): " << temp_curr_socket_power_value << "\n"; - std::cout << "\t -> energy_accum(): " << temp_energy_accum_value << "\n"; - std::cout << "\t -> average_socket_power(): " << temp_avg_socket_power_value << "\n"; + std::cout << "\t -> current_socket_power(): " << std::dec << 
temp_curr_socket_power_value << "\n"; + std::cout << "\t -> energy_accum(): " << std::dec << temp_energy_accum_value << "\n"; + std::cout << "\t -> average_socket_power(): " << std::dec << temp_avg_socket_power_value << "\n"; std::cout << "\n"; std::cout << "\t[Utilization]" << "\n"; - std::cout << "\t -> average_gfx_activity(): " << temp_avg_gfx_activity_value << "\n"; - std::cout << "\t -> average_umc_activity(): " << temp_avg_umc_activity_value << "\n"; - std::cout << "\t -> average_mm_activity(): " << temp_avg_mm_activity_value << "\n"; - std::cout << "\t -> vcn_activity(): " << temp_vcn_values << "\n"; - std::cout << "\t -> mem_activity_accum(): " << temp_mem_activity_accum_value << "\n"; - std::cout << "\t -> gfx_activity_accum(): " << temp_gfx_activity_accum_value << "\n"; + std::cout << "\t -> average_gfx_activity(): " << std::dec << temp_avg_gfx_activity_value << "\n"; + std::cout << "\t -> average_umc_activity(): " << std::dec << temp_avg_umc_activity_value << "\n"; + std::cout << "\t -> average_mm_activity(): " << std::dec << temp_avg_mm_activity_value << "\n"; + std::cout << "\t -> vcn_activity(temp_vcn_values): ["; + size = static_cast( + sizeof(temp_vcn_values) / sizeof(temp_vcn_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_vcn_values[i] << ", "; + } else { + std::cout << std::dec << temp_vcn_values[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "\t -> mem_activity_accum(): " << std::dec << temp_mem_activity_accum_value << "\n"; + std::cout << "\t -> gfx_activity_accum(): " << std::dec << temp_gfx_activity_accum_value << "\n"; std::cout << "\n"; std::cout << "\t[Average Clock]" << "\n"; - std::cout << "\t -> average_gfx_clock_frequency(): " << temp_avg_gfx_clock_freq_value << "\n"; - std::cout << "\t -> average_soc_clock_frequency(): " << temp_avg_soc_clock_freq_value << "\n"; - std::cout << "\t -> average_uclock_frequency(): " << temp_avg_uclock_freq_value << "\n"; - std::cout 
<< "\t -> average_vclock0_frequency(): " << temp_avg_vclock0_freq_value << "\n"; - std::cout << "\t -> average_dclock0_frequency(): " << temp_avg_dclock0_freq_value << "\n"; - std::cout << "\t -> average_vclock1_frequency(): " << temp_avg_vclock1_freq_value << "\n"; - std::cout << "\t -> average_dclock1_frequency(): " << temp_avg_dclock1_freq_value << "\n"; + std::cout << "\t -> average_gfx_clock_frequency(): " << std::dec << temp_avg_gfx_clock_freq_value << "\n"; + std::cout << "\t -> average_soc_clock_frequency(): " << std::dec << temp_avg_soc_clock_freq_value << "\n"; + std::cout << "\t -> average_uclock_frequency(): " << std::dec << temp_avg_uclock_freq_value << "\n"; + std::cout << "\t -> average_vclock0_frequency(): " << std::dec << std::dec << temp_avg_vclock0_freq_value << "\n"; + std::cout << "\t -> average_dclock0_frequency(): " << std::dec << temp_avg_dclock0_freq_value << "\n"; + std::cout << "\t -> average_vclock1_frequency(): " << std::dec << temp_avg_vclock1_freq_value << "\n"; + std::cout << "\t -> average_dclock1_frequency(): " << std::dec << temp_avg_dclock1_freq_value << "\n"; std::cout << "\n"; std::cout << "\t[Current Clock]" << "\n"; - std::cout << "\t -> current_vclock1(): " << temp_curr_vclk1_value << "\n"; - std::cout << "\t -> current_dclock1(): " << temp_curr_dclk1_value << "\n"; - std::cout << "\t -> current_uclock(): " << temp_curr_uclk_value << "\n"; - std::cout << "\t -> current_dclk0(): " << temp_curr_dclk0_values << "\n"; - std::cout << "\t -> current_gfxclk(): " << temp_curr_gfxclk_values << "\n"; - std::cout << "\t -> current_soc_clock(): " << temp_curr_socclk_values << "\n"; - std::cout << "\t -> current_vclk0(): " << temp_curr_vclk0_values << "\n"; + std::cout << "\t -> current_vclock1(): " << std::dec << temp_curr_vclk1_value << "\n"; + std::cout << "\t -> current_dclock1(): " << std::dec << temp_curr_dclk1_value << "\n"; + std::cout << "\t -> current_uclock(): " << std::dec << temp_curr_uclk_value << "\n"; + std::cout << "\t 
-> current_dclk0(temp_curr_dclk0_values): ["; + size = static_cast( + sizeof(temp_curr_dclk0_values) / sizeof(temp_curr_dclk0_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_curr_dclk0_values[i] << ", "; + } else { + std::cout << std::dec << temp_curr_dclk0_values[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "\t -> current_gfxclk(temp_curr_gfxclk_values): ["; + size = static_cast( + sizeof(temp_curr_gfxclk_values) / sizeof(temp_curr_gfxclk_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_curr_gfxclk_values[i] << ", "; + } else { + std::cout << std::dec << temp_curr_gfxclk_values[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "\t -> current_soc_clock(temp_curr_socclk_values): ["; + size = static_cast( + sizeof(temp_curr_socclk_values) / sizeof(temp_curr_socclk_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_curr_socclk_values[i] << ", "; + } else { + std::cout << std::dec << temp_curr_socclk_values[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "\t -> current_vclk0(temp_curr_vclk0_values): ["; + size = static_cast( + sizeof(temp_curr_vclk0_values) / sizeof(temp_curr_vclk0_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_curr_vclk0_values[i] << ", "; + } else { + std::cout << std::dec << temp_curr_vclk0_values[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << "\n"; std::cout << "\t[Throttle]" << "\n"; - std::cout << "\t -> indep_throttle_status(): " << temp_indep_throttle_status_value << "\n"; - std::cout << "\t -> throttle_status(): " << temp_throttle_status_value << "\n"; + std::cout << "\t -> indep_throttle_status(): " << std::dec << temp_indep_throttle_status_value << "\n"; + std::cout << "\t -> throttle_status(): " << std::dec << temp_throttle_status_value << "\n"; std::cout << 
"\n"; std::cout << "\t[Gfx Clock Lock]" << "\n"; - std::cout << "\t -> gfxclk_lock_status(): " << temp_gfxclk_lock_status_value << "\n"; + std::cout << "\t -> gfxclk_lock_status(): " << std::dec << temp_gfxclk_lock_status_value << "\n"; std::cout << "\n"; std::cout << "\t[Current Fan Speed]" << "\n"; - std::cout << "\t -> current_fan_speed(): " << temp_curr_fan_speed_value << "\n"; + std::cout << "\t -> current_fan_speed(): " << std::dec << temp_curr_fan_speed_value << "\n"; std::cout << "\n"; std::cout << "\t[Link/Bandwidth/Speed]" << "\n"; - std::cout << "\t -> pcie_link_width(): " << temp_pcie_link_width_value << "\n"; - std::cout << "\t -> pcie_link_speed(): " << temp_pcie_link_speed_value << "\n"; - std::cout << "\t -> pcie_bandwidth_accum(): " << temp_pcie_bandwidth_accum_value << "\n"; - std::cout << "\t -> pcie_bandwidth_inst(): " << temp_pcie_bandwidth_inst_value << "\n"; - std::cout << "\t -> pcie_l0_recov_count_accum(): " << temp_pcie_l0_recov_count_accum_value << "\n"; - std::cout << "\t -> pcie_replay_count_accum(): " << temp_pcie_replay_count_accum_value << "\n"; - std::cout << "\t -> pcie_replay_rollover_count_accum(): " << temp_pcie_replay_rover_count_accum_value << "\n"; - std::cout << "\t -> xgmi_link_width(): " << temp_xgmi_link_width_value << "\n"; - std::cout << "\t -> xgmi_link_speed(): " << temp_xgmi_link_speed_value << "\n"; - std::cout << "\t -> xgmi_read_data(): " << temp_xgmi_read_values << "\n"; - std::cout << "\t -> xgmi_write_data(): " << temp_xgmi_write_values << "\n"; + std::cout << "\t -> pcie_link_width(): " << std::dec << temp_pcie_link_width_value << "\n"; + std::cout << "\t -> pcie_link_speed(): " << std::dec << temp_pcie_link_speed_value << "\n"; + std::cout << "\t -> pcie_bandwidth_accum(): " << std::dec << std::dec << temp_pcie_bandwidth_accum_value << "\n"; + std::cout << "\t -> pcie_bandwidth_inst(): " << std::dec << temp_pcie_bandwidth_inst_value << "\n"; + std::cout << "\t -> pcie_l0_recov_count_accum(): " << std::dec << 
std::dec << temp_pcie_l0_recov_count_accum_value << "\n"; + std::cout << "\t -> pcie_replay_count_accum(): " << std::dec << temp_pcie_replay_count_accum_value << "\n"; + std::cout << "\t -> pcie_replay_rollover_count_accum(): " << std::dec << temp_pcie_replay_rover_count_accum_value << "\n"; + std::cout << "\t -> xgmi_link_width(): " << std::dec << temp_xgmi_link_width_value << "\n"; + std::cout << "\t -> xgmi_link_speed(): " << std::dec << std::dec << temp_xgmi_link_speed_value << "\n"; + std::cout << "\t -> xgmi_read_data(temp_xgmi_read_values): "; + size = static_cast( + sizeof(temp_xgmi_read_values) / sizeof(temp_xgmi_read_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_xgmi_read_values[i] << ", "; + } else { + std::cout << std::dec << temp_xgmi_read_values[i]; + } + } + std::cout << std::dec << "]\n"; + std::cout << "\t -> xgmi_write_data(temp_xgmi_write_values): ["; + size = static_cast( + sizeof(temp_xgmi_write_values) / sizeof(temp_xgmi_write_values[0])); + for (uint16_t i = 0; i < size; i++) { + if (i + 1 < size) { + std::cout << std::dec << temp_xgmi_write_values[i] << ", "; + } else { + std::cout << std::dec << temp_xgmi_write_values[i]; + } + } + std::cout << std::dec << "]\n"; std::cout << "\n"; std::cout << "\t[Voltage]" << "\n"; - std::cout << "\t -> voltage_soc(): " << temp_voltage_soc_value << "\n"; - std::cout << "\t -> voltage_gfx(): " << temp_voltage_gfx_value << "\n"; - std::cout << "\t -> voltage_mem(): " << temp_voltage_mem_value << "\n"; + std::cout << "\t -> voltage_soc(): " << std::dec << temp_voltage_soc_value << "\n"; + std::cout << "\t -> voltage_gfx(): " << std::dec << temp_voltage_gfx_value << "\n"; + std::cout << "\t -> voltage_mem(): " << std::dec << temp_voltage_mem_value << "\n"; std::cout << "\n"; std::cout << "\t[Timestamp]" << "\n"; - std::cout << "\t -> system_clock_counter(): " << temp_system_clock_counter_value << "\n"; - std::cout << "\t -> firmware_timestamp(): " 
<< temp_firmware_timestamp_value << "\n"; + std::cout << "\t -> system_clock_counter(): " << std::dec << temp_system_clock_counter_value << "\n"; + std::cout << "\t -> firmware_timestamp(): " << std::dec << temp_firmware_timestamp_value << "\n"; std::cout << "\n"; - std::cout << "\t[XCD CounterVoltage]" << "\n"; - std::cout << "\t -> xcd_counter(): " << temp_xcd_counter_value << "\n"; + std::cout << "\t[XCD Counter]" << "\n"; + std::cout << "\t -> xcd_counter(): " << std::dec << temp_xcd_counter_value << "\n"; std::cout << "\n\n"; } }