diff --git a/CMakeLists.txt b/CMakeLists.txt index b3eff5d9fb..9490c73c48 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,6 +14,9 @@ endif() ## Include common cmake modules include(utils) +# Default libdir to "lib", this skips GNUInstallDirs from trying to take a guess if it's unset: +set(CMAKE_INSTALL_LIBDIR "lib" CACHE STRING "Library install directory") + if (NOT DEFINED CPACK_RESOURCE_FILE_LICENSE) set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/License.txt") endif() diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index 28f0bd795b..120669d7cf 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -1134,6 +1134,8 @@ typedef struct { uint32_t cu_occupancy; //!< Compute Unit usage in percent } rsmi_process_info_t; +//! CU occupancy invalidation value for the GFX revisions not providing cu_occupancy debugfs method +#define CU_OCCUPANCY_INVALID 0xFFFFFFFF /** * @brief Opaque handle to function-support object @@ -1622,6 +1624,54 @@ rsmi_status_t rsmi_dev_unique_id_get(uint32_t dv_ind, uint64_t *id); */ rsmi_status_t rsmi_dev_xgmi_physical_id_get(uint32_t dv_ind, uint16_t *id); +/** + * @brief Get the GUID, also known as the GPU device id, + * associated with the provided device index indicated by KFD. + * + * @details Given a device index @p dv_ind and a pointer to a uint64_t + * @p guid, this function will write the KFD GPU id value to the + * uint64_t pointed to by @p guid. + * + * @param[in] dv_ind a device index + * + * @param[inout] guid a pointer to uint64_t to which the KFD gpu id will be + * written. If the @p guid parameter is nullptr, this function will return + * ::RSMI_STATUS_INVALID_ARGS. If the GPU ID is not supported with + * the device index queried, @p guid will return MAX UINT64 value as an + * argument and ::RSMI_STATUS_NOT_SUPPORTED as a response. 
+ * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * + */ +rsmi_status_t rsmi_dev_guid_get(uint32_t dv_ind, uint64_t *guid); + +/** + * @brief Get the node id associated with the provided device index + * indicated by KFD. + * + * @details Given a device index @p dv_ind and a pointer to a uint32_t + * @p node_id, this function will write the KFD node id value to the + * uint32_t pointed to by @p node_id. + * + * @param[in] dv_ind a device index + * + * @param[inout] node_id a pointer to uint32_t to which the KFD node id will be + * written. If the @p node_id parameter is nullptr, this function will return + * ::RSMI_STATUS_INVALID_ARGS. If @p node_id is not supported with + * the device index queried, @p node_id will return MAX UINT32 value as an + * argument and ::RSMI_STATUS_NOT_SUPPORTED as a response. + * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + * + */ +rsmi_status_t rsmi_dev_node_id_get(uint32_t dv_ind, uint32_t *node_id); + /** @} */ // end of IDQuer @@ -1822,7 +1872,7 @@ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask); * backwards compatibility, which looks at both average and current power * values. Whereas ::rsmi_dev_power_ave_get only looks for average power * consumption. Newer ASICs will support current power only. - * + * * @param[in] dv_ind a device index * * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. 
@@ -3203,7 +3253,7 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block, uint64_t *fw_version); /** - * @brief Get the graphics version for a GPU device + * @brief Get the target graphics version for a GPU device * * @details Given a device ID @p dv_ind and a uint64_t pointer * @p gfx_version, this function will write the graphics version. @@ -4439,1054 +4489,6 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind); * @{ */ -/** - * Metric multi-valued counter types - */ -typedef uint16_t GPUMetricTempHbm_t[RSMI_NUM_HBM_INSTANCES]; -typedef uint16_t GPUMetricVcnActivity_t[RSMI_MAX_NUM_VCNS]; -typedef uint16_t GPUMetricJpegActivity_t[RSMI_MAX_NUM_JPEG_ENGS]; -typedef uint64_t GPUMetricXgmiReadDataAcc_t[RSMI_MAX_NUM_XGMI_LINKS]; -typedef uint64_t GPUMetricXgmiWriteDataAcc_t[RSMI_MAX_NUM_XGMI_LINKS]; -typedef uint16_t GPUMetricCurrGfxClk_t[RSMI_MAX_NUM_GFX_CLKS]; -typedef uint16_t GPUMetricCurrSocClk_t[RSMI_MAX_NUM_CLKS]; -typedef uint16_t GPUMetricCurrVClk0_t[RSMI_MAX_NUM_CLKS]; -typedef uint16_t GPUMetricCurrDClk0_t[RSMI_MAX_NUM_CLKS]; - - -/****** - * Metric single-valued counter types - */ - -/** - * @brief Get the 'temp_hotspot' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_hotspot' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] hotspot_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_hotspot_get(uint32_t dv_ind, uint16_t* hotspot_value); - -/** - * @brief Get the 'temp_mem' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_mem' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] mem_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_mem_get(uint32_t dv_ind, uint16_t* mem_value); - -/** - * @brief Get the 'temp_vrsoc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_vrsoc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] vrsoc_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_vrsoc_get(uint32_t dv_ind, uint16_t* vrsoc_value); - -/** - * @brief Get the 'curr_socket_power' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'socket_power' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] socket_power_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value); - -/** - * @brief Get the 'avg_gfx_activity' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'gfx_activity' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] gfx_activity_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_gfx_activity_get(uint32_t dv_ind, uint16_t* gfx_activity_value); - -/** - * @brief Get the 'avg_umc_activity' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'umc_activity' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] umc_activity_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_umc_activity_get(uint32_t dv_ind, uint16_t* umc_activity_value); - -/** - * @brief Get the 'energy_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'energy_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] energy_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_energy_acc_get(uint32_t dv_ind, uint64_t* energy_acc_value); - -/** - * @brief Get the 'system_clock_counter' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'system_clock_counter' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] system_clock_counter_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_system_clock_counter_get(uint32_t dv_ind, uint64_t* system_clock_counter_value); - -/** - * @brief Get the 'firmware_timestamp' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'firmware_timestamp' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] firmware_timestamp_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_firmware_timestamp_get(uint32_t dv_ind, uint64_t* firmware_timestamp_value); - -/** - * @brief Get the 'throttle_status' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t in which - * the 'throttle_status' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] throttle_status_value a pointer to uint32_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_throttle_status_get(uint32_t dv_ind, uint32_t* throttle_status_value); - -/** - * @brief Get the 'pcie_link_width' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'pcie_link_width' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_link_width_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_link_width_get(uint32_t dv_ind, uint16_t* pcie_link_width_value); - -/** - * @brief Get the 'pcie_link_speed' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'pcie_link_speed' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_link_speed_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_link_speed_get(uint32_t dv_ind, uint16_t* pcie_link_speed_value); - -/** - * @brief Get the 'xgmi_link_width' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'xgmi_link_width' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] xgmi_link_width_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_xgmi_link_width_get(uint32_t dv_ind, uint16_t* xgmi_link_width_value); - -/** - * @brief Get the 'xgmi_link_speed' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'xgmi_link_speed' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] xgmi_link_speed_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_xgmi_link_speed_get(uint32_t dv_ind, uint16_t* xgmi_link_speed_value); - -/** - * @brief Get the 'gfxclk_lock_status' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t in which - * the 'gfxclk_lock_status' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] gfxclk_lock_status_value a pointer to uint32_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_gfxclk_lock_status_get(uint32_t dv_ind, uint32_t* gfxclk_lock_status_value); - -/** - * @brief Get the 'gfx_activity_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t in which - * the 'gfx_activity_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] gfx_activity_acc_value a pointer to uint32_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_gfx_activity_acc_get(uint32_t dv_ind, uint32_t* gfx_activity_acc_value); - -/** - * @brief Get the 'mem_activity_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t in which - * the 'mem_activity_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] mem_activity_acc_value a pointer to uint32_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_mem_activity_acc_get(uint32_t dv_ind, uint32_t* mem_activity_acc_value); - -/** - * @brief Get the 'pcie_bandwidth_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'pcie_bandwidth_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_bandwidth_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_bandwidth_acc_get(uint32_t dv_ind, uint64_t* pcie_bandwidth_acc_value); - -/** - * @brief Get the 'pcie_bandwidth_inst' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'pcie_bandwidth_inst' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_bandwidth_inst_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_bandwidth_inst_get(uint32_t dv_ind, uint64_t* pcie_bandwidth_inst_value); - -/** - * @brief Get the 'pcie_l0_recov_count_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'pcie_l0_recov_count_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_count_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_l0_recov_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value); - -/** - * @brief Get the 'pcie_replay_count_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'pcie_replay_count_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_count_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_replay_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value); - -/** - * @brief Get the 'pcie_replay_rover_count_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'pcie_replay_rover_count_acc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_count_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_replay_rover_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value); - -/** - * @brief Get the 'pcie_nak_sent_count_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t in which - * the 'mem_max_bandwidth_usage' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_nak_sent_count_acc_value a pointer to uint32_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_nak_sent_count_acc_get(uint32_t dv_ind, uint32_t* pcie_nak_sent_count_acc_value); - -/** - * @brief Get the 'pcie_nak_rcvd_count_acc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t in which - * the 'mem_max_bandwidth_usage' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] pcie_nak_rcvd_count_acc_value a pointer to uint32_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get(uint32_t dv_ind, uint32_t* pcie_nak_rcvd_count_acc_value); - -/** - * @brief Get the 'curr_uclk' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_uclk' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] uclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_uclk_get(uint32_t dv_ind, uint16_t* uclk_value); - - -/****** - * Metric multi-valued counter types - */ - -/** - * @brief Get the 'temp_hbm' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_hbm' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] temp_hbm_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding a 4 (RSMI_NUM_HBM_INSTANCES) - * element array (GPUMetricTempHbm_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_hbm_get(uint32_t dv_ind, GPUMetricTempHbm_t* temp_hbm_value); - -/** - * @brief Get the 'vcn_activity' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'vcn_activity' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] vcn_activity_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding a 4 (RSMI_MAX_NUM_VCNS) - * element array (GPUMetricVcnActivity_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_vcn_activity_get(uint32_t dv_ind, GPUMetricVcnActivity_t* vcn_activity_value); - -/** - * @brief Get the 'jpeg_activity' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'vcn_activity' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] jpeg_activity_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding a 32 (RSMI_MAX_NUM_JPEG_ENGS) - * element array (GPUMetricJpegActivity_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_jpeg_activity_get(uint32_t dv_ind, GPUMetricJpegActivity_t* jpeg_activity_value); - -/** - * @brief Get the 'xgmi_read_data' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'xgmi_read_data' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] xgmi_read_data_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding an 8 (RSMI_MAX_NUM_XGMI_LINKS) - * element array (GPUMetricXgmiReadDataAcc_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_xgmi_read_data_get(uint32_t dv_ind, GPUMetricXgmiReadDataAcc_t* xgmi_read_data_acc_value); - -/** - * @brief Get the 'xgmi_write_data' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'xgmi_write_data' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] xgmi_write_data_acc_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding an 8 (RSMI_MAX_NUM_XGMI_LINKS) - * element array (GPUMetricXgmiWriteDataAcc_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_xgmi_write_data_get(uint32_t dv_ind, GPUMetricXgmiWriteDataAcc_t* xgmi_write_data_acc_value); - -/** - * @brief Get the 'curr_gfxclk' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'curr_gfxclk' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] current_gfxclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding an 8 (RSMI_MAX_NUM_GFX_CLKS) - * element array (GPUMetricCurrGfxClk_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_gfxclk_get(uint32_t dv_ind, GPUMetricCurrGfxClk_t* current_gfxclk_value); - -/** - * @brief Get the 'curr_socclk' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_socclk' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] current_socclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding a 4 (RSMI_MAX_NUM_CLKS) - * element array (GPUMetricCurrSocClk_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_socclk_get(uint32_t dv_ind, GPUMetricCurrSocClk_t* current_socclk_value); - -/** - * @brief Get the 'curr_vclk0' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_vclk0' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] current_vclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding a 4 (RSMI_MAX_NUM_CLKS) - * element array (GPUMetricCurrVClk0_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_vclk0_get(uint32_t dv_ind, GPUMetricCurrVClk0_t* current_vclk_value); - -/** - * @brief Get the 'curr_dclk0' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_dclk0' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] current_dclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - This is a multi-valued counter holding a 4 (RSMI_MAX_NUM_CLKS) - * element array (GPUMetricCurrDClk0_t) - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_dclk0_get(uint32_t dv_ind, GPUMetricCurrDClk0_t* current_dclk_value); - -/** - * @brief Get the 'temp_edge' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_edge' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] edge_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_edge_get(uint32_t dv_ind, uint16_t* edge_value); - -/** - * @brief Get the 'temp_vrgfx' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_vrgfx' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] vrgfx_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_vrgfx_get(uint32_t dv_ind, uint16_t* vrgfx_value); - -/** - * @brief Get the 'temp_vrmem' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'temp_vrmem' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] vrmem_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_temp_vrmem_get(uint32_t dv_ind, uint16_t* vrmem_value); - -/** - * @brief Get the 'avg_mm_activity' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_mm_activity' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] mm_activity_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_mm_activity_get(uint32_t dv_ind, uint16_t* mm_activity_value); - -/** - * @brief Get the 'curr_vclk1' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_vclk1' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] current_vclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_vclk1_get(uint32_t dv_ind, uint16_t* current_vclk_value); - -/** - * @brief Get the 'curr_dclk1' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_dclk1' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] current_dclk_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_dclk1_get(uint32_t dv_ind, uint16_t* current_dclk_value); - -/** - * @brief Get the 'indep_throttle_status' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t in which - * the 'indep_throttle_status' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] throttle_status_value a pointer to uint64_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_indep_throttle_status_get(uint32_t dv_ind, uint64_t* throttle_status_value); - -/** - * @brief Get the 'avg_socket_power' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_socket_power' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] socket_power_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value); - -/** - * @brief Get the 'curr_fan_speed' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'curr_fan_speed' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] fan_speed_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_curr_fan_speed_get(uint32_t dv_ind, uint16_t* fan_speed_value); - -/** - * @brief Get the 'avg_gfx_clock_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_gfx_clock_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_gfx_clock_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'avg_soc_clock_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_soc_clock_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_soc_clock_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'avg_uclock_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_uclock_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_uclock_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'avg_vclock0_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_vclock0_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_vclock0_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'avg_dclock0_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_dclock0_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_dclock0_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'avg_vclock1_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_vclock1_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_vclock1_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'avg_dclock1_frequency' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'avg_dclock1_frequency' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] clock_frequency_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_avg_dclock1_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value); - -/** - * @brief Get the 'volt_soc' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'volt_soc' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] voltage_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_volt_soc_get(uint32_t dv_ind, uint16_t* voltage_value); - -/** - * @brief Get the 'volt_gfx' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'volt_gfx' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] voltage_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_volt_gfx_get(uint32_t dv_ind, uint16_t* voltage_value); - -/** - * @brief Get the 'volt_mem' from the GPU metrics associated with the device - * - * @details Given a device index @p dv_ind and a pointer to a uint16_t in which - * the 'volt_mem' will stored - * - * @param[in] dv_ind a device index - * - * @param[inout] voltage_value a pointer to uint16_t to which the device gpu - * metric unit will be stored - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * ::RSMI_STATUS_NOT_SUPPORTED is returned in case the metric unit - * does not exist for the given device - * - */ -rsmi_status_t -rsmi_dev_metrics_volt_mem_get(uint32_t dv_ind, uint16_t* voltage_value); - /** * @brief Get the 'metrics_header_info' from the GPU metrics associated with the device * diff --git a/include/rocm_smi/rocm_smi_kfd.h b/include/rocm_smi/rocm_smi_kfd.h index e13ea003ba..2759dfdab9 100755 --- a/include/rocm_smi/rocm_smi_kfd.h +++ b/include/rocm_smi/rocm_smi_kfd.h @@ -86,6 +86,10 @@ class KFDNode { // Get gfx target version from kfd int get_gfx_target_version(uint64_t* gfx_target_version); + // Get gpu_id (AKA GUID) version from kfd + int get_gpu_id(uint64_t *gpu_id); + // Get node id from kfd + int get_node_id(uint32_t *node_id); private: uint32_t node_indx_; diff --git a/include/rocm_smi/rocm_smi_utils.h b/include/rocm_smi/rocm_smi_utils.h index 18b1c1fcb5..47b1e94655 100755 --- a/include/rocm_smi/rocm_smi_utils.h +++ b/include/rocm_smi/rocm_smi_utils.h @@ -162,7 +162,8 @@ std::string print_unsigned_hex_and_int(T i, std::string heading="") { } ss << "Hex (MSB): " << print_int_as_hex(i) << ", " << "Unsigned int: " << print_unsigned_int(i) << ", " - << "Byte Size: " << sizeof(T); + << "Byte Size: " << sizeof(T) << ", " + << "Bits: " << sizeof(T) * 8; // 8 bits per 1 byte return ss.str(); } diff --git 
a/python_smi_tools/rocm_smi.py b/python_smi_tools/rocm_smi.py index 23a3f49c3c..ada8bd1784 100755 --- a/python_smi_tools/rocm_smi.py +++ b/python_smi_tools/rocm_smi.py @@ -254,7 +254,7 @@ def getGpuUse(device, silent=False): return -1 -def getId(device, silent=False): +def getDRMDeviceId(device, silent=False): """ Return the hexadecimal value of a device's ID @param device: DRM device identifier @@ -263,8 +263,10 @@ def getId(device, silent=False): """ dv_id = c_short() ret = rocmsmi.rsmi_dev_id_get(device, byref(dv_id)) + device_id_ret = "N/A" if rsmi_ret_ok(ret, device, 'get_device_id', silent): - return hex(dv_id.value) + device_id_ret = hex(dv_id.value) + return device_id_ret def getRev(device, silent=False): @@ -276,9 +278,103 @@ def getRev(device, silent=False): """ dv_rev = c_short() ret = rocmsmi.rsmi_dev_revision_get(device, byref(dv_rev)) - if rsmi_ret_ok(ret, device, 'get_device_rev', silent): - return hex(dv_rev.value) + revision_ret = "N/A" + if rsmi_ret_ok(ret, device, 'get_device_rev', silent=silent): + revision_ret = padHexValue(hex(dv_rev.value), 2) + return revision_ret +def getSubsystemId(device, silent=False): + """ Return a device's subsystem id + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off. + """ + model = create_string_buffer(MAX_BUFF_SIZE) + ret = rocmsmi.rsmi_dev_subsystem_name_get(device, model, MAX_BUFF_SIZE) + device_model = "N/A" + if rsmi_ret_ok(ret, device, 'get_subsystem_name', silent=silent): + device_model = model.value.decode() + # padHexValue is used for applications that expect 4-digit card models + device_model = padHexValue(device_model, 4) + return device_model + +def getVendor(device, silent=False): + """ Return a device's vendor name + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off.
+ """ + vendor = create_string_buffer(MAX_BUFF_SIZE) + device_vendor = "N/A" + # Retrieve card vendor + ret = rocmsmi.rsmi_dev_vendor_name_get(device, vendor, MAX_BUFF_SIZE) + # Only continue if GPU vendor is AMD + if rsmi_ret_ok(ret, device, 'get_vendor_name', silent) and isAmdDevice(device): + device_vendor = vendor.value.decode() + return device_vendor + +def getGUID(device, silent=False): + """ Return the uint64 value of device's GUID, + also referred as GPU ID - reported by KFD. + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off. + """ + guid = c_uint64() + ret = rocmsmi.rsmi_dev_guid_get(device, byref(guid)) + guid_ret = "N/A" + if rsmi_ret_ok(ret, device, 'get_gpu_id_kfd', silent=silent): + guid_ret = guid.value + return guid_ret + +def getTargetGfxVersion(device, silent=False): + """ Return the uint64 value of device's target + graphics version as reported by KFD + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off. + """ + gfx_version = c_uint64() + gfx_ver_ret = "N/A" + ret = rocmsmi.rsmi_dev_target_graphics_version_get(device, byref(gfx_version)) + if rsmi_ret_ok(ret, device, 'get_target_gfx_version', silent=silent): + gfx_ver_ret = "gfx" + str(gfx_version.value) + return gfx_ver_ret + +def getNodeId(device, silent=False): + """ Return the uint32 value of device's node id + reported by KFD. + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off. 
+ """ + node_id = c_uint32() + ret = rocmsmi.rsmi_dev_node_id_get(device, byref(node_id)) + node_id_ret = "N/A" + if rsmi_ret_ok(ret, device, 'get_node_id_kfd', silent=silent): + node_id_ret = node_id.value + return node_id_ret + +def getDeviceName(device, silent=False): + """ Return the name (series) string of a device + as reported by rsmi_dev_name_get + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off. + """ + # Retrieve the device series + series = create_string_buffer(MAX_BUFF_SIZE) + device_name_ret = "N/A" + ret = rocmsmi.rsmi_dev_name_get(device, series, MAX_BUFF_SIZE) + if rsmi_ret_ok(ret, device, 'get_name', silent=silent): + device_name_ret = series.value.decode() + return device_name_ret def getMaxPower(device, silent=False): """ Return the maximum power cap of a given device @@ -515,10 +611,12 @@ def getVbiosVersion(device, silent=False): """ vbios = create_string_buffer(256) ret = rocmsmi.rsmi_dev_vbios_version_get(device, vbios, 256) - if ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED: - return "Unsupported" - elif rsmi_ret_ok(ret, device, silent=silent): - return vbios.value.decode() + vbios_ret = "N/A" + if rsmi_ret_ok(ret, device, silent=silent): + vbios_ret = vbios.value.decode() + if vbios_ret == "": + vbios_ret = "N/A" + return vbios_ret def getVersion(deviceList, component, silent=False): @@ -1784,9 +1882,9 @@ def showAllConcise(deviceList): deviceList.sort() available_temp_type = getTemperatureLabel(deviceList) temp_type = "(" + available_temp_type.capitalize() + ")" - header=['Device', '[Model : Revision]', 'Temp', 'Power', 'Partitions', + header=['Device', 'Node','IDs','', 'Temp', 'Power', 'Partitions', 'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%'] - subheader = ['', 'Name (20 chars)', temp_type, getPowerLabel(deviceList), + subheader = ['', '','(DID, ', 'GUID)', temp_type, getPowerLabel(deviceList), '(Mem, Compute)', '', '', '', '', '', '',
''] # add additional spaces to match header for idx, item in enumerate(subheader): @@ -1804,8 +1902,6 @@ def showAllConcise(deviceList): values = {} degree_sign = u'\N{DEGREE SIGN}' for device in deviceList: - gpu_dev_product_info = getDevProductInfo(device, silent) - gpu_dev_product_info_names = list(gpu_dev_product_info[device]) temp_val = str(getTemp(device, available_temp_type, silent)) if temp_val != 'N/A': temp_val += degree_sign + 'C' @@ -1838,19 +1934,19 @@ def showAllConcise(deviceList): if vram_used is None: mem_use_pct='Unsupported' if vram_used != None and vram_total != None and float(vram_total) != 0: - mem_use_pct = '% 3.0f%%' % (100 * (float(vram_used) / float(vram_total))) + mem_use_pct = float(100 * (float(vram_used) / float(vram_total))) + mem_use_pct = '{:<.0f}%'.format(mem_use_pct) # left aligned + # values with no precision - gpu_dev_product_info_top_name = gpu_dev_product_info_names[0] - if (len(gpu_dev_product_info_names) > 1): - values['card%s_Info' % (str(device))] = ['', gpu_dev_product_info_names[0], '', '', '', - '', '', '', - '', '', '', ''] - gpu_dev_product_info_top_name = gpu_dev_product_info_names[1] - - values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val, - powerVal, combined_partition, sclk, mclk, - fan, str(perf).lower(), pwrCap, mem_use_pct, - gpu_busy] + # Top Row - per device data + values['card%s' % (str(device))] = [device, getNodeId(device), + str(getDRMDeviceId(device)) + ", ", + str(getGUID(device)), + temp_val, powerVal, combined_partition, + sclk, mclk, fan, str(perf).lower(), + str(pwrCap), + str(mem_use_pct), + str(gpu_busy)] val_widths = {} for device in deviceList: @@ -1874,18 +1970,13 @@ def showAllConcise(deviceList): for device in deviceList: printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), values['card%s' % (str(device))])), None) - gpu_dev_product_info = getDevProductInfo(device, silent) - gpu_dev_product_info_names = 
list(gpu_dev_product_info[device]) - if (len(gpu_dev_product_info_names) > 1): - printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in - zip(range(len(max_widths)), values['card%s_Info' % (str(device))])), None) printLogSpacer(contentSizeToFit=len(header_output)) printLogSpacer(footerString, contentSizeToFit=len(header_output)) def showAllConciseHw(deviceList): - """ Display critical Hardware info for all devices in a concise format + """ Display critical Hardware info @param deviceList: List of DRM devices (can be a single-item list) """ @@ -1893,25 +1984,22 @@ def showAllConciseHw(deviceList): if PRINT_JSON: print('ERROR: Cannot print JSON/CSV output for concise hardware output') sys.exit(1) - printLogSpacer(' Concise Hardware Info ') - header = ['GPU', 'DID', 'DREV', 'GFX RAS', 'SDMA RAS', 'UMC RAS', 'VBIOS', 'BUS'] + header = ['GPU', 'NODE', 'DID', 'GUID', 'GFX VER', 'GFX RAS', 'SDMA RAS', 'UMC RAS', 'VBIOS', 'BUS'] head_widths = [len(head) + 2 for head in header] values = {} silent = True for device in deviceList: - gpuid = getId(device, silent) - if str(gpuid).startswith('0x'): - gpuid = str(gpuid)[2:] - gpurev = getRev(device, silent) - if str(gpurev).startswith('0x'): - gpurev = str(gpurev)[2:] - + did = getDRMDeviceId(device, silent) + nodeid = getNodeId(device, silent) + guid = getGUID(device, silent) + gfxVer = getTargetGfxVersion(device, silent) gfxRas = getRasEnablement(device, 'GFX', silent) sdmaRas = getRasEnablement(device, 'SDMA', silent) umcRas = getRasEnablement(device, 'UMC', silent) vbios = getVbiosVersion(device, silent) bus = getBus(device, silent) - values['card%s' % (str(device))] = [device, gpuid, gpurev, gfxRas, sdmaRas, umcRas, vbios, bus] + values['card%s' % (str(device))] = [device, nodeid, did, guid, gfxVer, gfxRas, sdmaRas, + umcRas, vbios, bus] val_widths = {} for device in deviceList: val_widths[device] = [len(str(val)) + 2 for val in values['card%s' % (str(device))]] @@ -1919,11 +2007,25 @@ def 
showAllConciseHw(deviceList): for device in deviceList: for col in range(len(val_widths[device])): max_widths[col] = max(max_widths[col], val_widths[device][col]) - printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)), None) + device_output="" for device in deviceList: - printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in - zip(range(len(max_widths)), values['card%s' % (str(device))])), None) - printLogSpacer() + if (device + 1 != len(deviceList)): + device_output += "".join(str(word).ljust(max_widths[col]) for col, word in + zip(range(len(max_widths)), values['card%s' % (str(device))])) + "\n" + else: + device_output += "".join(str(word).ljust(max_widths[col]) for col, word in + zip(range(len(max_widths)), values['card%s' % (str(device))])) + + ################################# + # Display concise hardware info # + ################################# + header_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)) + printLogSpacer(headerString, contentSizeToFit=len(header_output)) + printLogSpacer(' Concise Hardware Info ', contentSizeToFit=len(header_output)) + printLog(None, header_output, None) + printLog(None, device_output, None) + printLogSpacer(fill='=', contentSizeToFit=len(header_output)) + printLogSpacer(footerString, contentSizeToFit=len(header_output)) def showBus(deviceList): @@ -2275,14 +2377,17 @@ def showEnergy(deviceList): def showId(deviceList): - """ Display the device ID for a list of devices + """ Display the device IDs for a list of devices @param deviceList: List of DRM devices (can be a single-item list) """ printLogSpacer(' ID ') for device in deviceList: - printLog(device, 'Device ID', getId(device)) - printLog(device, 'Device Rev', getRev(device)) + printLog(device, 'Device Name', '\t\t' + str(getDeviceName(device))) + printLog(device, 'Device ID', '\t\t' + str(getDRMDeviceId(device))) + printLog(device, 'Device Rev', '\t\t' 
+ str(getRev(device))) + printLog(device, 'Subsystem ID', '\t' + str(getSubsystemId(device))) + printLog(device, 'GUID', '\t\t' + str(getGUID(device))) printLogSpacer() @@ -2475,6 +2580,7 @@ def showPids(verbose): vramUsage = 'UNKNOWN' sdmaUsage = 'UNKNOWN' cuOccupancy = 'UNKNOWN' + cuOccupancyInvalid = 0xFFFFFFFF dv_indices = (c_uint32 * num_devices.value)() ret = rocmsmi.rsmi_compute_process_gpus_get(int(pid), None, byref(num_devices)) if rsmi_ret_ok(ret, metric='get_gpu_compute_process'): @@ -2490,7 +2596,8 @@ def showPids(verbose): if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'): vramUsage = proc.vram_usage sdmaUsage = proc.sdma_usage - cuOccupancy = proc.cu_occupancy + if proc.cu_occupancy != cuOccupancyInvalid: + cuOccupancy = proc.cu_occupancy else: logging.debug('Unable to fetch process info by PID') dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)]) @@ -2499,7 +2606,8 @@ def showPids(verbose): if rsmi_ret_ok(ret, metric='get_compute_process_info_by_pid'): vramUsage = proc.vram_usage sdmaUsage = proc.sdma_usage - cuOccupancy = proc.cu_occupancy + if proc.cu_occupancy != cuOccupancyInvalid: + cuOccupancy = proc.cu_occupancy else: logging.debug('Unable to fetch process info by PID') dataArray.append([pid, getProcessName(pid), str(gpuNumber), str(vramUsage), str(sdmaUsage), str(cuOccupancy)]) @@ -2578,126 +2686,41 @@ def showPowerPlayTable(deviceList): printLogSpacer() -def showProductName(deviceList): - """ Show the requested product name for a list of devices +def showProduct(deviceList): + """ Show the requested product information for a list of devices @param deviceList: List of DRM devices (can be a single-item list) """ - series = create_string_buffer(256) - model = create_string_buffer(256) - vendor = create_string_buffer(256) - vbios = create_string_buffer(256) - # sku = create_string_buffer(256) printLogSpacer(' Product Info ') for device in deviceList: - # Retrieve card vendor 
- ret = rocmsmi.rsmi_dev_vendor_name_get(device, vendor, 256) # Only continue if GPU vendor is AMD - if rsmi_ret_ok(ret, device, 'get_vendor_name') and isAmdDevice(device): - try: - device_vendor = vendor.value.decode() - except UnicodeDecodeError: - printErrLog(device, "Unable to read device vendor") - device_vendor = "N/A" - # Retrieve the device series - ret = rocmsmi.rsmi_dev_name_get(device, series, 256) - if rsmi_ret_ok(ret, device, 'get_name'): - try: - device_series = series.value.decode() - printLog(device, 'Card series', '\t\t' + device_series) - except UnicodeDecodeError: - printErrLog(device, "Unable to read card series") - # Retrieve the device model - ret = rocmsmi.rsmi_dev_subsystem_name_get(device, model, 256) - if rsmi_ret_ok(ret, device, 'get_subsystem_name'): - try: - device_model = model.value.decode() - # padHexValue is used for applications that expect 4-digit card models - printLog(device, 'Card model', '\t\t' + padHexValue(device_model, 4)) - except UnicodeDecodeError: - printErrLog(device, "Unable to read device model") - printLog(device, 'Card vendor', '\t\t' + device_vendor) + if isAmdDevice(device): # TODO: Retrieve the SKU using 'rsmi_dev_sku_get' from the LIB - # ret = rocmsmi.rsmi_dev_sku_get(device, sku, 256) - # if rsmi_ret_ok(ret, device) and sku.value.decode(): - # device_sku = sku.value.decode() - # Retrieve the device SKU as a substring from VBIOS - device_sku = "" - ret = rocmsmi.rsmi_dev_vbios_version_get(device, vbios, 256) - if ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED: - device_sku = "Unsupported" - printLog(device, 'Card SKU', '\t\t' + device_sku) - elif rsmi_ret_ok(ret, device, 'get_vbios_version') and vbios.value.decode(): - # Device SKU is just the characters in between the two '-' in vbios_version - if vbios.value.decode().count('-') == 2 and len(str(vbios.value.decode().split('-')[1])) > 1: - device_sku = vbios.value.decode().split('-')[1] - else: - device_sku = 'unknown' - printLog(device, 'Card SKU', '\t\t' + 
device_sku) - else: - printErrLog(device, "Unable to decode VBIOS value for device SKU") + # Device SKU is just the characters in between the two '-' in vbios_version + vbios = getVbiosVersion(device, True) + device_sku = "N/A" + if vbios.count('-') == 2 and len(str(vbios.split('-')[1])) > 1: + device_sku = vbios.split('-')[1] + + printLog(device, 'Card Series', '\t\t' + str(getDeviceName(device))) + # Retrieve device ID from DRM and KFD + printLog(device, 'Card Model', str('\t\t' + getDRMDeviceId(device))) + printLog(device, 'Card Vendor', '\t\t' + getVendor(device)) + printLog(device, 'Card SKU', '\t\t' + device_sku) + printLog(device, 'Subsystem ID', str('\t' + getSubsystemId(device))) + printLog(device, 'Device Rev', str('\t\t' + getRev(device))) + printLog(device, 'Node ID', str('\t\t' + str(getNodeId(device)))) + printLog(device, 'GUID', str('\t\t' + str(getGUID(device)))) + printLog(device, 'GFX Version', str('\t\t' + getTargetGfxVersion(device))) + else: + vendor = getVendor(device) printLog(device, 'Incompatible device.\n' \ 'GPU[%s]\t\t: Expected vendor name: Advanced Micro Devices, Inc. [AMD/ATI]\n' \ - 'GPU[%s]\t\t: Actual vendor name' % (device, device), vendor.value.decode()) + 'GPU[%s]\t\t: Actual vendor name' % (device, device), vendor) printLogSpacer() -def getDevProductInfo(device, silent=False): - """ Show the requested product name for the device requested - - @param device: Device we want to get the info for - @param silent=Turn on to silence error output - (you plan to handle manually). Default is off. 
- """ - - # Retrieve card vendor - MAX_DESC_SIZE = 20 - device_series = "N/A" - device_model = "N/A" - gpu_revision = "N/A" - device_list = {} - vendor = create_string_buffer(MAX_BUFF_SIZE) - ret = rocmsmi.rsmi_dev_vendor_name_get(device, vendor, MAX_BUFF_SIZE) - # Only continue if GPU vendor is AMD - if rsmi_ret_ok(ret, device, 'get_vendor_name', silent) and isAmdDevice(device): - # Retrieve the device series - series = create_string_buffer(MAX_BUFF_SIZE) - ret = rocmsmi.rsmi_dev_name_get(device, series, MAX_BUFF_SIZE) - if rsmi_ret_ok(ret, device, 'get_name', silent): - try: - device_series = series.value.decode() - except UnicodeDecodeError: - if not silent: - printErrLog(device, "Unable to read card series") - - # Retrieve the device model - model = create_string_buffer(MAX_BUFF_SIZE) - ret = rocmsmi.rsmi_dev_subsystem_name_get(device, model, MAX_BUFF_SIZE) - if rsmi_ret_ok(ret, device, 'get_subsystem_name', silent): - try: - device_model = model.value.decode() - device_model = padHexValue(device_model, 4) - except UnicodeDecodeError: - if not silent: - printErrLog(device, "Unable to read device model") - - try: - gpu_revision = padHexValue(getRev(device), 2) - except Exception as exc: - if not silent: - printErrLog(device, "Unable to read card revision %s" % (exc)) - - device_series_str = str(device_series[:MAX_DESC_SIZE]) - device_series_str = device_series_str.ljust(MAX_DESC_SIZE, ' ') - device_model_str = str(('[' + device_model + ' : ' + gpu_revision + ']')) - device_model_str = str(device_model_str[:MAX_DESC_SIZE]) - device_model_str = device_model_str.ljust(MAX_DESC_SIZE, ' ') - device_list = {device : [device_series_str, device_model_str]} - - return device_list - - def showProfile(deviceList): """ Display available Power Profiles for a list of devices. 
@@ -3709,9 +3732,10 @@ def save(deviceList, savefilepath): # The code below is for when this script is run as an executable instead of when imported as a module def isConciseInfoRequested(args): - return len(sys.argv) == 1 or \ + is_concise_req = len(sys.argv) == 1 or \ len(sys.argv) == 2 and (args.alldevices or (args.json or args.csv)) or \ len(sys.argv) == 3 and (args.alldevices and (args.json or args.csv)) + return is_concise_req if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -3737,7 +3761,7 @@ if __name__ == '__main__': groupDisplayOpt.add_argument('--showhw', help='Show Hardware details', action='store_true') groupDisplayOpt.add_argument('-a', '--showallinfo', help='Show Temperature, Fan and Clock values', action='store_true') - groupDisplayTop.add_argument('-i', '--showid', help='Show DEVICE ID', action='store_true') + groupDisplayTop.add_argument('-i', '--showid', help='Show DEVICE IDs', action='store_true') groupDisplayTop.add_argument('-v', '--showvbios', help='Show VBIOS version', action='store_true') groupDisplayTop.add_argument('-e', '--showevents', help='Show event list', metavar='EVENT', type=str, nargs='*') groupDisplayTop.add_argument('--showdriverversion', help='Show kernel driver version', action='store_true') @@ -3746,7 +3770,7 @@ if __name__ == '__main__': groupDisplayTop.add_argument('--showmclkrange', help='Show mclk range', action='store_true') groupDisplayTop.add_argument('--showmemvendor', help='Show GPU memory vendor', action='store_true') groupDisplayTop.add_argument('--showsclkrange', help='Show sclk range', action='store_true') - groupDisplayTop.add_argument('--showproductname', help='Show SKU/Vendor name', action='store_true') + groupDisplayTop.add_argument('--showproductname', help='Show product details', action='store_true') groupDisplayTop.add_argument('--showserial', help='Show GPU\'s Serial Number', action='store_true') groupDisplayTop.add_argument('--showuniqueid', help='Show GPU\'s Unique ID', 
action='store_true') groupDisplayTop.add_argument('--showvoltagerange', help='Show voltage range', action='store_true') @@ -3929,7 +3953,7 @@ if __name__ == '__main__': if not PRINT_JSON: print('\n') - if not isConciseInfoRequested(args): + if not isConciseInfoRequested(args) and args.showhw == False: printLogSpacer(headerString) if args.showallinfo: @@ -4056,7 +4080,7 @@ if __name__ == '__main__': if args.showfwinfo or str(args.showfwinfo) == '[]': showFwInfo(deviceList, args.showfwinfo) if args.showproductname: - showProductName(deviceList) + showProduct(deviceList) if args.showxgmierr: showXgmiErr(deviceList) if args.shownodesbw: @@ -4193,7 +4217,7 @@ if __name__ == '__main__': devCsv = formatCsv(deviceList) print(devCsv) - if not isConciseInfoRequested(args): + if not isConciseInfoRequested(args) and args.showhw == False: printLogSpacer(footerString) rsmi_ret_ok(rocmsmi.rsmi_shut_down()) diff --git a/rocm_smi/example/rocm_smi_example.cc b/rocm_smi/example/rocm_smi_example.cc index 76b1341c7b..5ea30bf5bf 100755 --- a/rocm_smi/example/rocm_smi_example.cc +++ b/rocm_smi/example/rocm_smi_example.cc @@ -796,6 +796,20 @@ int main() { ret = rsmi_dev_target_graphics_version_get(i, &val_ui64); std::cout << "\t**Target Graphics Version: " << std::dec << static_cast(val_ui64) << "\n"; + ret = rsmi_dev_guid_get(i, &val_ui64); + std::cout << "\t**GUID: " << std::dec + << static_cast(val_ui64) << "\n"; + ret = rsmi_dev_node_id_get(i, &val_ui32); + std::cout << "\t**Node ID: " << std::dec + << static_cast(val_ui32) << "\n"; + char vbios_version[256]; + ret = rsmi_dev_vbios_version_get(i, vbios_version, 256); + if (ret == RSMI_STATUS_SUCCESS) { + std::cout << "\t**VBIOS Version: " << vbios_version << "\n"; + } else { + std::cout << "\t**VBIOS Version: " + << amd::smi::getRSMIStatusString(ret, false) << "\n"; + } char current_compute_partition[256]; current_compute_partition[0] = '\0'; @@ -988,18 +1002,10 @@ int main() { } std::cout << " ** Note: Values MAX'ed out (UINTX MAX 
are unsupported for the version in question) ** " << "\n"; + std::cout << "\n\n"; print_test_header("GPU METRICS: Using direct APIs (newer)", i); metrics_table_header_t header_values; - GPUMetricTempHbm_t hbm_values; - GPUMetricVcnActivity_t vcn_values; - GPUMetricJpegActivity_t jpeg_values; - GPUMetricXgmiReadDataAcc_t xgmi_read_values; - GPUMetricXgmiWriteDataAcc_t xgmi_write_values; - GPUMetricCurrGfxClk_t curr_gfxclk_values; - GPUMetricCurrSocClk_t curr_socclk_values; - GPUMetricCurrVClk0_t curr_vclk0_values; - GPUMetricCurrDClk0_t curr_dclk0_values; ret = rsmi_dev_metrics_header_info_get(i, &header_values); std::cout << "\t[Metrics Header]" << "\n"; @@ -1007,152 +1013,12 @@ int main() { std::cout << "\t -> content_revision : " << print_unsigned_int(header_values.content_revision) << "\n"; std::cout << "\t--------------------" << "\n"; - std::cout << "\n"; - std::cout << "\t[Temperature]" << "\n"; - ret = rsmi_dev_metrics_temp_edge_get(i, &val_ui16); - std::cout << "\t -> temp_edge(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_temp_hotspot_get(i, &val_ui16); - std::cout << "\t -> temp_hotspot(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_temp_mem_get(i, &val_ui16); - std::cout << "\t -> temp_mem(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_temp_vrgfx_get(i, &val_ui16); - std::cout << "\t -> temp_vrgfx(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_temp_vrsoc_get(i, &val_ui16); - std::cout << "\t -> temp_vrsoc(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_temp_vrmem_get(i, &val_ui16); - std::cout << "\t -> temp_vrmem(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_temp_hbm_get(i, &hbm_values); - std::cout << "\t -> temp_hbm(): " << print_error_or_value(ret, hbm_values) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Power/Energy]" << "\n"; - ret = 
rsmi_dev_metrics_curr_socket_power_get(i, &val_ui16); - std::cout << "\t -> current_socket_power(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_energy_acc_get(i, &val_ui64); - std::cout << "\t -> energy_accum(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_avg_socket_power_get(i, &val_ui16); - std::cout << "\t -> average_socket_power(): " << print_error_or_value(ret, val_ui16) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Utilization]" << "\n"; - ret = rsmi_dev_metrics_avg_gfx_activity_get(i, &val_ui16); - std::cout << "\t -> average_gfx_activity(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_umc_activity_get(i, &val_ui16); - std::cout << "\t -> average_umc_activity(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_mm_activity_get(i, &val_ui16); - std::cout << "\t -> average_mm_activity(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_vcn_activity_get(i, &vcn_values); - std::cout << "\t -> vcn_activity(): " << print_error_or_value(ret, vcn_values) << "\n"; - ret = rsmi_dev_metrics_jpeg_activity_get(i, &jpeg_values); - std::cout << "\t -> jpeg_activity(): " << print_error_or_value(ret, jpeg_values) << "\n"; - ret = rsmi_dev_metrics_mem_activity_acc_get(i, &val_ui32); - std::cout << "\t -> mem_activity_accum(): " << print_error_or_value(ret, val_ui32) << "\n"; - ret = rsmi_dev_metrics_gfx_activity_acc_get(i, &val_ui32); - std::cout << "\t -> gfx_activity_accum(): " << print_error_or_value(ret, val_ui32) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Average Clock]" << "\n"; - ret = rsmi_dev_metrics_avg_gfx_clock_frequency_get(i, &val_ui16); - std::cout << "\t -> average_gfx_clock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_soc_clock_frequency_get(i, &val_ui16); - std::cout << "\t -> average_soc_clock_frequency(): " << print_error_or_value(ret, val_ui16) << 
"\n"; - ret = rsmi_dev_metrics_avg_uclock_frequency_get(i, &val_ui16); - std::cout << "\t -> average_uclock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_vclock0_frequency_get(i, &val_ui16); - std::cout << "\t -> average_vclock0_frequency(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_dclock0_frequency_get(i, &val_ui16); - std::cout << "\t -> average_dclock0_frequency(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_vclock1_frequency_get(i, &val_ui16); - std::cout << "\t -> average_vclock1_frequency(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_avg_dclock1_frequency_get(i, &val_ui16); - std::cout << "\t -> average_dclock1_frequency(): " << print_error_or_value(ret, val_ui16) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Current Clock]" << "\n"; - ret = rsmi_dev_metrics_curr_vclk1_get(i, &val_ui16); - std::cout << "\t -> current_vclock1(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_curr_dclk1_get(i, &val_ui16); - std::cout << "\t -> current_dclock1(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_curr_uclk_get(i, &val_ui16); - std::cout << "\t -> current_uclock(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_curr_dclk0_get(i, &curr_dclk0_values); - std::cout << "\t -> current_dclk0(): " << print_error_or_value(ret, curr_dclk0_values) << "\n"; - ret = rsmi_dev_metrics_curr_gfxclk_get(i, &curr_gfxclk_values); - std::cout << "\t -> current_gfxclk(): " << print_error_or_value(ret, curr_gfxclk_values) << "\n"; - ret = rsmi_dev_metrics_curr_socclk_get(i, &curr_socclk_values); - std::cout << "\t -> current_soc_clock(): " << print_error_or_value(ret, curr_socclk_values) << "\n"; - ret = rsmi_dev_metrics_curr_vclk0_get(i, &curr_vclk0_values); - std::cout << "\t -> current_vclk0(): " << print_error_or_value(ret, curr_vclk0_values) << 
"\n"; - - std::cout << "\n"; - std::cout << "\t[Throttle]" << "\n"; - ret = rsmi_dev_metrics_indep_throttle_status_get(i, &val_ui64); - std::cout << "\t -> indep_throttle_status(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_throttle_status_get(i, &val_ui32); - std::cout << "\t -> throttle_status(): " << print_error_or_value(ret, val_ui32) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Gfx Clock Lock]" << "\n"; - ret = rsmi_dev_metrics_gfxclk_lock_status_get(i, &val_ui32); - std::cout << "\t -> gfxclk_lock_status(): " << print_error_or_value(ret, val_ui32) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Current Fan Speed]" << "\n"; - ret = rsmi_dev_metrics_curr_fan_speed_get(i, &val_ui16); - std::cout << "\t -> current_fan_speed(): " << print_error_or_value(ret, val_ui16) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Link/Bandwidth/Speed]" << "\n"; - ret = rsmi_dev_metrics_pcie_link_width_get(i, &val_ui16); - std::cout << "\t -> pcie_link_width(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_pcie_link_speed_get(i, &val_ui16); - std::cout << "\t -> pcie_link_speed(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_pcie_bandwidth_acc_get(i, &val_ui64); - std::cout << "\t -> pcie_bandwidth_accum(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_pcie_bandwidth_inst_get(i, &val_ui64); - std::cout << "\t -> pcie_bandwidth_inst(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_pcie_l0_recov_count_acc_get(i, &val_ui64); - std::cout << "\t -> pcie_l0_recov_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_pcie_replay_count_acc_get(i, &val_ui64); - std::cout << "\t -> pcie_replay_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_pcie_replay_rover_count_acc_get(i, &val_ui64); - std::cout << "\t -> pcie_replay_rollover_count_accum(): " << 
print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_xgmi_link_width_get(i, &val_ui16); - std::cout << "\t -> xgmi_link_width(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_xgmi_link_speed_get(i, &val_ui16); - std::cout << "\t -> xgmi_link_speed(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_xgmi_read_data_get(i, &xgmi_read_values); - std::cout << "\t -> xgmi_read_data(): " << print_error_or_value(ret, xgmi_read_values) << "\n"; - ret = rsmi_dev_metrics_xgmi_write_data_get(i, &xgmi_write_values); - std::cout << "\t -> xgmi_write_data(): " << print_error_or_value(ret, xgmi_write_values) << "\n"; - ret = rsmi_dev_metrics_pcie_nak_sent_count_acc_get(i, &val_ui32); - std::cout << "\t -> pcie_nak_sent_count_accum(): " << print_error_or_value(ret, val_ui32) << "\n"; - ret = rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get(i, &val_ui32); - std::cout << "\t -> pcie_nak_rcvd_count_accum(): " << print_error_or_value(ret, val_ui32) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Voltage]" << "\n"; - ret = rsmi_dev_metrics_volt_soc_get(i, &val_ui16); - std::cout << "\t -> voltage_soc(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_volt_gfx_get(i, &val_ui16); - std::cout << "\t -> voltage_gfx(): " << print_error_or_value(ret, val_ui16) << "\n"; - ret = rsmi_dev_metrics_volt_mem_get(i, &val_ui16); - std::cout << "\t -> voltage_mem(): " << print_error_or_value(ret, val_ui16) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Timestamp]" << "\n"; - ret = rsmi_dev_metrics_system_clock_counter_get(i, &val_ui64); - std::cout << "\t -> system_clock_counter(): " << print_error_or_value(ret, val_ui64) << "\n"; - ret = rsmi_dev_metrics_firmware_timestamp_get(i, &val_ui64); - std::cout << "\t -> firmware_timestamp(): " << print_error_or_value(ret, val_ui64) << "\n"; - std::cout << "\n"; std::cout << "\t[XCD CounterVoltage]" << "\n"; ret = rsmi_dev_metrics_xcd_counter_get(i, 
&val_ui16); std::cout << "\t -> xcd_counter(): " << print_error_or_value(ret, val_ui16) << "\n"; std::cout << "\n\n"; - ret = rsmi_dev_perf_level_get(i, &pfl); CHK_AND_PRINT_RSMI_ERR_RET(ret) std::cout << "\t**Performance Level:" << diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 200466947c..f6f0ff64b4 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -5143,11 +5143,12 @@ rsmi_status_t rsmi_dev_target_graphics_version_get(uint32_t dv_ind, uint64_t *gfx_version) { TRY std::ostringstream ss; - ss << __PRETTY_FUNCTION__ << "| ======= start ======="; + ss << __PRETTY_FUNCTION__ << " | ======= start =======" + << " | Device #: " << dv_ind; + LOG_TRACE(ss); rsmi_status_t ret = RSMI_STATUS_NOT_SUPPORTED; std::string version = ""; const uint64_t undefined_gfx_version = std::numeric_limits::max(); - LOG_TRACE(ss); if (gfx_version == nullptr) { ret = RSMI_STATUS_INVALID_ARGS; } else { @@ -5160,15 +5161,80 @@ rsmi_status_t rsmi_dev_target_graphics_version_get(uint32_t dv_ind, } ss << __PRETTY_FUNCTION__ << " | ======= end ======= " - << " | Returning: " << getRSMIStatusString(ret) + << " | Returning: " << getRSMIStatusString(ret, false) << " | Device #: " << dv_ind - << " | Type: N/A" - << " | Data: " << ((gfx_version == nullptr) ? "nullptr": std::to_string(*gfx_version)); + << " | Type: Target_graphics_version" + << " | Data: " + << ((gfx_version == nullptr) ? 
"nullptr" : + amd::smi::print_unsigned_hex_and_int(*gfx_version)); LOG_TRACE(ss); return ret; CATCH } +rsmi_status_t rsmi_dev_guid_get(uint32_t dv_ind, uint64_t *guid) { + TRY + std::ostringstream ss; + ss << __PRETTY_FUNCTION__ << " | ======= start =======" + << " | Device #: " << dv_ind; + LOG_TRACE(ss); + GET_DEV_AND_KFDNODE_FROM_INDX + uint64_t kgd_gpu_id = 0; + rsmi_status_t resp = RSMI_STATUS_NOT_SUPPORTED; + int ret = kfd_node->KFDNode::get_gpu_id(&kgd_gpu_id); + resp = amd::smi::ErrnoToRsmiStatus(ret); + + if (guid == nullptr) { + resp = RSMI_STATUS_INVALID_ARGS; + } else { + *guid = kgd_gpu_id; + } + + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Returning: " << getRSMIStatusString(resp, false) + << " | Device #: " << dv_ind + << " | Type: GUID (gpu_id)" + << " | Data: " << ((guid == nullptr) ? "nullptr" : + amd::smi::print_unsigned_hex_and_int(*guid)); + LOG_INFO(ss); + return resp; + CATCH +} + +rsmi_status_t rsmi_dev_node_id_get(uint32_t dv_ind, uint32_t *node_id) { + TRY + std::ostringstream ss; + ss << __PRETTY_FUNCTION__ << " | ======= start =======" + << " | Device #: " << dv_ind; + LOG_TRACE(ss); + GET_DEV_AND_KFDNODE_FROM_INDX + uint32_t kgd_node_id = std::numeric_limits::max(); + rsmi_status_t resp = RSMI_STATUS_NOT_SUPPORTED; + int ret = kfd_node->KFDNode::get_node_id(&kgd_node_id); + resp = amd::smi::ErrnoToRsmiStatus(ret); + + if (node_id == nullptr) { + resp = RSMI_STATUS_INVALID_ARGS; + } else { + *node_id = kgd_node_id; + if (kgd_node_id == std::numeric_limits::max()) { + resp = RSMI_STATUS_NOT_SUPPORTED; + } + } + + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Returning: " << getRSMIStatusString(resp, false) + << " | Device #: " << dv_ind + << " | Type: node_id" + << " | Data: " << ((node_id == nullptr) ? 
"nullptr" : + amd::smi::print_unsigned_hex_and_int(*node_id)); + LOG_INFO(ss); + return resp; + CATCH +} + enum iterator_handle_type { FUNC_ITER = 0, VARIANT_ITER, @@ -5629,1548 +5695,6 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) { CATCH } -// -// NOTE: APIs related to new 'GPU Metrics' related work are added here -// so they can be used/tested. -// -rsmi_status_t -rsmi_dev_metrics_temp_edge_get(uint32_t dv_ind, uint16_t* edge_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(edge_value != nullptr); - if (edge_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempEdge); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *edge_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_temp_hotspot_get(uint32_t dv_ind, uint16_t* hotspot_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(hotspot_value != nullptr); - if (hotspot_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempHotspot); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *hotspot_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - 
LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_temp_mem_get(uint32_t dv_ind, uint16_t* mem_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(mem_value != nullptr); - if (mem_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempMem); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *mem_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_temp_vrgfx_get(uint32_t dv_ind, uint16_t* vrgfx_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(vrgfx_value != nullptr); - if (vrgfx_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempVrGfx); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *vrgfx_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_temp_vrsoc_get(uint32_t dv_ind, uint16_t* vrsoc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(vrsoc_value != nullptr); - if (vrsoc_value 
== nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempVrSoc); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *vrsoc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_temp_vrmem_get(uint32_t dv_ind, uint16_t* vrmem_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(vrmem_value != nullptr); - if (vrmem_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempVrMem); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *vrmem_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(socket_power_value != nullptr); - if (socket_power_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrSocketPower); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *socket_power_value); - ostrstream << __PRETTY_FUNCTION__ - << " | 
======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_socket_power_get(uint32_t dv_ind, uint16_t* socket_power_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(socket_power_value != nullptr); - if (socket_power_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgSocketPower); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *socket_power_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_gfx_activity_get(uint32_t dv_ind, uint16_t* gfx_activity_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(gfx_activity_value != nullptr); - if (gfx_activity_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgGfxActivity); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *gfx_activity_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); 
- - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_umc_activity_get(uint32_t dv_ind, uint16_t* umc_activity_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(umc_activity_value != nullptr); - if (umc_activity_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgUmcActivity); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *umc_activity_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_mm_activity_get(uint32_t dv_ind, uint16_t* mm_activity_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(mm_activity_value != nullptr); - if (mm_activity_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgMmActivity); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *mm_activity_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_energy_acc_get(uint32_t dv_ind, uint64_t* energy_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - 
LOG_TRACE(ostrstream); - - assert(energy_acc_value != nullptr); - if (energy_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricEnergyAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *energy_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_system_clock_counter_get(uint32_t dv_ind, uint64_t* system_clock_counter_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(system_clock_counter_value != nullptr); - if (system_clock_counter_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTSClockCounter); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *system_clock_counter_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_firmware_timestamp_get(uint32_t dv_ind, uint64_t* firmware_timestamp_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(firmware_timestamp_value != nullptr); - if (firmware_timestamp_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto 
gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTSFirmware); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *firmware_timestamp_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_indep_throttle_status_get(uint32_t dv_ind, uint64_t* throttle_status_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(throttle_status_value != nullptr); - if (throttle_status_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricIndepThrottleStatus); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *throttle_status_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_throttle_status_get(uint32_t dv_ind, uint32_t* throttle_status_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(throttle_status_value != nullptr); - if (throttle_status_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricThrottleStatus); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *throttle_status_value); - ostrstream << __PRETTY_FUNCTION__ - << " | 
======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_fan_speed_get(uint32_t dv_ind, uint16_t* fan_speed_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(fan_speed_value != nullptr); - if (fan_speed_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrFanSpeed); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *fan_speed_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_link_width_get(uint32_t dv_ind, uint16_t* pcie_link_width_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_link_width_value != nullptr); - if (pcie_link_width_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieLinkWidth); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_link_width_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - 
return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_link_speed_get(uint32_t dv_ind, uint16_t* pcie_link_speed_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_link_speed_value != nullptr); - if (pcie_link_speed_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieLinkSpeed); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_link_speed_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_xgmi_link_width_get(uint32_t dv_ind, uint16_t* xgmi_link_width_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(xgmi_link_width_value != nullptr); - if (xgmi_link_width_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiLinkWidth); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *xgmi_link_width_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_xgmi_link_speed_get(uint32_t dv_ind, uint16_t* xgmi_link_speed_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| 
======= start ======="; - LOG_TRACE(ostrstream); - - assert(xgmi_link_speed_value != nullptr); - if (xgmi_link_speed_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiLinkSpeed); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *xgmi_link_speed_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_gfxclk_lock_status_get(uint32_t dv_ind, uint32_t* gfxclk_lock_status_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(gfxclk_lock_status_value != nullptr); - if (gfxclk_lock_status_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricGfxClkLockStatus); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *gfxclk_lock_status_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_gfx_activity_acc_get(uint32_t dv_ind, uint32_t* gfx_activity_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(gfx_activity_acc_value != nullptr); - if (gfx_activity_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const 
auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricGfxActivityAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *gfx_activity_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_mem_activity_acc_get(uint32_t dv_ind, uint32_t* mem_activity_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(mem_activity_acc_value != nullptr); - if (mem_activity_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricMemActivityAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *mem_activity_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_bandwidth_acc_get(uint32_t dv_ind, uint64_t* pcie_bandwidth_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_bandwidth_acc_value != nullptr); - if (pcie_bandwidth_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieBandwidthAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_bandwidth_acc_value); - 
ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_bandwidth_inst_get(uint32_t dv_ind, uint64_t* pcie_bandwidth_inst_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_bandwidth_inst_value != nullptr); - if (pcie_bandwidth_inst_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieBandwidthInst); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_bandwidth_inst_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_l0_recov_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_count_acc_value != nullptr); - if (pcie_count_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieL0RecovCountAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_count_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | 
Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_replay_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_count_acc_value != nullptr); - if (pcie_count_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieReplayCountAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_count_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_replay_rover_count_acc_get(uint32_t dv_ind, uint64_t* pcie_count_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_count_acc_value != nullptr); - if (pcie_count_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieReplayRollOverCountAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_count_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t 
-rsmi_dev_metrics_pcie_nak_sent_count_acc_get(uint32_t dv_ind, uint32_t* pcie_nak_sent_count_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_nak_sent_count_acc_value != nullptr); - if (pcie_nak_sent_count_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieNakSentCountAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_nak_sent_count_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get(uint32_t dv_ind, uint32_t* pcie_nak_rcvd_count_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(pcie_nak_rcvd_count_acc_value != nullptr); - if (pcie_nak_rcvd_count_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricPcieNakReceivedCountAccumulator); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *pcie_nak_rcvd_count_acc_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_uclk_get(uint32_t dv_ind, uint16_t* uclk_value) -{ - TRY - std::ostringstream ostrstream; 
- ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(uclk_value != nullptr); - if (uclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrUClock); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *uclk_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_temp_hbm_get(uint32_t dv_ind, GPUMetricTempHbm_t* temp_hbm_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(temp_hbm_value != nullptr); - if (temp_hbm_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricTempHbm); - amd::smi::GPUMetricTempHbmTbl_t tmp_hbl_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_hbl_tbl); - const auto max_num_elems = - static_cast(std::end(*temp_hbm_value) - std::begin(*temp_hbm_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_hbl_tbl.size()) ? 
max_num_elems : tmp_hbl_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_hbl_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(temp_hbm_value, 0, sizeof(*temp_hbm_value)); - std::copy_n(std::begin(tmp_hbl_tbl), copy_size, *temp_hbm_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_vcn_activity_get(uint32_t dv_ind, GPUMetricVcnActivity_t* vcn_activity_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(vcn_activity_value != nullptr); - if (vcn_activity_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity); - amd::smi::GPUMetricVcnActivityTbl_t tmp_vcn_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_vcn_tbl); - const auto max_num_elems = - static_cast(std::end(*vcn_activity_value) - std::begin(*vcn_activity_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_vcn_tbl.size()) ? 
max_num_elems : tmp_vcn_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_vcn_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(vcn_activity_value, 0, sizeof(*vcn_activity_value)); - std::copy_n(std::begin(tmp_vcn_tbl), copy_size, *vcn_activity_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_jpeg_activity_get(uint32_t dv_ind, GPUMetricJpegActivity_t* jpeg_activity_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(jpeg_activity_value != nullptr); - if (jpeg_activity_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVcnActivity); - amd::smi::GPUMetricJpegActivityTbl_t tmp_jpeg_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_jpeg_tbl); - const auto max_num_elems = - static_cast(std::end(*jpeg_activity_value) - std::begin(*jpeg_activity_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_jpeg_tbl.size()) ? 
max_num_elems : tmp_jpeg_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_jpeg_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(jpeg_activity_value, 0, sizeof(*jpeg_activity_value)); - std::copy_n(std::begin(tmp_jpeg_tbl), copy_size, *jpeg_activity_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_xgmi_read_data_get(uint32_t dv_ind, GPUMetricXgmiReadDataAcc_t* xgmi_read_data_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(xgmi_read_data_acc_value != nullptr); - if (xgmi_read_data_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiReadDataAccumulator); - amd::smi::GPUMetricXgmiAccTbl_t tmp_xgmi_acc_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_xgmi_acc_tbl); - const auto max_num_elems = - static_cast(std::end(*xgmi_read_data_acc_value) - std::begin(*xgmi_read_data_acc_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_xgmi_acc_tbl.size()) ? 
max_num_elems : tmp_xgmi_acc_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_xgmi_acc_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(xgmi_read_data_acc_value, 0, sizeof(*xgmi_read_data_acc_value)); - std::copy_n(std::begin(tmp_xgmi_acc_tbl), copy_size, *xgmi_read_data_acc_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_xgmi_write_data_get(uint32_t dv_ind, GPUMetricXgmiWriteDataAcc_t* xgmi_write_data_acc_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(xgmi_write_data_acc_value != nullptr); - if (xgmi_write_data_acc_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricXgmiWriteDataAccumulator); - amd::smi::GPUMetricXgmiAccTbl_t tmp_xgmi_acc_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_xgmi_acc_tbl); - const auto max_num_elems = - static_cast(std::end(*xgmi_write_data_acc_value) - std::begin(*xgmi_write_data_acc_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_xgmi_acc_tbl.size()) ? 
max_num_elems : tmp_xgmi_acc_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_xgmi_acc_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(xgmi_write_data_acc_value, 0, sizeof(*xgmi_write_data_acc_value)); - std::copy_n(std::begin(tmp_xgmi_acc_tbl), copy_size, *xgmi_write_data_acc_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_gfxclk_get(uint32_t dv_ind, GPUMetricCurrGfxClk_t* current_gfxclk_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(current_gfxclk_value != nullptr); - if (current_gfxclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrGfxClock); - amd::smi::GPUMetricCurrGfxClkTbl_t tmp_curr_gfxclk_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_gfxclk_tbl); - const auto max_num_elems = - static_cast(std::end(*current_gfxclk_value) - std::begin(*current_gfxclk_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_curr_gfxclk_tbl.size()) ? 
max_num_elems : tmp_curr_gfxclk_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_curr_gfxclk_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(current_gfxclk_value, 0, sizeof(*current_gfxclk_value)); - std::copy_n(std::begin(tmp_curr_gfxclk_tbl), copy_size, *current_gfxclk_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_socclk_get(uint32_t dv_ind, GPUMetricCurrSocClk_t* current_socclk_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(current_socclk_value != nullptr); - if (current_socclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrSocClock); - amd::smi::GPUMetricCurrSocClkTbl_t tmp_curr_socclk_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_socclk_tbl); - const auto max_num_elems = - static_cast(std::end(*current_socclk_value) - std::begin(*current_socclk_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_curr_socclk_tbl.size()) ? 
max_num_elems : tmp_curr_socclk_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_curr_socclk_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(current_socclk_value, 0, sizeof(*current_socclk_value)); - std::copy_n(std::begin(tmp_curr_socclk_tbl), copy_size, *current_socclk_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_vclk0_get(uint32_t dv_ind, GPUMetricCurrVClk0_t* current_vclk_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(current_vclk_value != nullptr); - if (current_vclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrVClock0); - amd::smi::GPUMetricCurrVClkTbl_t tmp_curr_vclk0_tbl{}; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_vclk0_tbl); - const auto max_num_elems = - static_cast(std::end(*current_vclk_value) - std::begin(*current_vclk_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_curr_vclk0_tbl.size()) ? 
max_num_elems : tmp_curr_vclk0_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_curr_vclk0_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(current_vclk_value, 0, sizeof(*current_vclk_value)); - std::copy_n(std::begin(tmp_curr_vclk0_tbl), copy_size, *current_vclk_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_vclk1_get(uint32_t dv_ind, uint16_t* current_vclk_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(current_vclk_value != nullptr); - if (current_vclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrVClock1); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *current_vclk_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_dclk0_get(uint32_t dv_ind, GPUMetricCurrDClk0_t* current_dclk_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(current_dclk_value != nullptr); - if (current_dclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto 
gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrDClock0); - amd::smi::GPUMetricCurrDClkTbl_t tmp_curr_dclk0_tbl; - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, tmp_curr_dclk0_tbl); - const auto max_num_elems = - static_cast(std::end(*current_dclk_value) - std::begin(*current_dclk_value)); - const auto copy_size = - static_cast((max_num_elems < tmp_curr_dclk0_tbl.size()) ? max_num_elems : tmp_curr_dclk0_tbl.size()); - ostrstream << __PRETTY_FUNCTION__ - << "\n | ======= end ======= " - << "\n | End Result " - << "\n | Device #: " << dv_ind - << "\n | Metric Type: " << static_cast(gpu_metric_unit) - << "\n | Metric Size: " << tmp_curr_dclk0_tbl.size() - << "\n | Max num of elements: " << max_num_elems - << "\n | Copy size: " << copy_size - << "\n | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - std::memset(current_dclk_value, 0, sizeof(*current_dclk_value)); - std::copy_n(std::begin(tmp_curr_dclk0_tbl), copy_size, *current_dclk_value); - } - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_curr_dclk1_get(uint32_t dv_ind, uint16_t* current_dclk_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(current_dclk_value != nullptr); - if (current_dclk_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricCurrDClock1); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *current_dclk_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - 
CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_gfx_clock_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgGfxClockFrequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_soc_clock_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgSocClockFrequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_uclock_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << 
__PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgUClockFrequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_vclock0_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgVClock0Frequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_dclock0_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return 
rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgDClock0Frequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_vclock1_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgVClock1Frequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_avg_dclock1_frequency_get(uint32_t dv_ind, uint16_t* clock_frequency_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(clock_frequency_value != nullptr); - if (clock_frequency_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricAvgDClock1Frequency); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, 
gpu_metric_unit, *clock_frequency_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_volt_soc_get(uint32_t dv_ind, uint16_t* voltage_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(voltage_value != nullptr); - if (voltage_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVoltageSoc); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *voltage_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_volt_gfx_get(uint32_t dv_ind, uint16_t* voltage_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(voltage_value != nullptr); - if (voltage_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVoltageGfx); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *voltage_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - 
LOG_INFO(ostrstream); - - return status_code; - CATCH -} - -rsmi_status_t -rsmi_dev_metrics_volt_mem_get(uint32_t dv_ind, uint16_t* voltage_value) -{ - TRY - std::ostringstream ostrstream; - ostrstream << __PRETTY_FUNCTION__ << "| ======= start ======="; - LOG_TRACE(ostrstream); - - assert(voltage_value != nullptr); - if (voltage_value == nullptr) { - return rsmi_status_t::RSMI_STATUS_INVALID_ARGS; - } - - const auto gpu_metric_unit(AMDGpuMetricsUnitType_t::kMetricVoltageMem); - auto status_code = rsmi_dev_gpu_metrics_info_query(dv_ind, gpu_metric_unit, *voltage_value); - ostrstream << __PRETTY_FUNCTION__ - << " | ======= end ======= " - << " | End Result " - << " | Device #: " << dv_ind - << " | Metric Type: " << static_cast(gpu_metric_unit) - << " | Returning = " << status_code << " " << getRSMIStatusString(status_code) << " |"; - LOG_INFO(ostrstream); - - return status_code; - CATCH -} - rsmi_status_t rsmi_dev_metrics_header_info_get(uint32_t dv_ind, metrics_table_header_t* header_value) { @@ -7214,10 +5738,10 @@ rsmi_dev_metrics_xcd_counter_get(uint32_t dv_ind, uint16_t* xcd_counter_value) } auto xcd_counter = uint16_t(0); - GPUMetricCurrGfxClk_t curr_gfxclk_table{}; - auto status_code = rsmi_dev_metrics_curr_gfxclk_get(dv_ind, &curr_gfxclk_table); + rsmi_gpu_metrics_t gpu_metrics; + auto status_code = rsmi_dev_gpu_metrics_info_get(dv_ind, &gpu_metrics); if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) { - for (const auto& gfxclk : curr_gfxclk_table) { + for (const auto& gfxclk : gpu_metrics.current_gfxclks) { if ((gfxclk != 0) && (gfxclk != UINT16_MAX)) { xcd_counter++; } @@ -7259,10 +5783,6 @@ rsmi_dev_metrics_log_get(uint32_t dv_ind) CATCH } -// -// End of: new GPU Metrics related work. -// - // UNDOCUMENTED FUNCTIONS // This functions are not declared in rocm_smi.h. 
They are either not fully diff --git a/src/rocm_smi_gpu_metrics.cc b/src/rocm_smi_gpu_metrics.cc index 44b9a88052..2f1a40837a 100755 --- a/src/rocm_smi_gpu_metrics.cc +++ b/src/rocm_smi_gpu_metrics.cc @@ -2680,7 +2680,6 @@ rsmi_status_t Device::dev_read_gpu_metrics_header_data() LOG_TRACE(ostrstream); // Check if/when metrics table needs to be refreshed. - auto now_ts = actual_timestamp_in_secs(); if ((!m_gpu_metrics_header.m_structure_size) || (!m_gpu_metrics_header.m_format_revision) || (!m_gpu_metrics_header.m_content_revision)) { diff --git a/src/rocm_smi_kfd.cc b/src/rocm_smi_kfd.cc index 3109781e39..9088ec316e 100755 --- a/src/rocm_smi_kfd.cc +++ b/src/rocm_smi_kfd.cc @@ -507,7 +507,9 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc, // Collect count of compute units cu_count += kfd_node_map[gpu_id]->cu_count(); } else { - return err; + //Some GFX revisions do not provide cu_occupancy debugfs method + proc->cu_occupancy = CU_OCCUPANCY_INVALID; + cu_count = 0; } } @@ -982,15 +984,72 @@ int KFDNode::get_gfx_target_version(uint64_t *gfx_target_version) { *gfx_target_version = gfx_version; ss << __PRETTY_FUNCTION__ << " | File: " << properties_path - << " | Successfully read node #" << std::to_string(this->node_indx_) + << " | Read node: " << std::to_string(this->node_indx_) << " for gfx_target_version" - << " | Data (gfx_target_version) *gfx_target_version = " + << " | Data (*gfx_target_version): " << std::to_string(*gfx_target_version) - << " | return = " << std::to_string(ret) + << getRSMIStatusString(amd::smi::ErrnoToRsmiStatus(ret), false) << " | "; LOG_DEBUG(ss); return ret; } +// Public interface for device +// /sys/class/kfd/kfd/topology/nodes/*/gpu_id +int KFDNode::get_gpu_id(uint64_t *gpu_id) { + std::ostringstream ss; + std::string gpuid_path = "/sys/class/kfd/kfd/topology/nodes/" + + std::to_string(this->node_indx_) + "/gpu_id"; + const uint64_t undefined_gpu_id = std::numeric_limits::max(); + std::string gpu_id_string = ""; + 
*gpu_id = undefined_gpu_id; + int ret = ReadSysfsStr(gpuid_path, &gpu_id_string); + if (ret != 0 || gpu_id_string.empty()) { + ss << __PRETTY_FUNCTION__ + << " | File: " << gpuid_path + << " | Data (*gpu_id): empty or nullptr" + << " | Issue: Could not read node #" << std::to_string(this->node_indx_) + << ". KFD node was an unsupported node or value read was empty." + << " | Return: " + << getRSMIStatusString(amd::smi::ErrnoToRsmiStatus(ret), false) + << " | "; + LOG_ERROR(ss); + return ret; + } + *gpu_id = std::stoull(gpu_id_string); + if (*gpu_id == 0) { // CPU node - return not supported + *gpu_id = undefined_gpu_id; + ret = ENOENT; // map to RSMI_STATUS_NOT_SUPPORTED + } + ss << __PRETTY_FUNCTION__ + << " | File: " << gpuid_path + << " | Read node #: " << std::to_string(this->node_indx_) + << " | Data (*gpu_id): " << std::to_string(*gpu_id) + << " | Return: " + << getRSMIStatusString(amd::smi::ErrnoToRsmiStatus(ret), false) + << " | "; + LOG_DEBUG(ss); + return ret; +} + +// Public interface for device +// /sys/class/kfd/kfd/topology/nodes/ +int KFDNode::get_node_id(uint32_t *node_id) { + std::ostringstream ss; + int ret = 0; + std::string nodeid_path = "/sys/class/kfd/kfd/topology/nodes/" + + std::to_string(this->node_indx_); + ss << __PRETTY_FUNCTION__ + << " | File: " << nodeid_path + << " | Read node #: " << std::to_string(this->node_indx_) + << " | Data (*node_id): " << std::to_string(*node_id) + << " | Return: " + << getRSMIStatusString(amd::smi::ErrnoToRsmiStatus(ret), false) + << " | "; + *node_id = this->node_indx_; + LOG_DEBUG(ss); + return ret; +} + } // namespace smi } // namespace amd diff --git a/tests/rocm_smi_test/functional/gpu_metrics_read.cc b/tests/rocm_smi_test/functional/gpu_metrics_read.cc index a3fa8af4b0..2d7bde7c67 100644 --- a/tests/rocm_smi_test/functional/gpu_metrics_read.cc +++ b/tests/rocm_smi_test/functional/gpu_metrics_read.cc @@ -47,6 +47,7 @@ #include #include +#include #include #include @@ -119,6 +120,13 @@ auto 
print_error_or_value(std::string title, std::string func_name, const T& met } }; +template +std::string print_unsigned_int(T value) { + std::stringstream ss; + ss << static_cast(value | 0); + + return ss.str(); +} void TestGpuMetricsRead::Run(void) { rsmi_status_t err; @@ -133,8 +141,19 @@ void TestGpuMetricsRead::Run(void) { PrintDeviceHeader(i); IF_VERB(STANDARD) { + std::cout << "\n\n"; std::cout << "\t**GPU METRICS: Using static struct (Backwards Compatibility):\n"; + + metrics_table_header_t header_values; + auto ret = rsmi_dev_metrics_header_info_get(i, &header_values); + if (ret == rsmi_status_t::RSMI_STATUS_SUCCESS) { + std::cout << "\t[Metrics Header]" << "\n"; + std::cout << "\t -> format_revision : " << print_unsigned_int(header_values.format_revision) << "\n"; + std::cout << "\t -> content_revision : " << print_unsigned_int(header_values.content_revision) << "\n"; + std::cout << "\t--------------------" << "\n"; + } } + rsmi_gpu_metrics_t smu; err = rsmi_dev_gpu_metrics_info_get(i, &smu); if (err != RSMI_STATUS_SUCCESS) { @@ -243,491 +262,11 @@ void TestGpuMetricsRead::Run(void) { // Verify api support checking functionality is working err = rsmi_dev_gpu_metrics_info_get(i, nullptr); ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS); - } - - // - auto val_ui16 = uint16_t(0); - auto val_ui32 = uint32_t(0); - auto val_ui64 = uint64_t(0); - auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS); - - std::cout << "\n\t**GPU METRICS: Using direct APIs (newer):\n"; - for (uint32_t i = 0; i < num_monitor_devs(); ++i) { - PrintDeviceHeader(i); - - auto temp_edge_value = val_ui16; - status_code = rsmi_dev_metrics_temp_edge_get(i, &temp_edge_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_edge_get", status_code); - - auto temp_hotspot_value = val_ui16; - status_code = rsmi_dev_metrics_temp_hotspot_get(i, &temp_hotspot_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - 
CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_hotspot_get", status_code); - - auto temp_mem_value = val_ui16; - status_code = rsmi_dev_metrics_temp_mem_get(i, &temp_mem_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_mem_get", status_code); - - auto temp_vrgfx_value = val_ui16; - status_code = rsmi_dev_metrics_temp_vrgfx_get(i, &temp_vrgfx_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_vrgfx_get", status_code); - - auto temp_vrsoc_value = val_ui16; - status_code = rsmi_dev_metrics_temp_vrsoc_get(i, &temp_vrsoc_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_vrsoc_get", status_code); - - auto temp_vrmem_value = val_ui16; - status_code = rsmi_dev_metrics_temp_vrmem_get(i, &temp_vrmem_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_vrmem_get", status_code); - - GPUMetricTempHbm_t temp_hbm_values{}; - status_code = rsmi_dev_metrics_temp_hbm_get(i, &temp_hbm_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_temp_hbm_get", status_code); - - auto temp_curr_socket_power_value = val_ui16; - status_code = rsmi_dev_metrics_curr_socket_power_get(i, &temp_curr_socket_power_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_socket_power_get", status_code); - - auto temp_energy_accum_value = val_ui64; - status_code = rsmi_dev_metrics_energy_acc_get(i, &temp_energy_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_energy_acc_get", 
status_code); - - auto temp_avg_socket_power_value = val_ui16; - status_code = rsmi_dev_metrics_avg_socket_power_get(i, &temp_avg_socket_power_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_socket_power_get", status_code); - - auto temp_avg_gfx_activity_value = val_ui16; - status_code = rsmi_dev_metrics_avg_gfx_activity_get(i, &temp_avg_gfx_activity_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_gfx_activity_get", status_code); - - auto temp_avg_umc_activity_value = val_ui16; - status_code = rsmi_dev_metrics_avg_umc_activity_get(i, &temp_avg_umc_activity_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_umc_activity_get", status_code); - - auto temp_avg_mm_activity_value = val_ui16; - status_code = rsmi_dev_metrics_avg_mm_activity_get(i, &temp_avg_mm_activity_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_mm_activity_get", status_code); - - GPUMetricVcnActivity_t temp_vcn_values{}; - status_code = rsmi_dev_metrics_vcn_activity_get(i, &temp_vcn_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_vcn_activity_get", status_code); - - GPUMetricJpegActivity_t temp_jpeg_values{}; - status_code = rsmi_dev_metrics_jpeg_activity_get(i, &temp_jpeg_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_jpeg_activity_get", status_code); - - auto temp_mem_activity_accum_value = val_ui32; - status_code = rsmi_dev_metrics_mem_activity_acc_get(i, &temp_mem_activity_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - 
MetricResults.emplace("rsmi_dev_metrics_mem_activity_acc_get", status_code); - - auto temp_gfx_activity_accum_value = val_ui32; - status_code = rsmi_dev_metrics_gfx_activity_acc_get(i, &temp_gfx_activity_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_gfx_activity_acc_get", status_code); - - auto temp_avg_gfx_clock_freq_value = val_ui16; - status_code = rsmi_dev_metrics_avg_gfx_clock_frequency_get(i, &temp_avg_gfx_clock_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_gfx_clock_frequency_get", status_code); - - auto temp_avg_soc_clock_freq_value = val_ui16; - status_code = rsmi_dev_metrics_avg_soc_clock_frequency_get(i, &temp_avg_soc_clock_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_soc_clock_frequency_get", status_code); - - auto temp_avg_uclock_freq_value = val_ui16; - status_code = rsmi_dev_metrics_avg_uclock_frequency_get(i, &temp_avg_uclock_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_uclock_frequency_get", status_code); - - auto temp_avg_vclock0_freq_value = val_ui16; - status_code = rsmi_dev_metrics_avg_vclock0_frequency_get(i, &temp_avg_vclock0_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_vclock0_frequency_get", status_code); - - auto temp_avg_dclock0_freq_value = val_ui16; - status_code = rsmi_dev_metrics_avg_dclock0_frequency_get(i, &temp_avg_dclock0_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_dclock0_frequency_get", status_code); - - auto temp_avg_vclock1_freq_value = val_ui16; - status_code 
= rsmi_dev_metrics_avg_vclock1_frequency_get(i, &temp_avg_vclock1_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_vclock1_frequency_get", status_code); - - auto temp_avg_dclock1_freq_value = val_ui16; - status_code = rsmi_dev_metrics_avg_dclock1_frequency_get(i, &temp_avg_dclock1_freq_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_avg_dclock1_frequency_get", status_code); - - auto temp_curr_vclk1_value = val_ui16; - status_code = rsmi_dev_metrics_curr_vclk1_get(i, &temp_curr_vclk1_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_vclk1_get", status_code); - - auto temp_curr_dclk1_value = val_ui16; - status_code = rsmi_dev_metrics_curr_dclk1_get(i, &temp_curr_dclk1_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_dclk1_get", status_code); - - auto temp_curr_uclk_value = val_ui16; - status_code = rsmi_dev_metrics_curr_uclk_get(i, &temp_curr_uclk_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_uclk_get", status_code); - - GPUMetricCurrDClk0_t temp_curr_dclk0_values{}; - status_code = rsmi_dev_metrics_curr_dclk0_get(i, &temp_curr_dclk0_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_dclk0_get", status_code); - - GPUMetricCurrGfxClk_t temp_curr_gfxclk_values{}; - status_code = rsmi_dev_metrics_curr_gfxclk_get(i, &temp_curr_gfxclk_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_gfxclk_get", status_code); - - GPUMetricCurrSocClk_t 
temp_curr_socclk_values{}; - status_code = rsmi_dev_metrics_curr_socclk_get(i, &temp_curr_socclk_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_socclk_get", status_code); - - GPUMetricCurrVClk0_t temp_curr_vclk0_values{}; - status_code = rsmi_dev_metrics_curr_vclk0_get(i, &temp_curr_vclk0_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_vclk0_get", status_code); - - auto temp_indep_throttle_status_value = val_ui64; - status_code = rsmi_dev_metrics_indep_throttle_status_get(i, &temp_indep_throttle_status_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_indep_throttle_status_get", status_code); - - auto temp_throttle_status_value = val_ui32; - status_code = rsmi_dev_metrics_throttle_status_get(i, &temp_throttle_status_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_throttle_status_get", status_code); - - auto temp_gfxclk_lock_status_value = val_ui32; - status_code = rsmi_dev_metrics_gfxclk_lock_status_get(i, &temp_gfxclk_lock_status_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_gfxclk_lock_status_get", status_code); - - auto temp_curr_fan_speed_value = val_ui16; - status_code = rsmi_dev_metrics_curr_fan_speed_get(i, &temp_curr_fan_speed_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_curr_fan_speed_get", status_code); - - auto temp_pcie_link_width_value = val_ui16; - status_code = rsmi_dev_metrics_pcie_link_width_get(i, &temp_pcie_link_width_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - 
MetricResults.emplace("rsmi_dev_metrics_pcie_link_width_get", status_code); - - auto temp_pcie_link_speed_value = val_ui16; - status_code = rsmi_dev_metrics_pcie_link_speed_get(i, &temp_pcie_link_speed_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_link_speed_get", status_code); - - auto temp_pcie_bandwidth_accum_value = val_ui64; - status_code = rsmi_dev_metrics_pcie_bandwidth_acc_get(i, &temp_pcie_bandwidth_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_bandwidth_acc_get", status_code); - - auto temp_pcie_bandwidth_inst_value = val_ui64; - status_code = rsmi_dev_metrics_pcie_bandwidth_inst_get(i, &temp_pcie_bandwidth_inst_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_bandwidth_inst_get", status_code); - - auto temp_pcie_l0_recov_count_accum_value = val_ui64; - status_code = rsmi_dev_metrics_pcie_l0_recov_count_acc_get(i, &temp_pcie_l0_recov_count_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_l0_recov_count_acc_get", status_code); - - auto temp_pcie_replay_count_accum_value = val_ui64; - status_code = rsmi_dev_metrics_pcie_replay_count_acc_get(i, &temp_pcie_replay_count_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_replay_count_acc_get", status_code); - - auto temp_pcie_replay_rover_count_accum_value = val_ui64; - status_code = rsmi_dev_metrics_pcie_replay_rover_count_acc_get(i, &temp_pcie_replay_rover_count_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_replay_rover_count_acc_get", status_code); - - 
auto temp_pcie_nak_sent_count_accum_value = val_ui32; - status_code = rsmi_dev_metrics_pcie_nak_sent_count_acc_get(i, &temp_pcie_nak_sent_count_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_nak_sent_count_acc_get", status_code); - - auto temp_pcie_nak_rcvd_count_accum_value = val_ui32; - status_code = rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get(i, &temp_pcie_nak_rcvd_count_accum_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get", status_code); - - auto temp_xgmi_link_width_value = val_ui16; - status_code = rsmi_dev_metrics_xgmi_link_width_get(i, &temp_xgmi_link_width_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_xgmi_link_width_get", status_code); - - auto temp_xgmi_link_speed_value = val_ui16; - status_code = rsmi_dev_metrics_xgmi_link_speed_get(i, &temp_xgmi_link_speed_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_xgmi_link_speed_get", status_code); - - GPUMetricXgmiReadDataAcc_t temp_xgmi_read_values{}; - status_code = rsmi_dev_metrics_xgmi_read_data_get(i, &temp_xgmi_read_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_xgmi_read_data_get", status_code); - - GPUMetricXgmiWriteDataAcc_t temp_xgmi_write_values{}; - status_code = rsmi_dev_metrics_xgmi_write_data_get(i, &temp_xgmi_write_values); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_xgmi_write_data_get", status_code); - - auto temp_voltage_soc_value = val_ui16; - status_code = rsmi_dev_metrics_volt_soc_get(i, &temp_voltage_soc_value); - if (status_code != 
RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_volt_soc_get", status_code); - - auto temp_voltage_gfx_value = val_ui16; - status_code = rsmi_dev_metrics_volt_gfx_get(i, &temp_voltage_gfx_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_volt_gfx_get", status_code); - - auto temp_voltage_mem_value = val_ui16; - status_code = rsmi_dev_metrics_volt_mem_get(i, &temp_voltage_mem_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_volt_mem_get", status_code); - - auto temp_system_clock_counter_value = val_ui64; - status_code = rsmi_dev_metrics_system_clock_counter_get(i, &temp_system_clock_counter_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_system_clock_counter_get", status_code); - - auto temp_firmware_timestamp_value = val_ui64; - status_code = rsmi_dev_metrics_firmware_timestamp_get(i, &temp_firmware_timestamp_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_firmware_timestamp_get", status_code); - - auto temp_xcd_counter_value = val_ui16; - status_code = rsmi_dev_metrics_xcd_counter_get(i, &temp_xcd_counter_value); - if (status_code != RSMI_STATUS_NOT_SUPPORTED) { - CHK_ERR_ASRT(status_code); - } - MetricResults.emplace("rsmi_dev_metrics_xcd_counter_get", status_code); - - IF_VERB(STANDARD) { - std::cout << "\n"; - std::cout << "\t[Temperature]" << "\n"; - std::cout << print_error_or_value("\t -> temp_edge(): ", "rsmi_dev_metrics_temp_edge_get", temp_edge_value) << "\n"; - std::cout << print_error_or_value("\t -> temp_hotspot(): ", "rsmi_dev_metrics_temp_hotspot_get", temp_hotspot_value) << "\n"; - std::cout << print_error_or_value("\t -> temp_mem(): ", 
"rsmi_dev_metrics_temp_mem_get", temp_mem_value) << "\n"; - std::cout << print_error_or_value("\t -> temp_vrgfx(): ", "rsmi_dev_metrics_temp_vrgfx_get", temp_vrgfx_value) << "\n"; - std::cout << print_error_or_value("\t -> temp_vrsoc(): ", "rsmi_dev_metrics_temp_vrsoc_get", temp_vrsoc_value) << "\n"; - std::cout << print_error_or_value("\t -> temp_vrmem(): ", "rsmi_dev_metrics_temp_vrmem_get", temp_vrmem_value) << "\n"; - std::cout << print_error_or_value("\t -> temp_hbm[]: ", "rsmi_dev_metrics_temp_hbm_get", temp_hbm_values) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Power/Energy]" << "\n"; - std::cout << print_error_or_value("\t -> current_socket_power(): ", "rsmi_dev_metrics_curr_socket_power_get", temp_curr_socket_power_value) << "\n"; - std::cout << print_error_or_value("\t -> energy_accum(): ", "rsmi_dev_metrics_energy_acc_get", temp_energy_accum_value) << "\n"; - std::cout << print_error_or_value("\t -> average_socket_power(): ", "rsmi_dev_metrics_avg_socket_power_get", temp_avg_socket_power_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Utilization]" << "\n"; - std::cout << print_error_or_value("\t -> average_gfx_activity(): ", "rsmi_dev_metrics_avg_gfx_activity_get", temp_avg_gfx_activity_value) << "\n"; - std::cout << print_error_or_value("\t -> average_umc_activity(): ", "rsmi_dev_metrics_avg_umc_activity_get", temp_avg_umc_activity_value) << "\n"; - std::cout << print_error_or_value("\t -> average_mm_activity(): ", "rsmi_dev_metrics_avg_mm_activity_get", temp_avg_mm_activity_value) << "\n"; - std::cout << print_error_or_value("\t -> vcn_activity[]: ", "rsmi_dev_metrics_vcn_activity_get", temp_vcn_values) << "\n"; - std::cout << print_error_or_value("\t -> jpeg_activity[]: ", "rsmi_dev_metrics_jpeg_activity_get", temp_jpeg_values) << "\n"; - - std::cout << "\n"; - std::cout << print_error_or_value("\t -> mem_activity_accum(): ", "rsmi_dev_metrics_mem_activity_acc_get", temp_mem_activity_accum_value) << "\n"; - std::cout << 
print_error_or_value("\t -> gfx_activity_accum(): ", "rsmi_dev_metrics_gfx_activity_acc_get", temp_gfx_activity_accum_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Average Clock]" << "\n"; - std::cout << print_error_or_value("\t -> average_gfx_clock_frequency(): ", "rsmi_dev_metrics_avg_gfx_clock_frequency_get", temp_avg_gfx_clock_freq_value) << "\n"; - std::cout << print_error_or_value("\t -> average_soc_clock_frequency(): ", "rsmi_dev_metrics_avg_soc_clock_frequency_get", temp_avg_soc_clock_freq_value) << "\n"; - std::cout << print_error_or_value("\t -> average_uclock_frequency(): ", "rsmi_dev_metrics_avg_uclock_frequency_get", temp_avg_uclock_freq_value) << "\n"; - std::cout << print_error_or_value("\t -> average_vclock0_frequency(): ", "rsmi_dev_metrics_avg_vclock0_frequency_get", temp_avg_vclock0_freq_value) << "\n"; - std::cout << print_error_or_value("\t -> average_dclock0_frequency(): ", "rsmi_dev_metrics_avg_dclock0_frequency_get", temp_avg_dclock0_freq_value) << "\n"; - std::cout << print_error_or_value("\t -> average_vclock1_frequency(): ", "rsmi_dev_metrics_avg_vclock1_frequency_get", temp_avg_vclock1_freq_value) << "\n"; - std::cout << print_error_or_value("\t -> average_dclock1_frequency(): ", "rsmi_dev_metrics_avg_dclock1_frequency_get", temp_avg_dclock1_freq_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Current Clock]" << "\n"; - std::cout << print_error_or_value("\t -> current_vclock1(): ", "rsmi_dev_metrics_curr_vclk1_get", temp_curr_vclk1_value) << "\n"; - std::cout << print_error_or_value("\t -> current_dclock1(): ", "rsmi_dev_metrics_curr_dclk1_get", temp_curr_dclk1_value) << "\n"; - std::cout << print_error_or_value("\t -> current_uclock(): ", "rsmi_dev_metrics_curr_uclk_get", temp_curr_uclk_value) << "\n"; - std::cout << print_error_or_value("\t -> current_dclk0[]: ", "rsmi_dev_metrics_curr_dclk0_get", temp_curr_dclk0_values) << "\n"; - std::cout << print_error_or_value("\t -> current_gfxclk[]: ", 
"rsmi_dev_metrics_curr_gfxclk_get", temp_curr_gfxclk_values) << "\n"; - std::cout << print_error_or_value("\t -> current_soc_clock[]: ", "rsmi_dev_metrics_curr_socclk_get", temp_curr_socclk_values) << "\n"; - std::cout << print_error_or_value("\t -> current_vclk0[]: ", "rsmi_dev_metrics_curr_vclk0_get", temp_curr_vclk0_values) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Throttle]" << "\n"; - std::cout << print_error_or_value("\t -> indep_throttle_status(): ", "rsmi_dev_metrics_indep_throttle_status_get", temp_indep_throttle_status_value) << "\n"; - std::cout << print_error_or_value("\t -> throttle_status(): ", "rsmi_dev_metrics_throttle_status_get", temp_throttle_status_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Gfx Clock Lock]" << "\n"; - std::cout << print_error_or_value("\t -> gfxclk_lock_status(): ", "rsmi_dev_metrics_gfxclk_lock_status_get", temp_gfxclk_lock_status_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Current Fan Speed]" << "\n"; - std::cout << print_error_or_value("\t -> current_fan_speed(): ", "rsmi_dev_metrics_curr_fan_speed_get", temp_curr_fan_speed_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Link/Bandwidth/Speed]" << "\n"; - std::cout << print_error_or_value("\t -> pcie_link_width(): ", "rsmi_dev_metrics_pcie_link_width_get", temp_pcie_link_width_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_link_speed(): ", "rsmi_dev_metrics_pcie_link_speed_get", temp_pcie_link_speed_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_bandwidth_accum(): ", "rsmi_dev_metrics_pcie_bandwidth_acc_get", temp_pcie_bandwidth_accum_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_bandwidth_inst(): ", "rsmi_dev_metrics_pcie_bandwidth_inst_get", temp_pcie_bandwidth_inst_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_l0_recov_count_accum(): ", "rsmi_dev_metrics_pcie_l0_recov_count_acc_get", temp_pcie_l0_recov_count_accum_value) << "\n"; - std::cout << 
print_error_or_value("\t -> pcie_replay_count_accum(): ", "rsmi_dev_metrics_pcie_replay_count_acc_get", temp_pcie_replay_count_accum_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_replay_rollover_count_accum(): ", "rsmi_dev_metrics_pcie_replay_rover_count_acc_get", temp_pcie_replay_rover_count_accum_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_nak_sent_count_accum(): ", "rsmi_dev_metrics_pcie_nak_sent_count_acc_get", temp_pcie_nak_sent_count_accum_value) << "\n"; - std::cout << print_error_or_value("\t -> pcie_nak_rcvd_count_accum(): ", "rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get", temp_pcie_nak_rcvd_count_accum_value) << "\n"; - std::cout << print_error_or_value("\t -> xgmi_link_width(): ", "rsmi_dev_metrics_xgmi_link_width_get", temp_xgmi_link_width_value) << "\n"; - std::cout << print_error_or_value("\t -> xgmi_link_speed(): ", "rsmi_dev_metrics_xgmi_link_speed_get", temp_xgmi_link_speed_value) << "\n"; - std::cout << print_error_or_value("\t -> xgmi_read_data[]: ", "rsmi_dev_metrics_xgmi_read_data_get", temp_xgmi_read_values) << "\n"; - std::cout << print_error_or_value("\t -> xgmi_write_data[]: ", "rsmi_dev_metrics_xgmi_write_data_get", temp_xgmi_write_values) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Voltage]" << "\n"; - std::cout << print_error_or_value("\t -> voltage_soc(): ", "rsmi_dev_metrics_volt_soc_get", temp_voltage_soc_value) << "\n"; - std::cout << print_error_or_value("\t -> voltage_gfx(): ", "rsmi_dev_metrics_volt_gfx_get", temp_voltage_gfx_value) << "\n"; - std::cout << print_error_or_value("\t -> voltage_mem(): ", "rsmi_dev_metrics_volt_mem_get", temp_voltage_mem_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[Timestamp]" << "\n"; - std::cout << print_error_or_value("\t -> system_clock_counter(): ", "rsmi_dev_metrics_system_clock_counter_get", temp_system_clock_counter_value) << "\n"; - std::cout << print_error_or_value("\t -> firmware_timestamp(): ", 
"rsmi_dev_metrics_firmware_timestamp_get", temp_firmware_timestamp_value) << "\n"; - - std::cout << "\n"; - std::cout << "\t[XCD CounterVoltage]" << "\n"; - std::cout << print_error_or_value("\t -> xcd_counter(): ", "rsmi_dev_metrics_xcd_counter_get", temp_xcd_counter_value) << "\n"; - std::cout << "\n\n"; + auto temp_xcd_counter_value = uint16_t(0); + err = rsmi_dev_metrics_xcd_counter_get(i, &temp_xcd_counter_value); + if (err != RSMI_STATUS_NOT_SUPPORTED) { + CHK_ERR_ASRT(err); } } - } diff --git a/tests/rocm_smi_test/functional/measure_api_execution_time.cc b/tests/rocm_smi_test/functional/measure_api_execution_time.cc index e8bc54b044..4cbbec7553 100644 --- a/tests/rocm_smi_test/functional/measure_api_execution_time.cc +++ b/tests/rocm_smi_test/functional/measure_api_execution_time.cc @@ -167,930 +167,14 @@ void TestMeasureApiExecutionTime::Run(void) { skip = false; std::cout << "----------------------------------------------------------------------------" << std::endl; - //Test execution time for each individual gpu metric auto val_ui16 = uint16_t(0); - auto val_ui32 = uint32_t(0); - auto val_ui64 = uint64_t(0); - GPUMetricTempHbm_t temp_hbm_values; - GPUMetricVcnActivity_t temp_vcn_values; - GPUMetricJpegActivity_t temp_jpeg_values; - GPUMetricCurrDClk0_t temp_curr_dclk0_values; - GPUMetricCurrGfxClk_t temp_curr_gfxclk_values; - GPUMetricCurrSocClk_t temp_curr_socclk_values; - GPUMetricCurrVClk0_t temp_curr_vclk0_values; - GPUMetricXgmiReadDataAcc_t temp_xgmi_read_values; - GPUMetricXgmiWriteDataAcc_t temp_xgmi_write_values; auto status_code(rsmi_status_t::RSMI_STATUS_SUCCESS); - - start = std::chrono::high_resolution_clock::now(); auto start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_edge_get(dv_ind, &val_ui16); - } - auto stop_api = std::chrono::high_resolution_clock::now(); - auto duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != 
rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_edge_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_hotspot_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_hotspot_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_mem_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_mem_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_vrgfx_get(dv_ind, &val_ui16); - } - stop_api = 
std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_vrgfx_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_vrsoc_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_vrsoc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_vrmem_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_vrmem_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for 
(int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_temp_hbm_get(dv_ind, &temp_hbm_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_temp_hbm_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_socket_power_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_socket_power_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_energy_acc_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_energy_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << 
"----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_socket_power_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_socket_power_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_gfx_activity_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_gfx_activity_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_umc_activity_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_umc_activity_get() execution time: " - << 
(float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_mm_activity_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_mm_activity_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_vcn_activity_get(dv_ind, &temp_vcn_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_vcn_activity_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_jpeg_activity_get(dv_ind, &temp_jpeg_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != 
rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_jpeg_activity_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_mem_activity_acc_get(dv_ind, &val_ui32); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_mem_activity_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_gfx_activity_acc_get(dv_ind, &val_ui32); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_gfx_activity_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_gfx_clock_frequency_get(dv_ind, 
&val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_gfx_clock_frequency_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_soc_clock_frequency_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_soc_clock_frequency_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_uclock_frequency_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_uclock_frequency_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << 
"----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_vclock0_frequency_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_vclock0_frequency_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_dclock0_frequency_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_dclock0_frequency_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_vclock1_frequency_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_vclock1_frequency_get() 
execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_avg_dclock1_frequency_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_avg_dclock1_frequency_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_vclk1_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_vclk1_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_dclk1_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if 
(status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_dclk1_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_uclk_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_uclk_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_dclk0_get(dv_ind, &temp_curr_dclk0_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_dclk0_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_gfxclk_get(dv_ind, 
&temp_curr_gfxclk_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_gfxclk_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_socclk_get(dv_ind, &temp_curr_socclk_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_socclk_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_vclk0_get(dv_ind, &temp_curr_vclk0_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_vclk0_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << 
std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_indep_throttle_status_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_indep_throttle_status_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_throttle_status_get(dv_ind, &val_ui32); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_throttle_status_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_gfxclk_lock_status_get(dv_ind, &val_ui32); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_gfxclk_lock_status_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - 
EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_curr_fan_speed_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_curr_fan_speed_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_link_width_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_link_width_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_link_speed_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout 
<< "\rsmi_dev_metrics_pcie_link_speed_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_bandwidth_acc_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_bandwidth_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_bandwidth_inst_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_bandwidth_inst_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_l0_recov_count_acc_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - 
duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_l0_recov_count_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_replay_count_acc_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_replay_count_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_replay_rover_count_acc_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_replay_rover_count_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = 
std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_nak_sent_count_acc_get(dv_ind, &val_ui32); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_nak_sent_count_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get(dv_ind, &val_ui32); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_pcie_nak_rcvd_count_acc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_xgmi_link_width_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_xgmi_link_width_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; -
EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_xgmi_link_speed_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_xgmi_link_speed_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_xgmi_read_data_get(dv_ind, &temp_xgmi_read_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_xgmi_read_data_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_xgmi_write_data_get(dv_ind, &temp_xgmi_write_values); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - }
- if (!skip) { - std::cout << "\rsmi_dev_metrics_xgmi_write_data_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_volt_soc_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_volt_soc_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_volt_gfx_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_volt_gfx_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_volt_mem_get(dv_ind, &val_ui16); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api =
std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_volt_mem_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_system_clock_counter_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_system_clock_counter_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); - for (int i=0; i < repeat; ++i) { - status_code = rsmi_dev_metrics_firmware_timestamp_get(dv_ind, &val_ui64); - } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); - if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ - skip = true; - } - if (!skip) { - std::cout << "\rsmi_dev_metrics_firmware_timestamp_get() execution time: " - << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * repeat); - } - skip = false; - std::cout << "----------------------------------------------------------------------------" << std::endl; - - start_api = std::chrono::high_resolution_clock::now(); for (int i=0; i < repeat; ++i) {
status_code = rsmi_dev_metrics_xcd_counter_get(dv_ind, &val_ui16); } - stop_api = std::chrono::high_resolution_clock::now(); - duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); + auto stop_api = std::chrono::high_resolution_clock::now(); + auto duration_api = std::chrono::duration_cast<std::chrono::microseconds>(stop_api - start_api); if (status_code != rsmi_status_t::RSMI_STATUS_SUCCESS){ skip = true; } @@ -1105,10 +189,9 @@ void TestMeasureApiExecutionTime::Run(void) { stop = std::chrono::high_resolution_clock::now(); duration = std::chrono::duration_cast<std::chrono::microseconds>(stop - start); if (!skip) { - const auto kTOTAL_GPU_METRICS_APIS = uint16_t(57); std::cout << "\rTotal execution time (All APIs): " << (float(duration_api.count()) / repeat) << " microseconds" << std::endl; - EXPECT_LT(duration_api.count(), 500 * (repeat * kTOTAL_GPU_METRICS_APIS)); + EXPECT_LT(duration_api.count(), (500 * repeat)); } skip = false; std::cout << "============================================================================" << std::endl; diff --git a/tests/rocm_smi_test/functional/sys_info_read.cc b/tests/rocm_smi_test/functional/sys_info_read.cc index d7d681f5c9..2f0508f5f2 100755 --- a/tests/rocm_smi_test/functional/sys_info_read.cc +++ b/tests/rocm_smi_test/functional/sys_info_read.cc @@ -206,12 +206,33 @@ void TestSysInfoRead::Run(void) { err = rsmi_dev_target_graphics_version_get(i, &val_ui64); IF_VERB(STANDARD) { - std::cout << "\t**Graphics Target version: " << std::dec + std::cout << "\t**Target GFX version: " << std::dec << val_ui64 << "\n"; } EXPECT_EQ(err, RSMI_STATUS_SUCCESS); EXPECT_NE(val_ui64, std::numeric_limits<uint64_t>::max()); err = rsmi_dev_target_graphics_version_get(i, nullptr); EXPECT_EQ(err, RSMI_STATUS_INVALID_ARGS); + + err = rsmi_dev_guid_get(i, &val_ui64); + IF_VERB(STANDARD) { + std::cout << "\t**GUID: " << std::dec + << val_ui64 << "\n"; + } + EXPECT_EQ(err, RSMI_STATUS_SUCCESS); + EXPECT_NE(val_ui64, std::numeric_limits<uint64_t>::max()); + err = rsmi_dev_guid_get(i, nullptr); + EXPECT_EQ(err,
RSMI_STATUS_INVALID_ARGS); + + err = rsmi_dev_node_id_get(i, &val_ui32); + IF_VERB(STANDARD) { + std::cout << "\t**Node ID: " << std::dec + << val_ui32 << "\n"; + } + EXPECT_EQ(err, RSMI_STATUS_SUCCESS); + EXPECT_NE(val_ui32, std::numeric_limits<uint32_t>::max()); + err = rsmi_dev_node_id_get(i, nullptr); + EXPECT_EQ(err, RSMI_STATUS_INVALID_ARGS); + + } }