diff --git a/projects/rocm-smi-lib/docs/ROCm_SMI_Manual.pdf b/projects/rocm-smi-lib/docs/ROCm_SMI_Manual.pdf index b809f8b20e..954af57023 100644 Binary files a/projects/rocm-smi-lib/docs/ROCm_SMI_Manual.pdf and b/projects/rocm-smi-lib/docs/ROCm_SMI_Manual.pdf differ diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 0656591a6e..b18168011a 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -227,13 +227,23 @@ typedef enum { RSMI_GPU_BLOCK_SDMA, RSMI_GPU_BLOCK_GFX, - RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_GFX, + RSMI_GPU_BLOCK_LAST = RSMI_GPU_BLOCK_GFX } rsmi_gpu_block_t; /// \cond Ignore in docs. typedef rsmi_gpu_block_t rsmi_gpu_block; /// \endcond -typedef struct { +/** + * @brief Types of memory + */ +typedef enum { + RSMI_MEM_TYPE_FIRST = 0, + + RSMI_MEM_TYPE_VRAM = RSMI_MEM_TYPE_FIRST, //!< VRAM memory + RSMI_MEM_TYPE_VIS_VRAM, //!< VRAM memory that is visible + RSMI_MEM_TYPE_GTT, //!< GTT memory + + RSMI_MEM_TYPE_LAST = RSMI_MEM_TYPE_GTT } rsmi_memory_type_t; /** * @brief This values of this enum are used as frequency identifiers. @@ -420,7 +430,12 @@ typedef struct { uint64_t uncorrectable_err; //!< Accumulated uncorrectable errors } rsmi_error_count_t; - +/*****************************************************************************/ +/** @defgroup InitShutAdmin Initialization and Shutdown + * These functions are used for initialization of ROCm SMI and clean up when + * done. + * @{ + */ /** * @brief Initialize ROCm SMI. * @@ -441,6 +456,13 @@ rsmi_status_t rsmi_init(uint64_t init_flags); */ rsmi_status_t rsmi_shut_down(void); +/** @} */ // end of InitShut + +/*****************************************************************************/ +/** @defgroup IDQuer Identifier Queries + * These functions provide identification information. + * @{ + */ /** * @brief Get the number of devices that have monitor information. 
* @@ -455,6 +477,54 @@ rsmi_status_t rsmi_shut_down(void); */ rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices); +/** + * @brief Get the device id associated with the device with provided device + * index. + * + * @details Given a device index @p dv_ind and a pointer to a uint64_t @p id, + * this function will write the device id value to the uint64_t pointed to by + * @p id. This ID is an identification of the type of device, so calling this + * function for different devices will give the same value if they are the + * same kind of device. Consequently, this function should not be used to + * distinguish one device from another. rsmi_dev_pci_id_get() should be used + * to get a unique identifier. + * + * @param[in] dv_ind a device index + * + * @param[inout] id a pointer to uint64_t to which the device id will be + * written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint64_t *id); + +/** + * @brief Get the name of a gpu device. + * + * @details Given a device index @p dv_ind, a pointer to a caller provided + * char buffer @p name, and a length of this buffer @p len, this function + * will write the name of the device (up to @p len characters) into @p name. + * + * @param[in] dv_ind a device index + * + * @param[inout] name a pointer to a caller provided char buffer to which the + * name will be written + * + * @param[in] len the length of the caller provided buffer @p name. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len); + +/** @} */ // end of IDQuer + +/*****************************************************************************/ +/** @defgroup PCIeQuer PCIe Queries + * These functions provide information about PCIe. + * @{ + */ /** * @brief Get the list of possible PCIe bandwidths that are available.
* @@ -476,23 +546,54 @@ rsmi_status_t rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *bandwidth); /** - * @brief Get percentage of time device is busy doing any processing + * @brief Get the unique PCI device identifier associated with a device * - * @details Given a device index @p dv_ind, this function returns the - * percentage of time that the specified device is busy. The device is - * considered busy if any one or more of its sub-blocks are working, and idle - * if none of the sub-blocks are working. + * @details Given a device index @p dv_ind and a pointer to a uint64_t @p + * bdfid, this function will write the Bus/Device/Function PCI identifier + * (BDFID) associated with device @p dv_ind to the value pointed to by + * @p bdfid. * - * @param[in] dv_ind a device index + * @param[in] dv_ind a device index * - * @param[inout] busy_percent a pointer to the uint32_t to which the busy - * percent will be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call + * @param[inout] bdfid a pointer to uint64_t to which the device bdfid value + * will be written * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + + */ +rsmi_status_t rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid); + +/** + * @brief Get PCIe traffic information + * + * @details Given a device index @p dv_ind and pointers to uint64_t's, @p + * sent, @p received and @p max_pkt_sz, this function will write the number + * of bytes sent and received in 1 second to @p sent and @p received, + * respectively. The maximum possible packet size will be written to + * @p max_pkt_sz. + * + * @param[in] dv_ind a device index + * + * @param[inout] sent a pointer to uint64_t to which the number of bytes sent + * in 1 second will be written. If pointer is NULL, it will be ignored. + * + * @param[inout] received a pointer to uint64_t to which the number of bytes + * received will be written. If pointer is NULL, it will be ignored.
+ * + * @param[inout] max_pkt_sz a pointer to uint64_t to which the maximum packet + * size will be written. If pointer is NULL, it will be ignored. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + */ +rsmi_status_t rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent, + uint64_t *received, uint64_t *max_pkt_sz); + +/** @} */ // end of PCIeQuer +/*****************************************************************************/ +/** @defgroup PCIeCont PCIe Control + * These functions provide some control over PCIe. + * @{ */ -rsmi_status_t -rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent); /** * @brief Control the set of allowed PCIe bandwidths that can be used. @@ -521,297 +622,141 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent); */ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask); -/** - * @brief Get the unique PCI device identifier associated for a device - * - * @details Give a device index @p dv_ind and a pointer to a uint64_t @p - * bdfid, this function will write the Bus/Device/Function PCI identifier - * (BDFID) associated with device @p dv_ind to the value pointed to by - * @p bdfid. - * - * @param[in] dv_ind a device index - * - * @param[inout] bdfid a pointer to uint64_t to which the device bdfid value - * will be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. +/** @} */ // end of PCIeCont +/*****************************************************************************/ +/** @defgroup PowerQuer Power Queries + * These functions provide information about power usage. 
+ * @{ */ -rsmi_status_t rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid); - /** - * @brief Get PCIe traffic information - * - * @details Give a device index @p dv_ind and pointers to a uint64_t's, @p - * sent, @p received and @p max_pkt_sz, this function will write the number - * of bytes sent and received in 1 second to @p sent and @p received, - * respectively. The maximum possible packet size will be written to - * @max_pkt_sz. - * - * @param[in] dv_ind a device index - * - * @param[inout] sent a pointer to uint64_t to which the number of bytes sent - * will be written in 1 second. If pointer is NULL, it will be ignored. - * - * @param[inout] received a pointer to uint64_t to which the number of bytes - * received will be written. If pointer is NULL, it will be ignored. - * - * @param[inout] max_pkt_sz a pointer to uint64_t to which the maximum packet - * size will be written. If pointer is NULL, it will be ignored. - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - */ -rsmi_status_t rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent, - uint64_t *received, uint64_t *max_pkt_sz); - -/** - * @brief Get the device id associated with the device with provided device - * index. - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t @p id, - * this function will write the device id value to the uint64_t pointed to by - * @p id. This ID is an identification of the type of device, so calling this - * function for different devices will give the same value if they are kind - * of device. Consequently, this function should not be used to distinguish - * one device from another. rsmi_dev_pci_id_get() should be used to get a - * unique identifier. - * - * @param[in] dv_ind a device index - * - * @param[inout] id a pointer to uint64_t to which the device id will be - * written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * - */ -rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint64_t *id); - - -/** - * @brief Get the performance level of the device with provided + * @brief Get the average power consumption of the device with provided * device index. * - * @details Given a device index @p dv_ind and a pointer to a uint32_t @p - * perf, this function will write the ::rsmi_dev_perf_level_t to the uint32_t - * pointed to by @p perf - * - * @param[in] dv_ind a device index - * - * @param[inout] perf a pointer to ::rsmi_dev_perf_level_t to which the - * performance level will be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, - rsmi_dev_perf_level_t *perf); - -/** - * @brief Set the PowerPlay performance level associated with the device with - * provided device index with the provided value. - * - * @details Given a device index @p dv_ind and an ::rsmi_dev_perf_level_t @p - * perf_level, this function will set the PowerPlay performance level for the - * device to the value @p perf_lvl. - * - * @param[in] dv_ind a device index - * - * @param[in] perf_lvl the value to which the performance level should be set - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t -rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl); - -/** - * @brief Get the overdrive percent associated with the device with provided - * device index. - * - * @details Given a device index @p dv_ind and a pointer to a uint32_t @p od, - * this function will write the overdrive percentage to the uint32_t pointed - * to by @p od - * - * @param[in] dv_ind a device index - * - * @param[inout] od a pointer to uint32_t to which the overdrive percentage - * will be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * - */ -rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od); - - -/** - * @brief Set the overdrive percent associated with the device with provided - * device index with the provided value. See details for WARNING. - * - * @details Given a device index @p dv_ind and an overdrive level @p od, - * this function will set the overdrive level for the device to the value - * @p od. The overdrive level is an integer value between 0 and 20, inclusive, - * which represents the overdrive percentage; e.g., a value of 5 specifies - * an overclocking of 5%. - * - * The overdrive level is specific to the gpu system clock. - * - * The overdrive level is the percentage above the maximum Performance Level - * to which overclocking will be limited. The overclocking percentage does - * not apply to clock speeds other than the maximum. This percentage is - * limited to 20%. - * - * ******WARNING****** - * Operating your AMD GPU outside of official AMD specifications or outside of - * factory settings, including but not limited to the conducting of - * overclocking (including use of this overclocking software, even if such - * software has been directly or indirectly provided by AMD or otherwise - * affiliated in any way with AMD), may cause damage to your AMD GPU, system - * components and/or result in system failure, as well as cause other problems. - * DAMAGES CAUSED BY USE OF YOUR AMD GPU OUTSIDE OF OFFICIAL AMD SPECIFICATIONS - * OR OUTSIDE OF FACTORY SETTINGS ARE NOT COVERED UNDER ANY AMD PRODUCT - * WARRANTY AND MAY NOT BE COVERED BY YOUR BOARD OR SYSTEM MANUFACTURER'S - * WARRANTY. Please use this utility with caution. - * - * @param[in] dv_ind a device index - * - * @param[in] od the value to which the overdrive level should be set - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * - */ -rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od); - -/** - * @brief Get the list of possible system clock speeds of device for a - * specified clock type. - * - * @details Given a device index @p dv_ind, a clock type @p clk_type, and a - * pointer to a to an ::rsmi_frequencies_t structure @p f, this function will - * fill in @p f with the possible clock speeds, and indication of the current - * clock speed selection. - * - * @param[in] dv_ind a device index - * - * @param[in] clk_type the type of clock for which the frequency is desired - * - * @param[inout] f a pointer to a caller provided ::rsmi_frequencies_t structure - * to which the frequency information will be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, - rsmi_clk_type_t clk_type, rsmi_frequencies_t *f); - -/** - * @brief Control the set of allowed frequencies that can be used for the - * specified clock. - * - * @details Given a device index @p dv_ind, a clock type @p clk_type, and a - * 64 bit bitmask @p freq_bitmask, this function will limit the set of - * allowable frequencies. If a bit in @p freq_bitmask has a value of 1, then - * the frequency (as ordered in an ::rsmi_frequencies_t returned by - * rsmi_dev_gpu_clk_freq_get()) corresponding to that bit index will be - * allowed. - * - * This function will change the performance level to - * ::RSMI_DEV_PERF_LEVEL_MANUAL in order to modify the set of allowable - * frequencies. Caller will need to set to ::RSMI_DEV_PERF_LEVEL_AUTO in order - * to get back to default state. - * - * All bits with indices greater than or equal to - * ::rsmi_frequencies_t::num_supported will be ignored. 
- * - * @param[in] dv_ind a device index - * - * @param[in] clk_type the type of clock for which the set of frequencies - * will be modified - * - * @param[in] freq_bitmask A bitmask indicating the indices of the - * frequencies that are to be enabled (1) and disabled (0). Only the lowest - * ::rsmi_frequencies_t.num_supported bits of this mask are relevant. - */ -rsmi_status_t rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, - rsmi_clk_type_t clk_type, uint64_t freq_bitmask); -/** - * @brief Get the name of a gpu device. - * - * @details Given a device index @p dv_ind, a pointer to a caller provided - * char buffer @p name, and a length of this buffer @p len, this function - * will write the name of the device (up to @p len characters) buffer @p name. - * - * @param[in] dv_ind a device index - * - * @param[inout] name a pointer to a caller provided char buffer to which the - * name will be written - * - * @param[in] len the length of the caller provided buffer @p name. - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len); - -/** - * @brief Get the temperature metric value for the specified metric, from the - * specified temperature sensor on the specified device. - * - * @details Given a device index @p dv_ind, a 0-based sensor index - * @p sensor_ind, a ::rsmi_temperature_metric_t @p metric and a pointer to an - * int64_t @p temperature, this function will write the value of the metric - * indicated by @p metric to the memory location @p temperature. + * @details Given a device index @p dv_ind and a pointer to a uint64_t + * @p power, this function will write the current average power consumption to + * the uint64_t in microwatts pointed to by @p power. This function requires + * root privilege. * * @param[in] dv_ind a device index * * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. 
* If a device has more than one sensor, it could be greater than 0. * - * @param[in] metric enum indicated which temperature value should be - * retrieved - * - * @param[inout] temperature a pointer to int64_t to which the temperature - * will be written, in millidegrees Celcius. + * @param[inout] power a pointer to uint64_t to which the average power + * consumption will be written * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. * */ -rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind, - rsmi_temperature_metric_t metric, int64_t *temperature); +rsmi_status_t +rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power); /** - * @brief Retrieve the error counts for a GPU block - * - * @details Given a device index @p dv_ind, an ::rsmi_gpu_block_t @p block and a - * pointer to an ::rsmi_error_count_t @p ec, this function will write the error - * count values for the GPU block indicated by @p block to memory pointed to by - * @p ec. - * - * @param[in] dv_ind a device index - * - * @param[in] block The block for which error counts should be retrieved - * - * @param[inout] ec A pointer to an ::rsmi_error_count_t to which the error - * counts should be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t rsmi_dev_error_count_get(uint32_t dv_ind, - rsmi_gpu_block_t block, rsmi_error_count_t *ec); -/** - * @brief Reset the fan to automatic driver control + * @brief Get the cap on power which, when reached, causes the system to take + * action to reduce power. * - * @details This function returns control of the fan to the system + * @details When power use rises above the value @p cap, the system will + * take action to reduce power use. The power level returned through + * @p cap will be in microwatts. + * + * @param[in] dv_ind a device index + * + * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0.
+ * If a device has more than one sensor, it could be greater than 0. + * + * @param[inout] cap a pointer to a uint64_t that indicates the power cap, + * in microwatts + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t +rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap); + +/** + * @brief Get the range of valid values for the power cap + * + * @details This function will return the maximum possible valid power cap + * @p max and the minimum possible valid power cap @p min + * + * @param[in] dv_ind a device index + * + * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. + * If a device has more than one sensor, it could be greater than 0. + * + * @param[inout] max a pointer to a uint64_t that indicates the maximum + * possible power cap, in microwatts + * + * @param[inout] min a pointer to a uint64_t that indicates the minimum + * possible power cap, in microwatts + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t +rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind, + uint64_t *max, uint64_t *min); + +/** @} */ // end of PowerQuer + +/*****************************************************************************/ +/** @defgroup PowerCont Power Control + * These functions provide ways to control power usage. + * @{ + */ +/** + * @brief Set the power cap value + * + * @details This function will set the power cap to the provided value @p cap. + * @p cap must be between the minimum and maximum power cap values set by the + * system, which can be obtained from ::rsmi_dev_power_cap_range_get. + * + * @param[in] dv_ind a device index + * + * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. + * If a device has more than one sensor, it could be greater than 0. 
+ * + * @param[in] cap a uint64_t that indicates the desired power cap, in + * microwatts + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t +rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap); + +/** + * @brief Set the power profile + * + * @details Given a device index @p dv_ind, a sensor index @p sensor_ind, and a + * @p profile, this function will attempt to set the current profile to the + * provided profile. The provided profile must be one of the currently + * supported profiles, as indicated by a call to + * ::rsmi_dev_power_profile_presets_get() * * @param[in] dv_ind a device index * * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. * If a device has more than one sensor, it could be greater than 0. * + * @param[in] profile a ::rsmi_power_profile_preset_masks_t that hold the mask + * of the desired new power profile + * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * */ -rsmi_status_t rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind); +rsmi_status_t +rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind, + rsmi_power_profile_preset_masks_t profile); +/** @} */ // end of PowerCont +/*****************************************************************************/ +/** @defgroup PhysQuer Physical State Queries + * These functions provide information about the physical characteristics of + * the device. + * @{ + */ /** * @brief Get the fan speed in RPMs of the device with the specified device * index and 0-based sensor index. @@ -881,6 +826,52 @@ rsmi_status_t rsmi_dev_fan_speed_get(uint32_t dv_ind, rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed); +/** + * @brief Get the temperature metric value for the specified metric, from the + * specified temperature sensor on the specified device.
+ * + * @details Given a device index @p dv_ind, a 0-based sensor index + * @p sensor_ind, a ::rsmi_temperature_metric_t @p metric and a pointer to an + * int64_t @p temperature, this function will write the value of the metric + * indicated by @p metric to the memory location @p temperature. + * + * @param[in] dv_ind a device index + * + * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. + * If a device has more than one sensor, it could be greater than 0. + * + * @param[in] metric enum indicating which temperature value should be + * retrieved + * + * @param[inout] temperature a pointer to int64_t to which the temperature + * will be written, in millidegrees Celsius. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind, + rsmi_temperature_metric_t metric, int64_t *temperature); +/** @} */ // end of PhysQuer + +/*****************************************************************************/ +/** @defgroup PhysCont Physical State Control + * These functions provide control over the physical state of a device. + * @{ + */ +/** + * @brief Reset the fan to automatic driver control + * + * @details This function returns control of the fan to the system + * + * @param[in] dv_ind a device index + * + * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. + * If a device has more than one sensor, it could be greater than 0. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + */ +rsmi_status_t rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind); + /** * @brief Set the fan speed for the specified device with the provided speed, * in RPMs.
@@ -902,6 +893,92 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind, rsmi_status_t rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed); +/** @} */ // end of PhysCont +/*****************************************************************************/ +/** @defgroup PerfQuer Clock, Power and Performance Queries + * These functions provide information about clock frequencies and + * performance. + * @{ + */ + +/** + * @brief Get percentage of time device is busy doing any processing + * + * @details Given a device index @p dv_ind, this function returns the + * percentage of time that the specified device is busy. The device is + * considered busy if any one or more of its sub-blocks are working, and idle + * if none of the sub-blocks are working. + * + * @param[in] dv_ind a device index + * + * @param[inout] busy_percent a pointer to the uint32_t to which the busy + * percent will be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call + * + */ +rsmi_status_t +rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent); + +/** + * @brief Get the performance level of the device with provided + * device index. + * + * @details Given a device index @p dv_ind and a pointer to a uint32_t @p + * perf, this function will write the ::rsmi_dev_perf_level_t to the uint32_t + * pointed to by @p perf + * + * @param[in] dv_ind a device index + * + * @param[inout] perf a pointer to ::rsmi_dev_perf_level_t to which the + * performance level will be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, + rsmi_dev_perf_level_t *perf); + +/** + * @brief Get the overdrive percent associated with the device with provided + * device index. 
+ * + * @details Given a device index @p dv_ind and a pointer to a uint32_t @p od, + * this function will write the overdrive percentage to the uint32_t pointed + * to by @p od + * + * @param[in] dv_ind a device index + * + * @param[inout] od a pointer to uint32_t to which the overdrive percentage + * will be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od); + +/** + * @brief Get the list of possible system clock speeds of device for a + * specified clock type. + * + * @details Given a device index @p dv_ind, a clock type @p clk_type, and a + * pointer to a to an ::rsmi_frequencies_t structure @p f, this function will + * fill in @p f with the possible clock speeds, and indication of the current + * clock speed selection. + * + * @param[in] dv_ind a device index + * + * @param[in] clk_type the type of clock for which the frequency is desired + * + * @param[inout] f a pointer to a caller provided ::rsmi_frequencies_t structure + * to which the frequency information will be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, + rsmi_clk_type_t clk_type, rsmi_frequencies_t *f); + /** * @brief This function retrieves the voltage/frequency curve information * @@ -918,25 +995,6 @@ rsmi_status_t rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv); -/** - * @brief Set the frequency limits for the specified clock - * - * @details Given a device index @p dv_ind, a clock type (::rsmi_clk_type_t) - * @p clk, and a pointer to a ::rsmi_range_t @p range containing the desired - * upper and lower frequency limits, this function will attempt to set the - * frequency limits to those specified in @p range. 
- * - * @param[in] dv_ind a device index - * - * @param[in] clk The clock type for which the limits should be imposed. - * - * @param[in] range A pointer to the ::rsmi_range_t containing the desired limits - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - */ -rsmi_status_t rsmi_dev_od_freq_range_set(uint32_t dv_ind, rsmi_clk_type_t clk, - rsmi_range_t *range); - /** * @brief This function will retrieve the current valid regions in the * frequency/voltage space. @@ -967,97 +1025,6 @@ rsmi_status_t rsmi_dev_od_freq_range_set(uint32_t dv_ind, rsmi_clk_type_t clk, rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind, uint32_t *num_regions, rsmi_freq_volt_region_t *buffer); -/** - * @brief Get the average power consumption of the device with provided - * device index. - * - * @details Given a device index @p dv_ind and a pointer to a uint64_t - * @p power, this function will write the current average power consumption to - * the uint64_t in microwatts pointed to by @p power. This function requires - * root privilege. - * - * @param[in] dv_ind a device index - * - * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. - * If a device has more than one sensor, it could be greater than 0. - * - * @param[inout] power a pointer to uint64_t to which the average power - * consumption will be written - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t -rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power); - -/** - * @brief Get the cap on power which, when reached, causes the system to take - * action to reduce power. - * - * @details When power use rises above the value @p power, the system will - * take action to reduce power use. The power level returned through - * @p power will be in microWatts. - * - * @param[in] dv_ind a device index - * - * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. 
- * If a device has more than one sensor, it could be greater than 0. - * - * @param[inout] cap a pointer to a uint64_t that indicates the power cap, - * in microwatts - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t -rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap); - -/** - * @brief Get the range of valid values for the power cap - * - * @details This function will return the maximum possible valid power cap - * @p max and the minimum possible valid power cap @p min - * - * @param[in] dv_ind a device index - * - * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. - * If a device has more than one sensor, it could be greater than 0. - * - * @param[inout] max a pointer to a uint64_t that indicates the maximum - * possible power cap, in microwatts - * - * @param[inout] min a pointer to a uint64_t that indicates the minimum - * possible power cap, in microwatts - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. - * - */ -rsmi_status_t -rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind, - uint64_t *max, uint64_t *min); - -/** - * @brief Set the power cap value - * - * @details This function will set the power cap to the provided value @p cap. - * @p cap must be between the minimum and maximum power cap values set by the - * system, which can be obtained from ::rsmi_dev_power_cap_range_get. - * - * @param[in] dv_ind a device index - * - * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. - * If a device has more than one sensor, it could be greater than 0. - * - * @param[inout] cap a uint64_t that indicates the desired power cap, in - * microwatts - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
- * - */ -rsmi_status_t -rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap); - - /** * @brief Get the list of available preset power profiles and an indication of * which profile is currently active. @@ -1090,29 +1057,143 @@ rsmi_status_t rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_power_profile_status_t *status); +/** @} */ // end of PerfQuer +/*****************************************************************************/ + +/** @defgroup PerfCont Clock, Power and Performance Control + * These functions provide control over clock frequencies, power and + * performance. + * @{ + */ /** - * @brief Set the power profile + * @brief Set the PowerPlay performance level associated with the device with + * provided device index with the provided value. * - * @details Given a device index @p dv_ind, a sensor index sensor_ind, and a - * @p profile, this function will attempt to set the current profile to the - * provided profile. The provided profile must be one of the currently - * supported profiles, as indicated by a call to - * ::rsmi_dev_power_profile_presets_get() + * @details Given a device index @p dv_ind and an ::rsmi_dev_perf_level_t @p + * perf_lvl, this function will set the PowerPlay performance level for the + * device to the value @p perf_lvl. * - * @param[in] dv_ind a device index + * @param[in] dv_ind a device index * - * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. - * If a device has more than one sensor, it could be greater than 0. + * @param[in] perf_lvl the value to which the performance level should be set * - * @param[in] profile a ::rsmi_power_profile_preset_masks_t that hold the mask - * of the desired new power profile - * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
* */ rsmi_status_t -rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind, - rsmi_power_profile_preset_masks_t profile); +rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl); + +/** + * @brief Set the overdrive percent associated with the device with provided + * device index with the provided value. See details for WARNING. + * + * @details Given a device index @p dv_ind and an overdrive level @p od, + * this function will set the overdrive level for the device to the value + * @p od. The overdrive level is an integer value between 0 and 20, inclusive, + * which represents the overdrive percentage; e.g., a value of 5 specifies + * an overclocking of 5%. + * + * The overdrive level is specific to the gpu system clock. + * + * The overdrive level is the percentage above the maximum Performance Level + * to which overclocking will be limited. The overclocking percentage does + * not apply to clock speeds other than the maximum. This percentage is + * limited to 20%. + * + * ******WARNING****** + * Operating your AMD GPU outside of official AMD specifications or outside of + * factory settings, including but not limited to the conducting of + * overclocking (including use of this overclocking software, even if such + * software has been directly or indirectly provided by AMD or otherwise + * affiliated in any way with AMD), may cause damage to your AMD GPU, system + * components and/or result in system failure, as well as cause other problems. + * DAMAGES CAUSED BY USE OF YOUR AMD GPU OUTSIDE OF OFFICIAL AMD SPECIFICATIONS + * OR OUTSIDE OF FACTORY SETTINGS ARE NOT COVERED UNDER ANY AMD PRODUCT + * WARRANTY AND MAY NOT BE COVERED BY YOUR BOARD OR SYSTEM MANUFACTURER'S + * WARRANTY. Please use this utility with caution. + * + * @param[in] dv_ind a device index + * + * @param[in] od the value to which the overdrive level should be set + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
+ * + */ +rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od); + +/** + * @brief Control the set of allowed frequencies that can be used for the + * specified clock. + * + * @details Given a device index @p dv_ind, a clock type @p clk_type, and a + * 64 bit bitmask @p freq_bitmask, this function will limit the set of + * allowable frequencies. If a bit in @p freq_bitmask has a value of 1, then + * the frequency (as ordered in an ::rsmi_frequencies_t returned by + * rsmi_dev_gpu_clk_freq_get()) corresponding to that bit index will be + * allowed. + * + * This function will change the performance level to + * ::RSMI_DEV_PERF_LEVEL_MANUAL in order to modify the set of allowable + * frequencies. Caller will need to set to ::RSMI_DEV_PERF_LEVEL_AUTO in order + * to get back to default state. + * + * All bits with indices greater than or equal to + * ::rsmi_frequencies_t::num_supported will be ignored. + * + * @param[in] dv_ind a device index + * + * @param[in] clk_type the type of clock for which the set of frequencies + * will be modified + * + * @param[in] freq_bitmask A bitmask indicating the indices of the + * frequencies that are to be enabled (1) and disabled (0). Only the lowest + * ::rsmi_frequencies_t::num_supported bits of this mask are relevant. + */ +rsmi_status_t rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, + rsmi_clk_type_t clk_type, uint64_t freq_bitmask); + +/** + * @brief Set the frequency limits for the specified clock + * + * @details Given a device index @p dv_ind, a clock type (::rsmi_clk_type_t) + * @p clk, and a pointer to a ::rsmi_range_t @p range containing the desired + * upper and lower frequency limits, this function will attempt to set the + * frequency limits to those specified in @p range. + * + * @param[in] dv_ind a device index + * + * @param[in] clk The clock type for which the limits should be imposed.
+ * + * @param[in] range A pointer to the ::rsmi_range_t containing the desired limits + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + */ +rsmi_status_t rsmi_dev_od_freq_range_set(uint32_t dv_ind, rsmi_clk_type_t clk, + rsmi_range_t *range); + +/** @} */ // end of PerfCont + +/*****************************************************************************/ +/** @defgroup VersQuer Version Queries + * These functions provide version information about various subsystems. + * @{ + */ + +/** + * @brief Get the build version information for the currently running build of + * RSMI. + * + * @details Get the major, minor, patch and build string for RSMI build + * currently in use through @p version + * + * @param[inout] version A pointer to an ::rsmi_version_t structure that will + * be updated with the version information upon return. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call + * + */ +rsmi_status_t +rsmi_version_get(rsmi_version_t *version); /** * @brief Get the VBIOS identifer string @@ -1136,6 +1217,36 @@ rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len); +/** @} */ // end of VersQuer + +/*****************************************************************************/ +/** @defgroup ErrQuer Error Queries + * These functions provide error information about RSMI calls as well as + * device errors. + * @{ + */ + +/** + * @brief Retrieve the error counts for a GPU block + * + * @details Given a device index @p dv_ind, an ::rsmi_gpu_block_t @p block and a + * pointer to an ::rsmi_error_count_t @p ec, this function will write the error + * count values for the GPU block indicated by @p block to memory pointed to by + * @p ec. 
+ * + * @param[in] dv_ind a device index + * + * @param[in] block The block for which error counts should be retrieved + * + * @param[inout] ec A pointer to an ::rsmi_error_count_t to which the error + * counts should be written + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * + */ +rsmi_status_t rsmi_dev_error_count_get(uint32_t dv_ind, + rsmi_gpu_block_t block, rsmi_error_count_t *ec); + /** * @brief Get a description of a provided RSMI error status * @@ -1153,21 +1264,7 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len); rsmi_status_t rsmi_status_string(rsmi_status_t status, const char **status_string); -/** - * @brief Get the build version information for the currently running build of - * RSMI. - * - * @details Get the major, minor, patch and build string for RSMI build - * currently in use through @p version - * - * @param[inout] version A pointer to an ::rsmi_version_t structure that will - * be updated with the version information upon return. 
- * - * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call - * - */ -rsmi_status_t -rsmi_version_get(rsmi_version_t *version); +/** @} */ // end of ErrQuer #ifdef __cplusplus } diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/err_cnt_read.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/err_cnt_read.cc index cde2f6de85..15b90da948 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/err_cnt_read.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/err_cnt_read.cc @@ -85,16 +85,15 @@ void TestErrCntRead::Close() { void TestErrCntRead::Run(void) { rsmi_status_t err; rsmi_error_count_t ec; - - TestBase::Run(); + TestBase::Run(); for (uint32_t i = 0; i < num_monitor_devs(); ++i) { PrintDeviceHeader(i); for (uint32_t b = RSMI_GPU_BLOCK_FIRST; b <= RSMI_GPU_BLOCK_LAST; ++b) { err = rsmi_dev_error_count_get(i, static_cast(b), &ec); - + if (err == RSMI_STATUS_NOT_SUPPORTED) { std::cout << "\t**Error Count for " << GetBlockNameStr(static_cast(b)) << diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/perf_level_read_write.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/perf_level_read_write.cc index d36b486103..26aa43dfe2 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/functional/perf_level_read_write.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/functional/perf_level_read_write.cc @@ -54,7 +54,8 @@ #include "rocm_smi_test/functional/perf_level_read_write.h" #include "rocm_smi_test/test_common.h" -static const std::map kDevPerfLvlNameMap = { +static const std::map + kDevPerfLvlNameMap = { {RSMI_DEV_PERF_LEVEL_AUTO, "RSMI_DEV_PERF_LEVEL_AUTO"}, {RSMI_DEV_PERF_LEVEL_LOW, "RSMI_DEV_PERF_LEVEL_LOW"}, {RSMI_DEV_PERF_LEVEL_HIGH, "RSMI_DEV_PERF_LEVEL_HIGH"}, @@ -129,7 +130,7 @@ void TestPerfLevelReadWrite::Run(void) { " ..." 
<< std::endl; } ret = rsmi_dev_perf_level_set(dv_ind, - static_cast(pfl_i)); + static_cast(pfl_i)); CHK_ERR_ASRT(ret) ret = rsmi_dev_perf_level_get(dv_ind, &pfl); CHK_ERR_ASRT(ret)