diff --git a/docs/README.md b/docs/README.md index 17fa6c35d2..38b56a9aed 100755 --- a/docs/README.md +++ b/docs/README.md @@ -3,6 +3,11 @@ The ROCm System Management Interface Library, or ROCm SMI library, is part of the Radeon Open Compute [ROCm](https://github.com/RadeonOpenCompute) software stack . It is a C library for Linux that provides a user space interface for applications to monitor and control GPU applications. +# Important note about Versioning and Backward Compatibility +The ROCm SMI library is currently under development, and therefore subject to change either at the ABI or API level. The intention is to keep the API as stable as possible even while in development, but in some cases we may need to break backward compatibility in order to ensure future stability and usability. Following [Semantic Versioning](https://semver.org/) rules, while the ROCm SMI library is in a high state of change, the major version will remain 0, and backward compatibility is not ensured. + +Once new development has leveled off, the major version will become greater than 0, and backward compatibility will be maintained within each major version. 
+ # Building ROCm SMI #### Additional Required software for building @@ -64,7 +69,7 @@ int main() { ret = rsmi_num_monitor_devices(&num_devices); for (int i=0; i < num_devices; ++i) { - ret = rsmi_dev_id_get(i &dev_id); + ret = rsmi_dev_id_get(i, &dev_id); // dev_id holds the device ID of device i, upon a // successful call } diff --git a/docs/ROCm_SMI_Intro.pdf b/docs/ROCm_SMI_Manual.pdf similarity index 99% rename from docs/ROCm_SMI_Intro.pdf rename to docs/ROCm_SMI_Manual.pdf index 0668762d6f..b809f8b20e 100644 Binary files a/docs/ROCm_SMI_Intro.pdf and b/docs/ROCm_SMI_Manual.pdf differ diff --git a/include/rocm_smi/rocm_smi.h b/include/rocm_smi/rocm_smi.h index c5586e7628..ceadd20324 100755 --- a/include/rocm_smi/rocm_smi.h +++ b/include/rocm_smi/rocm_smi.h @@ -47,6 +47,9 @@ #ifdef __cplusplus extern "C" { +#include +#else +#include #endif // __cplusplus #include @@ -57,10 +60,8 @@ extern "C" { * All required function, structure, enum, etc. definitions should be defined * in this file. * - * @unstable The rocm_smi library api is new, and therefore subject to change - * either at the ABI or API level. Once committed, every effort will be made - * to not break backward compatibility, but it may not be achieveable in some - * cases. Instead of marking every function prototype as "unstable", we are + * @brief The rocm_smi library api is new, and therefore subject to change + * either at the ABI or API level. Instead of marking every function prototype as "unstable", we are * instead saying the API is unstable (i.e., changes are possible) while the * major version remains 0. This means that if the API/ABI changes, we will * not increment the major version to 1. Once the ABI stabilizes, we will @@ -185,8 +186,8 @@ typedef enum { /** * @brief Pre-set Profile Selections. 
These bitmasks can be AND'd with the - * rsmi_power_profile_status::available_profiles returned from - * rsmi_dev_power_profile_presets_get() to determine which power profiles + * ::rsmi_power_profile_status.available_profiles returned from + * ::rsmi_dev_power_profile_presets_get() to determine which power profiles * are supported by the system. */ typedef enum { @@ -303,14 +304,26 @@ typedef struct { } rsmi_od_vddc_point; /** - * @brief This structure holds 2 ::rsmi_od_vddc_point's, representing the - * diagonal corners of a rectangular region in freq-voltage space. + * @brief This structure holds 2 ::rsmi_range's, one for frequency and one for + * voltage. These 2 ranges indicate the range of possible values for the + * corresponding ::rsmi_od_vddc_point. */ typedef struct { - rsmi_od_vddc_point min_corner; //!< The "lower-left" corner of rectangle - rsmi_od_vddc_point max_corner; //!< The "upper-right" corner of rectangle + rsmi_range freq_range; //!< The frequency range for this VDDC Curve point + rsmi_range volt_range; //!< The voltage range for this VDDC Curve point } rsmi_freq_volt_region; +/** + * ::RSMI_NUM_VOLTAGE_CURVE_POINTS number of ::rsmi_od_vddc_point's + */ +typedef struct { + /** + * Array of ::RSMI_NUM_VOLTAGE_CURVE_POINTS ::rsmi_od_vddc_point's that + * make up the voltage frequency curve points. + */ + rsmi_od_vddc_point vc_points[RSMI_NUM_VOLTAGE_CURVE_POINTS]; +} rsmi_od_volt_curve; + /** * @brief This structure holds the frequency-voltage values for a device. */ @@ -324,7 +337,7 @@ typedef struct { /** * @brief The current voltage curve */ - rsmi_od_vddc_point curve[RSMI_NUM_VOLTAGE_CURVE_POINTS]; + rsmi_od_volt_curve curve; uint32_t num_regions; //!< The number of voltage curve regions } rsmi_od_volt_freq_data; @@ -338,7 +351,7 @@ typedef enum { } rsmi_freq_ind; /** - * @brief Initialize Rocm SMI. + * @brief Initialize ROCm SMI. 
* * @details When called, this initializes internal data structures, * including those corresponding to sources of information that SMI provides. @@ -351,7 +364,7 @@ typedef enum { rsmi_status_t rsmi_init(uint64_t init_flags); /** - * @brief Shutdown Rocm SMI. + * @brief Shutdown ROCm SMI. * * @details Do any necessary clean up. */ @@ -372,17 +385,18 @@ rsmi_status_t rsmi_shut_down(void); rsmi_status_t rsmi_num_monitor_devices(uint32_t *num_devices); /** - * @brief Get the list of possible pci bandwidths that are available. + * @brief Get the list of possible PCIe bandwidths that are available. * * @details Given a device index @p dv_ind and a pointer to a to an - * rsmi_pcie_bandwidth structure @p bandwidth, this function will fill in + * ::rsmi_pcie_bandwidth structure @p bandwidth, this function will fill in * @p bandwidth with the possible T/s values and associated number of lanes, * and indication of the current selection. * * @param[in] dv_ind a device index * - * @param[inout] bandwidth a pointer to a caller provided rsmi_pcie_bandwidth - * structure to which the frequency information will be written + * @param[inout] bandwidth a pointer to a caller provided + * ::rsmi_pcie_bandwidth structure to which the frequency information will be + * written * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. * @@ -415,7 +429,7 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent); * @details Given a device index @p dv_ind and a 64 bit bitmask @p bw_bitmask, * this function will limit the set of allowable bandwidths. If a bit in @p * bw_bitmask has a value of 1, then the frequency (as ordered in an - * ::rsmi_frequencies returned by rsmi_dev_get_gpu_clk_freq()) corresponding + * ::rsmi_frequencies returned by ::rsmi_dev_gpu_clk_freq_get()) corresponding * to that bit index will be allowed. 
* * This function will change the performance level to @@ -423,15 +437,16 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent); * band_widths. Caller will need to set to ::RSMI_DEV_PERF_LEVEL_AUTO in order * to get back to default state. * - * All bits with indices greater than or equal to - * rsmi_pcie_bandwidth.transfer_rate.num_supported will be ignored. + * All bits with indices greater than or equal to the value of the + * ::rsmi_frequencies::num_supported field of ::rsmi_pcie_bandwidth will be + * ignored. * * @param[in] dv_ind a device index * * @param[in] bw_bitmask A bitmask indicating the indices of the * bandwidths that are to be enabled (1) and disabled (0). Only the lowest - * rsmi_pcie_bandwidth.transfer_rate.num_supported bits of this mask are - * relevant. + * ::rsmi_frequencies::num_supported (of ::rsmi_pcie_bandwidth) bits of + * this mask are relevant. */ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask); @@ -499,7 +514,7 @@ rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, * @brief Set the PowerPlay performance level associated with the device with * provided device index with the provided value. * - * @details Given a device index @p dv_ind and an rsmi_dev_perf_lvl @p + * @details Given a device index @p dv_ind and an ::rsmi_dev_perf_level @p * perf_level, this function will set the PowerPlay performance level for the * device to the value @p perf_lvl. * @@ -600,7 +615,7 @@ rsmi_status_t rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, * 64 bit bitmask @p freq_bitmask, this function will limit the set of * allowable frequencies. If a bit in @p freq_bitmask has a value of 1, then * the frequency (as ordered in an ::rsmi_frequencies returned by - * rsmi_dev_get_gpu_clk_freq()) corresponding to that bit index will be + * rsmi_dev_gpu_clk_freq_get()) corresponding to that bit index will be * allowed. 
* * This function will change the performance level to @@ -642,7 +657,7 @@ rsmi_status_t rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len); /** - * @brief Get the temperature metric value for the specifed metric, from the + * @brief Get the temperature metric value for the specified metric, from the * specified temperature sensor on the specified device. * * @details Given a device index @p dv_ind, a 0-based sensor index @@ -700,7 +715,7 @@ rsmi_status_t rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed); /** - * @brief Get the fan speed for the specfied device in RPMs. + * @brief Get the fan speed for the specified device in RPMs. * * @details Given a device index @p dv_ind * this function will get the fan speed. @@ -747,7 +762,7 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *max_speed); /** - * @brief Set the fan speed for the specfied device with the provided speed, + * @brief Set the fan speed for the specified device with the provided speed, * in RPMs. * * @details Given a device index @p dv_ind and a integer value indicating @@ -783,6 +798,25 @@ rsmi_status_t rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data *odv); +/** + * @brief Set the frequency limits for the specified clock + * + * @details Given a device index @p dv_ind, a clock type (::rsmi_clk_type) + * @p clk, and a pointer to a ::rsmi_range @p range containing the desired + * upper and lower frequency limits, this function will attempt to set the + * frequency limits to those specified in @p range. + * + * @param[in] dv_ind a device index + * + * @param[in] clk The clock type for which the limits should be imposed. + * + * @param[in] range A pointer to the ::rsmi_range containing the desired limits + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
+ */ +rsmi_status_t rsmi_dev_od_freq_range_set(uint32_t dv_ind, rsmi_clk_type clk, + rsmi_range *range); + /** * @brief This function will retrieve the current valid regions in the * frequency/voltage space. @@ -909,24 +943,24 @@ rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap); * which profile is currently active. * * @details Given a device index @p dv_ind and a pointer to a - * rsmi_power_profile_status @p status, this function will set the bits of - * the rsmi_power_profile_status.available_profiles bit field of @p status to + * ::rsmi_power_profile_status @p status, this function will set the bits of + * the ::rsmi_power_profile_status.available_profiles bit field of @p status to * 1 if the profile corresponding to the respective - * rsmi_power_profile_preset_masks profiles are enabled. For example, if both + * ::rsmi_power_profile_preset_masks profiles are enabled. For example, if both * the VIDEO and VR power profiles are available selections, then - * RSMI_PWR_PROF_PRST_VIDEO_MASK AND'ed with - * rsmi_power_profile_status.available_profiles will be non-zero as will - * RSMI_PWR_PROF_PRST_VR_MASK AND'ed with - * rsmi_power_profile_status.available_profiles. Additionally, - * rsmi_power_profile_status.current will be set to the - * rsmi_power_profile_preset_masks of the profile that is currently active. + * ::RSMI_PWR_PROF_PRST_VIDEO_MASK AND'ed with + * ::rsmi_power_profile_status.available_profiles will be non-zero as will + * ::RSMI_PWR_PROF_PRST_VR_MASK AND'ed with + * ::rsmi_power_profile_status.available_profiles. Additionally, + * ::rsmi_power_profile_status.current will be set to the + * ::rsmi_power_profile_preset_masks of the profile that is currently active. * * @param[in] dv_ind a device index * * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. * If a device has more than one sensor, it could be greater than 0. 
* * @param[inout] status a pointer to rsmi_power_profile_status that will be + * @param[inout] status a pointer to ::rsmi_power_profile_status that will be * populated by a call to this function * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. @@ -950,7 +984,7 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. * If a device has more than one sensor, it could be greater than 0. * - * @param[in] profile a rsmi_power_profile_preset_masks that hold the mask + * @param[in] profile a ::rsmi_power_profile_preset_masks that holds the mask * of the desired new power profile * * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. @@ -959,6 +993,29 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind, rsmi_power_profile_preset_masks profile); + +/** + * @brief Get the VBIOS identifier string + * + * @details Given a device index @p dv_ind, and a pointer to a char buffer, + * @p vbios, this function will write the VBIOS string (up to @p len + * characters) for device @p dv_ind to @p vbios. The caller must ensure that + * it is safe to write at least @p len characters to @p vbios. + * + * @param[in] dv_ind a device index + * + * @param[inout] vbios A pointer to a buffer of char's to which the VBIOS name + * will be written + * + * @param[in] len The number of char's pointed to by @p vbios which can safely + * be written to by this function. + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. 
+ * + */ +rsmi_status_t +rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len); + /** * @brief Get a description of a provided RSMI error status * diff --git a/include/rocm_smi/rocm_smi_common.h b/include/rocm_smi/rocm_smi_common.h index 4379b0b8dc..d7e4e487bf 100755 --- a/include/rocm_smi/rocm_smi_common.h +++ b/include/rocm_smi/rocm_smi_common.h @@ -45,10 +45,14 @@ #ifndef INCLUDE_ROCM_SMI_ROCM_SMI_COMMON_H_ #define INCLUDE_ROCM_SMI_ROCM_SMI_COMMON_H_ -#define DBG_FILE_ERROR(STR) \ +#define DBG_FILE_ERROR(FN, WR_STR) \ if (env_->debug_output_bitfield & RSMI_DEBUG_SYSFS_FILE_PATHS) { \ std::cout << "*****" << __FUNCTION__ << std::endl; \ - std::cout << "*****Opening file: " << (STR) << std::endl; \ + std::cout << "*****Opening file: " << (FN) << std::endl; \ + if ((WR_STR) != nullptr) { \ + std::cout << "***** for writing. Writing: \"" << *(WR_STR) << "\""; \ + } else { std::cout << "***** for reading.";} \ + std::cout << std::endl; \ std::cout << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ } @@ -56,11 +60,24 @@ #define RSMI_DEBUG_SYSFS_FILE_PATHS 1 struct RocmSMI_env_vars { - // Store env. variables here + // Bitfield that is AND'd with various RSMI_DEBUG_* bits to determine + // which debugging information should be turned on. Env. variable + // RSMI_DEBUG_BITFIELD is used to set all the debug info bits. uint32_t debug_output_bitfield; + + // The integer value of sysfs field enum that is to be over-ridden. + // Env. variable RSMI_DEBUG_ENUM_OVERRIDE is used to specify this. uint32_t enum_override; + + // Sysfs path overrides + + // Env. var. RSMI_DEBUG_DRM_ROOT_OVERRIDE const char *path_DRM_root_override; + + // Env. var. RSMI_DEBUG_HWMON_ROOT_OVERRIDE const char *path_HWMon_root_override; + + // Env. var. 
RSMI_DEBUG_PP_ROOT_OVERRIDE const char *path_power_root_override; }; diff --git a/include/rocm_smi/rocm_smi_device.h b/include/rocm_smi/rocm_smi_device.h index c4943eb01b..77a5c027ad 100755 --- a/include/rocm_smi/rocm_smi_device.h +++ b/include/rocm_smi/rocm_smi_device.h @@ -68,6 +68,7 @@ enum DevInfoTypes { kDevPowerProfileMode, kDevUsage, kDevPowerODVoltage, + kDevVBiosVer, }; class Device { @@ -102,6 +103,9 @@ class Device { std::string path_; uint32_t index_; const RocmSMI_env_vars *env_; + template int openSysfsFileStream(DevInfoTypes type, T *fs, + bool write = false); + int readDevInfoStr(DevInfoTypes type, std::string *retStr); int readDevInfoMultiLineStr(DevInfoTypes type, std::vector *retVec); diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 2c8f5dae58..8121c31ef4 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -680,12 +680,11 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, nullptr, kOD_MCLK_label_array_index + 1); assert(val_vec[kOD_VDDC_CURVE_label_array_index] == "OD_VDDC_CURVE:"); - freq_volt_string_to_point(val_vec[kOD_VDDC_CURVE_label_array_index + 1], - &(p->curve[0])); - freq_volt_string_to_point(val_vec[kOD_VDDC_CURVE_label_array_index + 2], - &(p->curve[1])); - freq_volt_string_to_point(val_vec[kOD_VDDC_CURVE_label_array_index + 3], - &(p->curve[2])); + + uint32_t tmp = kOD_VDDC_CURVE_label_array_index + 1; + for (uint32_t i = 0; i < RSMI_NUM_VOLTAGE_CURVE_POINTS; ++i) { + freq_volt_string_to_point(val_vec[tmp + i], &(p->curve.vc_points[i])); + } assert(val_vec[kOD_OD_RANGE_label_array_index] == "OD_RANGE:"); od_value_pair_str_to_range(val_vec[kOD_OD_RANGE_label_array_index + 1], @@ -708,14 +707,8 @@ static void get_vc_region(uint32_t start_ind, assert(val_vec->size() >= kOD_OD_RANGE_label_array_index + 2); assert((*val_vec)[kOD_OD_RANGE_label_array_index] == "OD_RANGE:"); - rsmi_range rg; - od_value_pair_str_to_range((*val_vec)[start_ind], &rg); - p->min_corner.frequency = rg.lower_bound; - p->max_corner.frequency = 
rg.upper_bound; - - od_value_pair_str_to_range((*val_vec)[start_ind + 1], &rg); - p->min_corner.voltage = rg.lower_bound; - p->max_corner.voltage = rg.upper_bound; + od_value_pair_str_to_range((*val_vec)[start_ind], &p->freq_range); + od_value_pair_str_to_range((*val_vec)[start_ind + 1], &p->volt_range); return; } @@ -734,7 +727,6 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind, rsmi_status_t ret; assert(num_regions != nullptr); - assert(*num_regions > 0); assert(p != nullptr); ret = get_dev_value_vec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec); @@ -755,7 +747,7 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind, *num_regions); for (uint32_t i=0; i < *num_regions; ++i) { - get_vc_region(kOD_VDDC_CURVE_start_index + i, &val_vec, p + i); + get_vc_region(kOD_VDDC_CURVE_start_index + i*2, &val_vec, p + i); } return RSMI_STATUS_SUCCESS; @@ -1143,6 +1135,7 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, return ret; CATCH } + rsmi_status_t rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data *odv) { TRY @@ -1151,6 +1144,7 @@ rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data *odv) { return ret; CATCH } + rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind, uint32_t *num_regions, rsmi_freq_volt_region *buffer) { TRY @@ -1375,6 +1369,26 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent) { CATCH } +rsmi_status_t +rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) { + if (vbios == nullptr || len == 0) { + return RSMI_STATUS_INVALID_ARGS; + } + + TRY + GET_DEV_FROM_INDX + std::string val_str; + int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str); + + uint32_t ln = val_str.copy(vbios, len); + + vbios[std::min(len - 1, ln)] = '\0'; + + return errno_to_rsmi_status(ret); + + CATCH +} + rsmi_status_t rsmi_version_get(rsmi_version *version) { TRY diff --git a/src/rocm_smi_device.cc b/src/rocm_smi_device.cc index 
1f10f85a5a..23675a9e6a 100755 --- a/src/rocm_smi_device.cc +++ b/src/rocm_smi_device.cc @@ -68,6 +68,7 @@ static const char *kDevGPUPCIEClkFname = "pp_dpm_pcie"; static const char *kDevPowerProfileModeFName = "pp_power_profile_mode"; static const char *kDevPowerODVoltageFName = "pp_od_clk_voltage"; static const char *kDevUsageFName = "gpu_busy_percent"; +static const char *kDevVBiosVerFName = "vbios_version"; static const char *kDevPerfLevelAutoStr = "auto"; static const char *kDevPerfLevelLowStr = "low"; @@ -89,6 +90,7 @@ static const std::map kDevAttribNameMap = { {kDevPowerProfileMode, kDevPowerProfileModeFName}, {kDevUsage, kDevUsageFName}, {kDevPowerODVoltage, kDevPowerODVoltageFName}, + {kDevVBiosVer, kDevVBiosVerFName}, }; static const std::map kDevPerfLvlMap = { @@ -121,27 +123,46 @@ Device::Device(std::string p, RocmSMI_env_vars const *e) : path_(p), env_(e) { Device:: ~Device() { } -int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { - auto tempPath = path_; +template +int Device::openSysfsFileStream(DevInfoTypes type, T *fs, bool write) { + auto sysfs_path = path_; - assert(retStr != nullptr); + if (env_->path_DRM_root_override && type == env_->enum_override) { + sysfs_path = env_->path_DRM_root_override; - tempPath += "/device/"; - tempPath += kDevAttribNameMap.at(type); + if (write) { + sysfs_path += ".write"; + } + } - DBG_FILE_ERROR(tempPath); - if (!isRegularFile(tempPath)) { + sysfs_path += "/device/"; + sysfs_path += kDevAttribNameMap.at(type); + + DBG_FILE_ERROR(sysfs_path, (std::string *)nullptr); + if (!isRegularFile(sysfs_path)) { return EISDIR; } - std::ifstream fs; - fs.open(tempPath); + fs->open(sysfs_path); - DBG_FILE_ERROR(tempPath); - if (!fs.is_open()) { + if (!fs->is_open()) { return errno; } + return 0; +} + +int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { + std::ifstream fs; + int ret = 0; + + assert(retStr != nullptr); + + ret = openSysfsFileStream(type, &fs); + if (ret != 0) { + return ret; + } 
+ fs >> *retStr; fs.close(); @@ -150,23 +171,19 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) { auto tempPath = path_; - tempPath += "/device/"; - tempPath += kDevAttribNameMap.at(type); - std::ofstream fs; - fs.open(tempPath); + int ret; - DBG_FILE_ERROR(tempPath); - if (!isRegularFile(tempPath)) { - return EISDIR; + ret = openSysfsFileStream(type, &fs, true); + if (ret != 0) { + return ret; } - DBG_FILE_ERROR(tempPath); - if (!fs.is_open()) { - return errno; + try { + fs << valStr; + } catch (...) { + std::cout << "Write to file threw exception" << std::endl; } - - fs << valStr; fs.close(); return 0; @@ -213,6 +230,7 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) { case kDevGPUMClk: case kDevGPUSClk: case kDevPCIEBW: + case kDevPowerODVoltage: return writeDevInfoStr(type, val); case kDevOverDriveLevel: @@ -229,21 +247,14 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type, std::vector *retVec) { auto tempPath = path_; std::string line; + int ret; + std::ifstream fs; assert(retVec != nullptr); - if (env_->path_DRM_root_override && type == env_->enum_override) { - tempPath = env_->path_DRM_root_override; - } - tempPath += "/device/"; - tempPath += kDevAttribNameMap.at(type); - - std::ifstream fs(tempPath); - std::stringstream buffer; - - DBG_FILE_ERROR(tempPath); - if (!isRegularFile(tempPath)) { - return EISDIR; + ret = openSysfsFileStream(type, &fs); + if (ret != 0) { + return ret; } while (std::getline(fs, line)) { @@ -313,6 +324,7 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) { case kDevUsage: case kDevOverDriveLevel: case kDevDevID: + case kDevVBiosVer: return readDevInfoStr(type, val); break; diff --git a/src/rocm_smi_monitor.cc b/src/rocm_smi_monitor.cc index ed64e29de6..e14de9f40d 100755 --- a/src/rocm_smi_monitor.cc +++ b/src/rocm_smi_monitor.cc @@ -138,7 +138,7 @@ int Monitor::writeMonitor(MonitorTypes type, uint32_t 
sensor_id, std::string val) { std::string sysfs_path = MakeMonitorPath(type, sensor_id); - DBG_FILE_ERROR(sysfs_path) + DBG_FILE_ERROR(sysfs_path, &val) return WriteSysfsStr(sysfs_path, val); } @@ -150,7 +150,7 @@ int Monitor::readMonitor(MonitorTypes type, uint32_t sensor_id, std::string temp_str; std::string sysfs_path = MakeMonitorPath(type, sensor_id); - DBG_FILE_ERROR(sysfs_path) + DBG_FILE_ERROR(sysfs_path, (std::string *)nullptr) return ReadSysfsStr(sysfs_path, val); } diff --git a/src/rocm_smi_power_mon.cc b/src/rocm_smi_power_mon.cc index cab97d8188..963a3723f0 100755 --- a/src/rocm_smi_power_mon.cc +++ b/src/rocm_smi_power_mon.cc @@ -136,7 +136,7 @@ int PowerMon::readPowerValue(PowerMonTypes type, uint64_t *power) { tempPath += "/"; tempPath += kMonitorNameMap.at(type); - DBG_FILE_ERROR(tempPath) + DBG_FILE_ERROR(tempPath, (std::string *)nullptr) int ret = ReadSysfsStr(tempPath, &fstr); if (ret) { diff --git a/tests/rocm_smi_test/functional/bdfid_read.cc b/tests/rocm_smi_test/functional/sys_info_read.cc similarity index 65% rename from tests/rocm_smi_test/functional/bdfid_read.cc rename to tests/rocm_smi_test/functional/sys_info_read.cc index 2e443f699e..e4047f9679 100755 --- a/tests/rocm_smi_test/functional/bdfid_read.cc +++ b/tests/rocm_smi_test/functional/sys_info_read.cc @@ -51,54 +51,83 @@ #include "gtest/gtest.h" #include "rocm_smi/rocm_smi.h" -#include "rocm_smi_test/functional/bdfid_read.h" +#include "rocm_smi_test/functional/sys_info_read.h" #include "rocm_smi_test/test_common.h" -TestBDFIDRead::TestBDFIDRead() : TestBase() { - set_title("RSMI BDFID Read Test"); - set_description("The BDFID Read tests verifies that the BDFID " - "value can be read properly."); +TestSysInfoRead::TestSysInfoRead() : TestBase() { + set_title("RSMI System Info Read Test"); + set_description("This test verifies that system information such as the " + "BDFID, RSMI version, VBIOS version, etc. 
can be read properly."); } -TestBDFIDRead::~TestBDFIDRead(void) { +TestSysInfoRead::~TestSysInfoRead(void) { } -void TestBDFIDRead::SetUp(void) { +void TestSysInfoRead::SetUp(void) { TestBase::SetUp(); return; } -void TestBDFIDRead::DisplayTestInfo(void) { +void TestSysInfoRead::DisplayTestInfo(void) { TestBase::DisplayTestInfo(); } -void TestBDFIDRead::DisplayResults(void) const { +void TestSysInfoRead::DisplayResults(void) const { TestBase::DisplayResults(); return; } -void TestBDFIDRead::Close() { +void TestSysInfoRead::Close() { // This will close handles opened within rsmitst utility calls and call // rsmi_shut_down(), so it should be done after other hsa cleanup TestBase::Close(); } -void TestBDFIDRead::Run(void) { +void TestSysInfoRead::Run(void) { rsmi_status_t err; uint64_t val_ui64; + char buffer[80]; + rsmi_version ver = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, nullptr}; TestBase::Run(); for (uint32_t i = 0; i < num_monitor_devs(); ++i) { PrintDeviceHeader(i); + err = rsmi_dev_vbios_version_get(i, buffer, 80); + + if (err != RSMI_STATUS_SUCCESS) { + if (err == RSMI_STATUS_FILE_ERROR) { + IF_VERB(STANDARD) { + std::cout << "\t**VBIOS read: Not supported on this machine" + << std::endl; + } + } else { + CHK_ERR_ASRT(err) + } + } else { + IF_VERB(STANDARD) { + std::cout << "\t**VBIOS Version: " << std::hex << buffer << std::endl; + } + } + err = rsmi_dev_pci_id_get(i, &val_ui64); CHK_ERR_ASRT(err) IF_VERB(STANDARD) { std::cout << "\t**PCI ID (BDFID): 0x" << std::hex << val_ui64; std::cout << " (" << std::dec << val_ui64 << ")" << std::endl; } + + err = rsmi_version_get(&ver); + CHK_ERR_ASRT(err) + + ASSERT_TRUE(ver.major != 0xFFFFFFFF && ver.minor != 0xFFFFFFFF && + ver.patch != 0xFFFFFFFF && ver.build != nullptr); + IF_VERB(STANDARD) { + std::cout << "\t**RocM SMI Library version: " << ver.major << "." << + ver.minor << "." 
<< ver.patch << " (" << ver.build << ")" << std::endl; + } } } diff --git a/tests/rocm_smi_test/functional/bdfid_read.h b/tests/rocm_smi_test/functional/sys_info_read.h similarity index 88% rename from tests/rocm_smi_test/functional/bdfid_read.h rename to tests/rocm_smi_test/functional/sys_info_read.h index d22934ed75..e033962df6 100755 --- a/tests/rocm_smi_test/functional/bdfid_read.h +++ b/tests/rocm_smi_test/functional/sys_info_read.h @@ -42,17 +42,17 @@ * DEALINGS WITH THE SOFTWARE. * */ -#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_BDFID_READ_H_ -#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_BDFID_READ_H_ +#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_SYS_INFO_READ_H_ +#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_SYS_INFO_READ_H_ #include "rocm_smi_test/test_base.h" -class TestBDFIDRead : public TestBase { +class TestSysInfoRead : public TestBase { public: - TestBDFIDRead(); + TestSysInfoRead(); - // @Brief: Destructor for test case of TestBDFIDRead - virtual ~TestBDFIDRead(); + // @Brief: Destructor for test case of TestSysInfoRead + virtual ~TestSysInfoRead(); // @Brief: Setup the environment for measurement virtual void SetUp(); @@ -70,4 +70,4 @@ class TestBDFIDRead : public TestBase { virtual void DisplayTestInfo(void); }; -#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_BDFID_READ_H_ +#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_SYS_INFO_READ_H_ diff --git a/tests/rocm_smi_test/functional/volt_freq_curv_read.cc b/tests/rocm_smi_test/functional/volt_freq_curv_read.cc index c1d7a7d1db..0039158d7b 100755 --- a/tests/rocm_smi_test/functional/volt_freq_curv_read.cc +++ b/tests/rocm_smi_test/functional/volt_freq_curv_read.cc @@ -84,7 +84,7 @@ void TestVoltCurvRead::Close() { TestBase::Close(); } -static void pt_rng_mhz(std::string title, rsmi_range *r) { +static void pt_rng_Mhz(std::string title, rsmi_range *r) { assert(r != nullptr); std::cout << title << std::endl; @@ -92,16 +92,24 @@ static void pt_rng_mhz(std::string title, rsmi_range *r) { r->upper_bound/1000000 << " MHz" << std::endl; } 
+static void pt_rng_mV(std::string title, rsmi_range *r) { + assert(r != nullptr); + + std::cout << title << std::endl; + std::cout << "\t\t** " << r->lower_bound << " to " << r->upper_bound << + " mV" << std::endl; +} + static void print_pnt(rsmi_od_vddc_point *pt) { std::cout << "\t\t** Frequency: " << pt->frequency/1000000 << "MHz" << std::endl; std::cout << "\t\t** Voltage: " << pt->voltage << "mV" << std::endl; } -static void pt_vddc_curve(rsmi_od_vddc_point *c) { +static void pt_vddc_curve(rsmi_od_volt_curve *c) { assert(c != nullptr); for (uint32_t i = 0; i < RSMI_NUM_VOLTAGE_CURVE_POINTS; ++i) { - print_pnt(&c[i]); + print_pnt(&c->vc_points[i]); } } @@ -109,26 +117,25 @@ static void print_rsmi_od_volt_freq_data(rsmi_od_volt_freq_data *odv) { assert(odv != nullptr); std::cout.setf(std::ios::dec, std::ios::basefield); - pt_rng_mhz("\t\tCurrent SCLK frequency range:", &odv->curr_sclk_range); - pt_rng_mhz("\t\tCurrent MCLK frequency range:", &odv->curr_mclk_range); - pt_rng_mhz("\t\tMin/Max Possible SCLK frequency range:", + pt_rng_Mhz("\t\tCurrent SCLK frequency range:", &odv->curr_sclk_range); + pt_rng_Mhz("\t\tCurrent MCLK frequency range:", &odv->curr_mclk_range); + pt_rng_Mhz("\t\tMin/Max Possible SCLK frequency range:", &odv->sclk_freq_limits); - pt_rng_mhz("\t\tMin/Max Possible MCLK frequency range:", + pt_rng_Mhz("\t\tMin/Max Possible MCLK frequency range:", &odv->mclk_freq_limits); std::cout << "\t\tCurrent Freq/Volt. curve:" << std::endl; - pt_vddc_curve(odv->curve); + pt_vddc_curve(&odv->curve); std::cout << "\tNumber of Freq./Volt. 
regions: " << odv->num_regions << std::endl; } static void print_odv_region(rsmi_freq_volt_region *region) { - std::cout << "\t\t\"lower-left\" corner:" << std::endl; - print_pnt(&region->min_corner); - std::cout << "\t\t\"upper-right\" corner:" << std::endl; - print_pnt(&region->max_corner); + pt_rng_Mhz("\t\tFrequency range:", &region->freq_range); + pt_rng_mV("\t\tVoltage range:", &region->volt_range); } + static void print_rsmi_od_volt_freq_regions(uint32_t num_regions, rsmi_freq_volt_region *regions) { for (uint32_t i = 0; i < num_regions; ++i) { diff --git a/tests/rocm_smi_test/main.cc b/tests/rocm_smi_test/main.cc index b1c0896c03..1818553b95 100755 --- a/tests/rocm_smi_test/main.cc +++ b/tests/rocm_smi_test/main.cc @@ -58,7 +58,7 @@ #include "functional/perf_level_read.h" #include "functional/overdrive_read.h" #include "functional/frequencies_read.h" -#include "functional/bdfid_read.h" +#include "functional/sys_info_read.h" #include "functional/gpu_busy_read.h" #include "functional/power_read.h" #include "functional/overdrive_read_write.h" @@ -161,8 +161,8 @@ TEST(rsmitstReadWrite, TestPciBWReadWrite) { TestPciBWReadWrite tst; RunGenericTest(&tst); } -TEST(rsmitstReadOnly, TestBDFIDRead) { - TestBDFIDRead tst; +TEST(rsmitstReadOnly, TestSysInfoRead) { + TestSysInfoRead tst; RunGenericTest(&tst); } TEST(rsmitstReadOnly, TestGPUBusyRead) {