diff --git a/projects/rocm-smi-lib/CMakeLists.txt b/projects/rocm-smi-lib/CMakeLists.txt index c3c6f99645..1c6d7c9c74 100755 --- a/projects/rocm-smi-lib/CMakeLists.txt +++ b/projects/rocm-smi-lib/CMakeLists.txt @@ -103,8 +103,6 @@ set(CMAKE_CXX_FLAGS if (${ADDRESS_SANITIZER}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address") set(CMAKE_EXE_LINKER_FLAGS -fsanitize=address) - message(STATUS "ADDRESS_SANITIZE: CMAKE_CXX_FLAGS=: ${CMAKE_CXX_FLAGS}") - message(STATUS "ADDRESS_SANITIZE: CMAKE_EXE_LINKER_FLAGS=: ${CMAKE_EXE_LINKER_FLAGS}") else () ## Security breach mitigation flags set(CMAKE_CXX_FLAGS diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index 5da867e48f..b7b5696f02 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -311,7 +311,7 @@ typedef enum { RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_GPU_POST_RESET } rsmi_evt_notification_type_t; -/* +/** * Macro to generate event bitmask from event id */ #define RSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) @@ -786,14 +786,25 @@ typedef rsmi_od_volt_freq_data_t rsmi_od_volt_freq_data; * @brief The following structures hold the gpu metrics values for a device. */ -struct metrics_table_header { +/** + * @brief Size and version information of metrics data + */ +struct metrics_table_header_t { + // TODO(amd) Doxygen documents + /// \cond Ignore in docs. uint16_t structure_size; uint8_t format_revision; uint8_t content_revision; + /// \endcond }; +/** + * @brief The following structure holds the gpu metrics values for a device. + */ typedef struct { - struct metrics_table_header common_header; + // TODO(amd) Doxygen documents + /// \cond Ignore in docs. 
+ struct metrics_table_header_t common_header; /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; @@ -842,10 +853,8 @@ typedef struct { /* Link width/speed */ uint8_t pcie_link_width; uint8_t pcie_link_speed; // in 0.1 GT/s -}rsmi_gpu_metrics_t; -/// \cond Ignore in docs. -typedef rsmi_gpu_metrics_t rsmi_gpu_metrics; -/// \endcond + /// \endcond +} rsmi_gpu_metrics_t; /** * @brief This structure holds error counts. @@ -1267,8 +1276,8 @@ rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len); * @param[inout] minor a pointer to a uint32_t into which minor number will * be copied * - * @retval :: RSMI_STATUS_SUCCESS is returned upon successful call. - * @retval :: RSMI_STATUS_INIT_ERROR if failed to get minor number during + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * @retval ::RSMI_STATUS_INIT_ERROR if failed to get minor number during * initialization. * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid * @@ -1910,7 +1919,6 @@ rsmi_status_t rsmi_dev_fan_speed_max_get(uint32_t dv_ind, */ rsmi_status_t rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type, rsmi_temperature_metric_t metric, int64_t *temperature); -/** @} */ // end of PhysQuer /** * @brief Get the voltage metric value for the specified metric, from the @@ -2297,6 +2305,9 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, * @brief Set the PowerPlay performance level associated with the device with * provided device index with the provided value. * + * @deprecated ::rsmi_dev_perf_level_set_v1() is preferred, with an + * interface that more closely matches the rest of the rocm_smi API. + * * @details Given a device index @p dv_ind and an ::rsmi_dev_perf_level_t @p * perf_level, this function will set the PowerPlay performance level for the * device to the value @p perf_lvl. 
@@ -2314,6 +2325,72 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl); +/** + * @brief Set the PowerPlay performance level associated with the device with + * provided device index with the provided value. + * + * @details Given a device index @p dv_ind and an ::rsmi_dev_perf_level_t @p + * perf_lvl, this function will set the PowerPlay performance level for the + * device to the value @p perf_lvl. + * + * @param[in] dv_ind a device index + * + * @param[in] perf_lvl the value to which the performance level should be set + * + * @retval ::RSMI_STATUS_SUCCESS is returned upon successful call. + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_PERMISSION function requires root access + * + */ +rsmi_status_t +rsmi_dev_perf_level_set_v1(uint32_t dv_ind, rsmi_dev_perf_level_t perf_lvl); + +/** + * @brief Set the overdrive percent associated with the device with provided + * device index with the provided value. See details for WARNING. + * + * @deprecated This function is deprecated. ::rsmi_dev_overdrive_level_set_v1 + * has the same functionality, with an interface that more closely + * matches the rest of the rocm_smi API. + * + * @details Given a device index @p dv_ind and an overdrive level @p od, + * this function will set the overdrive level for the device to the value + * @p od. The overdrive level is an integer value between 0 and 20, inclusive, + * which represents the overdrive percentage; e.g., a value of 5 specifies + * an overclocking of 5%. + * + * The overdrive level is specific to the gpu system clock. + * + * The overdrive level is the percentage above the maximum Performance Level + * to which overclocking will be limited. The overclocking percentage does + * not apply to clock speeds other than the maximum. 
This percentage is + * limited to 20%. + * + * ******WARNING****** + * Operating your AMD GPU outside of official AMD specifications or outside of + * factory settings, including but not limited to the conducting of + * overclocking (including use of this overclocking software, even if such + * software has been directly or indirectly provided by AMD or otherwise + * affiliated in any way with AMD), may cause damage to your AMD GPU, system + * components and/or result in system failure, as well as cause other problems. + * DAMAGES CAUSED BY USE OF YOUR AMD GPU OUTSIDE OF OFFICIAL AMD SPECIFICATIONS + * OR OUTSIDE OF FACTORY SETTINGS ARE NOT COVERED UNDER ANY AMD PRODUCT + * WARRANTY AND MAY NOT BE COVERED BY YOUR BOARD OR SYSTEM MANUFACTURER'S + * WARRANTY. Please use this utility with caution. + * + * @param[in] dv_ind a device index + * + * @param[in] od the value to which the overdrive level should be set + * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_PERMISSION function requires root access + * + */ +rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od); + /** * @brief Set the overdrive percent associated with the device with provided * device index with the provided value. See details for WARNING. 
@@ -2353,7 +2430,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_lvl); * @retval ::RSMI_STATUS_PERMISSION function requires root access * */ -rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od); +rsmi_status_t rsmi_dev_overdrive_level_set_v1(uint32_t dv_ind, uint32_t od); /** * @brief Control the set of allowed frequencies that can be used for the diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_counters.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_counters.h index dc7c740ecd..3d6addb2e2 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_counters.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_counters.h @@ -92,9 +92,9 @@ class Event { explicit Event(rsmi_event_type_t event, uint32_t dev_ind); ~Event(void); - uint32_t openPerfHandle(); - uint32_t startCounter(void); - uint32_t stopCounter(void); + int32_t openPerfHandle(); + int32_t startCounter(void); + int32_t stopCounter(void); uint32_t getValue(rsmi_counter_value_t *val); uint32_t dev_file_ind(void) const {return dev_file_ind_;} uint32_t dev_ind(void) const {return dev_ind_;} @@ -111,8 +111,8 @@ class Event { int32_t fd_; perf_event_attr attr_; - uint32_t get_event_file_info(void); - uint32_t get_event_type(uint32_t *ev_type); + int32_t get_event_file_info(void); + int32_t get_event_type(uint32_t *ev_type); }; diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index f5100f926b..8e45ecfb53 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -199,6 +199,8 @@ class Device { void set_evt_notif_anon_file_ptr(FILE *f) {evt_notif_anon_file_ptr_ = f;} FILE *evt_notif_anon_file_ptr(void) const {return evt_notif_anon_file_ptr_;} void set_evt_notif_anon_fd(int fd) {evt_notif_anon_fd_ = fd;} + void set_evt_notif_anon_fd(uint32_t fd) { + evt_notif_anon_fd_ = static_cast(fd);} int 
evt_notif_anon_fd(void) const {return evt_notif_anon_fd_;} void fillSupportedFuncs(void); void DumpSupportedFunctions(void); diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h index 844b2e08bb..b6ebd07c76 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h @@ -78,7 +78,7 @@ class RocmSMI { std::vector>& monitor_devices() {return monitor_devices_;} uint32_t DiscoverAmdgpuDevices(void); - uint32_t DiscoverAMDPowerMonitors(bool force_update = false); + int DiscoverAMDPowerMonitors(bool force_update = false); // Will execute "func" for every Device object known about, or until func // returns non-zero; diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h index d58365ca5c..a6da48bf84 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h @@ -101,16 +101,16 @@ class Monitor { const std::string path(void) const {return path_;} int readMonitor(MonitorTypes type, uint32_t sensor_ind, std::string *val); int writeMonitor(MonitorTypes type, uint32_t sensor_ind, std::string val); - uint32_t setTempSensorLabelMap(void); + int32_t setTempSensorLabelMap(void); uint32_t getTempSensorIndex(rsmi_temperature_type_t type); rsmi_temperature_type_t getTempSensorEnum(uint64_t ind); - uint32_t setVoltSensorLabelMap(void); + int32_t setVoltSensorLabelMap(void); uint32_t getVoltSensorIndex(rsmi_voltage_type_t type); rsmi_voltage_type_t getVoltSensorEnum(uint64_t ind); void fillSupportedFuncs(SupportedFuncMap *supported_funcs); private: - std::string MakeMonitorPath(MonitorTypes type, int32_t sensor_id); + std::string MakeMonitorPath(MonitorTypes type, uint32_t sensor_id); std::string path_; const RocmSMI_env_vars *env_; std::map temp_type_index_map_; diff --git 
a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h index c67bf41981..8df82d3dfa 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h @@ -83,7 +83,7 @@ GetDevValueVec(amd::smi::DevInfoTypes type, rsmi_status_t GetDevBinaryVec(amd::smi::DevInfoTypes type, uint32_t dv_ind, std::vector *val_vec); -rsmi_status_t ErrnoToRsmiStatus(uint32_t err); +rsmi_status_t ErrnoToRsmiStatus(int err); struct pthread_wrap { public: diff --git a/projects/rocm-smi-lib/oam/src/amd_oam.cc b/projects/rocm-smi-lib/oam/src/amd_oam.cc index 60778492a1..23023eebc8 100755 --- a/projects/rocm-smi-lib/oam/src/amd_oam.cc +++ b/projects/rocm-smi-lib/oam/src/amd_oam.cc @@ -65,7 +65,7 @@ static int rsmi_status_to_amdoam_errorcode(rsmi_status_t status) { if (status > RSMI_STATUS_INIT_ERROR) return -AMDOAM_STATUS_ERROR; else - return -status; + return -1 * static_cast(status); } static int handleRSMIException() { @@ -178,12 +178,13 @@ CATCH return AMDOAM_STATUS_SUCCESS; } -static int get_num_sensors(std::string hwmon_path, std::string fn_reg) { +static uint32_t +get_num_sensors(std::string hwmon_path, std::string fn_reg) { uint32_t sensor_max = 0; std::string fn_reg_ex = "\\b" + fn_reg + "([0-9]+)([^ ]*)"; std::string fn; std::smatch m; - uint32_t temp = 0; + int32_t temp = 0; std::string s1("in"); std::regex re(fn_reg_ex); auto hwmon_dir = opendir(hwmon_path.c_str()); @@ -198,10 +199,12 @@ static int get_num_sensors(std::string hwmon_path, std::string fn_reg) { std::string("$1")); temp = stoi(output); + assert(temp >= 0); + if (s1.compare(fn_reg) == 0) ++temp; - if (temp > sensor_max) - sensor_max = temp; + if (static_cast(temp) > sensor_max) + sensor_max = static_cast(temp); } dentry = readdir(hwmon_dir); } @@ -350,7 +353,7 @@ get_device_error_count(oam_dev_handle_t *handle, return RSMI_STATUS_NOT_SUPPORTED; } if (ret != RSMI_STATUS_SUCCESS) { - return ret; + 
return static_cast(ret); } assert(val_vec.size() == 2); @@ -368,6 +371,6 @@ get_device_error_count(oam_dev_handle_t *handle, assert(junk == "ce:"); fs2 >> count->total_error_count; - return ret; + return static_cast(ret); CATCH } diff --git a/projects/rocm-smi-lib/rocm_smi/docs/ROCm_SMI_Manual.pdf b/projects/rocm-smi-lib/rocm_smi/docs/ROCm_SMI_Manual.pdf index c8378b8e1b..1426e30c1d 100644 Binary files a/projects/rocm-smi-lib/rocm_smi/docs/ROCm_SMI_Manual.pdf and b/projects/rocm-smi-lib/rocm_smi/docs/ROCm_SMI_Manual.pdf differ diff --git a/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg b/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg index a600a361c8..617aede42f 100644 --- a/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg +++ b/projects/rocm-smi-lib/rocm_smi/docs/amd_smi_doxygen.cfg @@ -234,7 +234,8 @@ ALIASES = # A mapping has the form "name=value". For example adding "class=itcl::class" # will allow you to use the command class in the itcl::class meaning. -TCL_SUBST = +# Doxygen says following tag is obsolete; comment to quiet warning +# TCL_SUBST = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For @@ -1005,7 +1006,8 @@ VERBATIM_HEADERS = YES # generated with the -Duse-libclang=ON option for CMake. # The default value is: NO. -CLANG_ASSISTED_PARSING = NO +# comment following to quiet warning +# CLANG_ASSISTED_PARSING = NO # If clang assisted parsing is enabled you can provide the compiler with command # line options that you would normally use when invoking the compiler. Note that @@ -1013,7 +1015,8 @@ CLANG_ASSISTED_PARSING = NO # specified with INPUT and INCLUDE_PATH. # This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. 
-CLANG_OPTIONS = +# comment following to quiet warning +# CLANG_OPTIONS = #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index @@ -2096,7 +2099,8 @@ EXTERNAL_PAGES = YES # interpreter (i.e. the result of 'which perl'). # The default file (with absolute path) is: /usr/bin/perl. -PERL_PATH = /usr/bin/perl +# PERL_PATH is now obsolete +# PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool @@ -2118,7 +2122,8 @@ CLASS_DIAGRAMS = YES # the mscgen tool resides. If left empty the tool is assumed to be found in the # default search path. -MSCGEN_PATH = +# MSCGEN_PATH is now obsolete +# MSCGEN_PATH = # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The diff --git a/projects/rocm-smi-lib/rocm_smi/example/rocm_smi_example.cc b/projects/rocm-smi-lib/rocm_smi/example/rocm_smi_example.cc index df0c941310..08f7710451 100755 --- a/projects/rocm-smi-lib/rocm_smi/example/rocm_smi_example.cc +++ b/projects/rocm-smi-lib/rocm_smi/example/rocm_smi_example.cc @@ -53,14 +53,27 @@ #include "rocm_smi/rocm_smi.h" -#define CHK_RSMI_RET(RET) { \ +#define PRINT_RSMI_ERR(RET) { \ if (RET != RSMI_STATUS_SUCCESS) { \ const char *err_str; \ std::cout << "RSMI call returned " << (RET) \ << " at line " << __LINE__ << std::endl; \ rsmi_status_string((RET), &err_str); \ std::cout << err_str << std::endl; \ - return RET; \ + } \ +} + +#define CHK_RSMI_RET(RET) { \ + PRINT_RSMI_ERR(RET) \ + if (RET != RSMI_STATUS_SUCCESS) { \ + return (RET); \ + } \ +} + +#define CHK_RSMI_RET_I(RET) { \ + PRINT_RSMI_ERR(RET) \ + if (RET != RSMI_STATUS_SUCCESS) { \ + return static_cast(RET); \ } \ } @@ -68,7 +81,7 @@ if ((RET) == RSMI_STATUS_PERMISSION) { \ std::cout << "This command requires root access." 
<< std::endl; \ } else { \ - CHK_RSMI_RET(RET) \ + CHK_RSMI_RET_I(RET) \ } \ } @@ -174,7 +187,7 @@ static rsmi_status_t test_power_profile(uint32_t dv_ind) { power_profile_string(status.current) << std::endl; std::cout << "Resetting perf level to auto..." << std::endl; - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); CHK_RSMI_RET(ret) std::cout << "Done." << std::endl; @@ -235,16 +248,16 @@ static rsmi_status_t test_set_overdrive(uint32_t dv_ind) { print_test_header("Overdrive Control", dv_ind); std::cout << "Set Overdrive level to 0%..." << std::endl; - ret = rsmi_dev_overdrive_level_set(dv_ind, 0); + ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 0); CHK_RSMI_RET(ret) std::cout << "Set Overdrive level to 10%..." << std::endl; - ret = rsmi_dev_overdrive_level_set(dv_ind, 10); + ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 10); CHK_RSMI_RET(ret) ret = rsmi_dev_overdrive_level_get(dv_ind, &val); CHK_RSMI_RET(ret) std::cout << "\t**New OverDrive Level:" << val << std::endl; std::cout << "Reset Overdrive level to 0%..." 
<< std::endl; - ret = rsmi_dev_overdrive_level_set(dv_ind, 0); + ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 0); CHK_RSMI_RET(ret) ret = rsmi_dev_overdrive_level_get(dv_ind, &val); CHK_RSMI_RET(ret) @@ -256,7 +269,7 @@ static rsmi_status_t test_set_overdrive(uint32_t dv_ind) { static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) { rsmi_status_t ret; int64_t orig_speed; - int64_t new_speed; + double new_speed; int64_t cur_spd; print_test_header("Fan Speed Control", dv_ind); @@ -270,11 +283,11 @@ static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) { return RSMI_STATUS_SUCCESS; } - new_speed = static_cast(1.1 * static_cast(orig_speed)); + new_speed = 1.1 * static_cast(orig_speed); std::cout << "Setting fan speed to " << new_speed << std::endl; - ret = rsmi_dev_fan_speed_set(dv_ind, 0, new_speed); + ret = rsmi_dev_fan_speed_set(dv_ind, 0, static_cast(new_speed)); CHK_RSMI_RET(ret) sleep(4); @@ -322,13 +335,13 @@ static rsmi_status_t test_set_perf_level(uint32_t dv_ind) { std::cout << "Set Performance Level to " << (uint32_t)pfl << " ..." << std::endl; - ret = rsmi_dev_perf_level_set(dv_ind, pfl); + ret = rsmi_dev_perf_level_set_v1(dv_ind, pfl); CHK_RSMI_RET(ret) ret = rsmi_dev_perf_level_get(dv_ind, &pfl); CHK_RSMI_RET(ret) std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << std::endl; std::cout << "Reset Perf level to " << orig_pfl << " ..." 
<< std::endl; - ret = rsmi_dev_perf_level_set(dv_ind, orig_pfl); + ret = rsmi_dev_perf_level_set_v1(dv_ind, orig_pfl); CHK_RSMI_RET(ret) ret = rsmi_dev_perf_level_get(dv_ind, &pfl); CHK_RSMI_RET(ret) @@ -377,7 +390,7 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) { ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF); CHK_RSMI_RET(ret) - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO); CHK_RSMI_RET(ret) } return RSMI_STATUS_SUCCESS; @@ -397,7 +410,7 @@ int main() { rsmi_status_t ret; ret = rsmi_init(0); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::string val_str; std::vector val_vec; @@ -413,7 +426,7 @@ int main() { rsmi_num_monitor_devices(&num_monitor_devs); for (uint32_t i = 0; i< num_monitor_devs; ++i) { ret = rsmi_dev_id_get(i, &val_ui16); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Device ID: 0x" << std::hex << val_ui64 << std::endl; ret = rsmi_dev_gpu_metrics_info_get(i, &p); @@ -421,49 +434,50 @@ int main() { std::cout << "\t**GPU METRICS" << std::endl; ret = rsmi_dev_perf_level_get(i, &pfl); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Performance Level:" << perf_level_string(pfl) << std::endl; ret = rsmi_dev_overdrive_level_get(i, &val_ui32); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**OverDrive Level:" << val_ui32 << std::endl; ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Supported GPU Memory clock frequencies: "; std::cout << f.num_supported << std::endl; print_frequencies(&f); ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Supported GPU clock frequencies: "; std::cout << f.num_supported << std::endl; print_frequencies(&f); char name[20]; ret = rsmi_dev_name_get(i, name, 20); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Monitor name: " << name << 
std::endl; ret = rsmi_dev_temp_metric_get(i, 0, RSMI_TEMP_CURRENT, &val_i64); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Temperature: " << val_i64/1000 << "C" << std::endl; - ret = rsmi_dev_volt_metric_get(i, RSMI_VOLT_TYPE_VDDGFX, RSMI_VOLT_CURRENT, &val_i64); - CHK_RSMI_RET(ret) + ret = rsmi_dev_volt_metric_get(i, RSMI_VOLT_TYPE_VDDGFX, + RSMI_VOLT_CURRENT, &val_i64); + CHK_RSMI_RET_I(ret) std::cout << "\t**Voltage: " << val_i64 << "mV" << std::endl; ret = rsmi_dev_fan_speed_get(i, 0, &val_i64); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) ret = rsmi_dev_fan_speed_max_get(i, 0, &val_ui64); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Current Fan Speed: "; - std::cout << val_i64/val_ui64*100; + std::cout << val_i64/static_cast(val_ui64)*100; std::cout << "% ("<< val_i64 << "/" << val_ui64 << ")" << std::endl; ret = rsmi_dev_fan_rpms_get(i, 0, &val_i64); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) std::cout << "\t**Current fan RPMs: " << val_i64 << std::endl; ret = rsmi_dev_power_cap_get(i, 0, &val_ui64); @@ -486,22 +500,22 @@ int main() { std::cout << "***** Testing write api's" << std::endl; for (uint32_t i = 0; i< num_monitor_devs; ++i) { ret = test_set_overdrive(i); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) ret = test_set_perf_level(i); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) ret = test_set_freq(i); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) ret = test_set_fan_speed(i); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) ret = test_power_cap(i); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) ret = test_power_profile(i); - CHK_RSMI_RET(ret) + CHK_RSMI_RET_I(ret) } return 0; diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 38ab8caa8e..e87ce75bf8 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -103,7 +103,7 @@ static uint64_t get_multiplier_from_str(char units_char) { break; default: - assert(!"Unexpected units for frequency"); + assert(false); // 
Unexpected units for frequency throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__); } return multiplier; @@ -155,7 +155,8 @@ static uint64_t freq_string_to_int(const std::vector &freq_lines, throw amd::smi::rsmi_exception(RSMI_STATUS_NO_DATA, __FUNCTION__); } - lanes[i] = std::stoi(star_str.substr(1), nullptr); + lanes[i] = + static_cast(std::stoi(star_str.substr(1), nullptr)); } } return static_cast(freq*multiplier); @@ -375,7 +376,7 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type, template static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type, - uint32_t dv_ind, int32_t sensor_ind, T val) { + uint32_t dv_ind, uint32_t sensor_ind, T val) { GET_DEV_FROM_INDX assert(dev->monitor() != nullptr); @@ -394,7 +395,7 @@ static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type, return RSMI_STATUS_INVALID_ARGS; } - uint32_t ret = smi.DiscoverAMDPowerMonitors(); + int ret = smi.DiscoverAMDPowerMonitors(); if (ret != 0) { return amd::smi::ErrnoToRsmiStatus(ret); } @@ -685,7 +686,7 @@ static rsmi_status_t get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) { TRY std::string val_str; - int64_t val_u64; + uint64_t val_u64; assert(id != nullptr); if (id == nullptr) { @@ -781,7 +782,7 @@ rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { } errno = 0; - int64_t val_ul = strtoul(val_str.c_str(), nullptr, 10); + uint64_t val_ul = strtoul(val_str.c_str(), nullptr, 10); if (val_ul > 0xFFFFFFFF) { return RSMI_STATUS_UNEXPECTED_SIZE; @@ -796,6 +797,14 @@ rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { rsmi_status_t rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) { + if (dv_ind < 0) { + return RSMI_STATUS_INVALID_ARGS; + } + return rsmi_dev_overdrive_level_set_v1(static_cast(dv_ind), od); +} + +rsmi_status_t +rsmi_dev_overdrive_level_set_v1(uint32_t dv_ind, uint32_t od) { TRY REQUIRE_ROOT_ACCESS @@ -809,6 +818,11 @@ rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) 
{ rsmi_status_t rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_level) { + return rsmi_dev_perf_level_set_v1(static_cast(dv_ind), perf_level); +} + +rsmi_status_t +rsmi_dev_perf_level_set_v1(uint32_t dv_ind, rsmi_dev_perf_level_t perf_level) { TRY REQUIRE_ROOT_ACCESS @@ -959,8 +973,8 @@ static const uint32_t kOD_OD_RANGE_label_array_index = kOD_VDDC_CURVE_label_array_index + 4; static const uint32_t kOD_VDDC_CURVE_start_index = kOD_OD_RANGE_label_array_index + 3; -static const uint32_t kOD_VDDC_CURVE_num_lines = - kOD_VDDC_CURVE_start_index + 4; +// static const uint32_t kOD_VDDC_CURVE_num_lines = +// kOD_VDDC_CURVE_start_index + 4; static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, rsmi_od_volt_freq_data_t *p) { @@ -1049,7 +1063,7 @@ rsmi_status_t rsmi_dev_od_clk_info_set(uint32_t dv_ind, rsmi_freq_ind_t level, }; // Set perf. level to manual so that we can then set the power profile - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); if (ret != RSMI_STATUS_SUCCESS) { return ret; } @@ -1097,7 +1111,7 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint, rsmi_status_t ret; // Set perf. level to manual so that we can then set the power profile - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); if (ret != RSMI_STATUS_SUCCESS) { return ret; } @@ -1222,7 +1236,7 @@ static rsmi_status_t set_power_profile(uint32_t dv_ind, assert(ind_map.find(profile) != ind_map.end()); // Set perf. 
level to manual so that we can then set the power profile - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); if (ret != RSMI_STATUS_SUCCESS) { return ret; } @@ -1297,8 +1311,6 @@ rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type, rsmi_status_t rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block, uint64_t *fw_version) { - rsmi_status_t ret; - TRY CHK_SUPPORT_VAR(fw_version, block) @@ -1373,12 +1385,7 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block, return RSMI_STATUS_INVALID_ARGS; } - ret = get_dev_value_int(dev_type, dv_ind, fw_version); - if (ret != 0) { - return amd::smi::ErrnoToRsmiStatus(ret); - } - - return RSMI_STATUS_SUCCESS; + return get_dev_value_int(dev_type, dv_ind, fw_version); CATCH } @@ -1438,7 +1445,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, std::shared_ptr dev = smi.monitor_devices()[dv_ind]; assert(dev != nullptr); - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); if (ret != RSMI_STATUS_SUCCESS) { return ret; } @@ -1503,8 +1510,13 @@ get_id_name_str_from_line(uint64_t id, std::string ln, if (std::stoul(token1, nullptr, 16) == id) { int64_t pos = ln_str->tellg(); - pos = ln.find_first_not_of("\t ", pos); - ret_str = ln.substr(pos); + assert(pos >= 0); + if (pos < 0) { + throw amd::smi::rsmi_exception( + RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__); + } + size_t s_pos = ln.find_first_not_of("\t ", static_cast(pos)); + ret_str = ln.substr(static_cast(s_pos)); } return ret_str; } @@ -1537,7 +1549,8 @@ static rsmi_status_t get_backup_name(uint16_t id, char *name, size_t len) { static rsmi_status_t get_dev_name_from_file(uint32_t dv_ind, char *name, size_t len) { std::string val_str; - rsmi_status_t ret = get_dev_value_line(amd::smi::kDevDevProdName, dv_ind, &val_str); + rsmi_status_t ret = + 
get_dev_value_line(amd::smi::kDevDevProdName, dv_ind, &val_str); if (ret != 0) { return amd::smi::ErrnoToRsmiStatus(ret); @@ -1885,12 +1898,12 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) { std::shared_ptr dev = smi.monitor_devices()[dv_ind]; assert(dev != nullptr); - ret = rsmi_dev_perf_level_set(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); + ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_MANUAL); if (ret != RSMI_STATUS_SUCCESS) { return ret; } - uint32_t ret_i; + int32_t ret_i; ret_i = dev->writeDevInfo(amd::smi::kDevPCIEClk, freq_enable_str); return amd::smi::ErrnoToRsmiStatus(ret_i); @@ -2372,7 +2385,7 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, break; default: - assert(!"Unexpected memory type"); + assert(false); // Unexpected memory type return RSMI_STATUS_INVALID_ARGS; } @@ -2405,7 +2418,7 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, break; default: - assert(!"Unexpected memory type"); + assert(false); // Unexpected memory type return RSMI_STATUS_INVALID_ARGS; } @@ -2641,7 +2654,7 @@ rsmi_version_str_get(rsmi_sw_component_t component, char *ver_str, break; default: - assert(!"Unexpected component type provided"); + assert(false); // Unexpected component type provided return RSMI_STATUS_INVALID_ARGS; } @@ -2757,7 +2770,7 @@ rsmi_dev_counter_destroy(rsmi_event_handle_t evnt_handle) { return RSMI_STATUS_INVALID_ARGS; } - uint32_t ret = 0; + int ret = 0; amd::smi::evt::Event *evt = reinterpret_cast(evnt_handle); uint32_t dv_ind = evt->dev_ind(); @@ -2783,7 +2796,7 @@ rsmi_counter_control(rsmi_event_handle_t evt_handle, REQUIRE_ROOT_ACCESS - uint32_t ret = 0; + int ret = 0; if (evt_handle == 0) { return RSMI_STATUS_INVALID_ARGS; @@ -2799,7 +2812,7 @@ rsmi_counter_control(rsmi_event_handle_t evt_handle, break; default: - assert(!"Unexpected perf counter command"); + assert(false); // Unexpected perf counter command return RSMI_STATUS_INVALID_ARGS; } return 
amd::smi::ErrnoToRsmiStatus(ret); @@ -2832,8 +2845,11 @@ rsmi_counter_read(rsmi_event_handle_t evt_handle, if (ret == 0 && value->value > 0xFFFFFFFFFFFF) { ret = evt->getValue(value); } - - return amd::smi::ErrnoToRsmiStatus(ret); + if (ret == 0) { + return RSMI_STATUS_SUCCESS; + } else { + return RSMI_STATUS_UNEXPECTED_SIZE; + } CATCH } @@ -3006,7 +3022,7 @@ rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages, tmp_stat = RSMI_MEM_PAGE_STATUS_RESERVED; break; default: - assert(!"Unexpected retired memory page status code read"); + assert(false); // Unexpected retired memory page status code read return RSMI_STATUS_UNKNOWN_ERROR; } records[i].status = tmp_stat; @@ -3080,7 +3096,7 @@ rsmi_dev_xgmi_error_status(uint32_t dv_ind, rsmi_xgmi_status_t *status) { break; default: - assert(!"Unexpected XGMI error status read"); + assert(false); // Unexpected XGMI error status read return RSMI_STATUS_UNKNOWN_ERROR; } return RSMI_STATUS_SUCCESS; @@ -3142,20 +3158,20 @@ rsmi_topo_get_link_weight(uint32_t dv_ind_src, uint32_t dv_ind_dst, rsmi_status_t status; uint32_t node_ind_dst; - uint32_t ret = smi.get_node_index(dv_ind_dst, &node_ind_dst); + int ret = smi.get_node_index(dv_ind_dst, &node_ind_dst); - if (!ret) { + if (ret == 0) { amd::smi::IO_LINK_TYPE type; ret = kfd_node->get_io_link_type(node_ind_dst, &type); - if (!ret) { + if (ret == 0) { if (type == amd::smi::IOLINK_TYPE_XGMI) { ret = kfd_node->get_io_link_weight(node_ind_dst, weight); - if (!ret) + if (ret == 0) status = RSMI_STATUS_SUCCESS; else status = RSMI_STATUS_INIT_ERROR; } else { - assert(!"Unexpected IO Link type read"); + assert(false); // Unexpected IO Link type read status = RSMI_STATUS_NOT_SUPPORTED; } } else { @@ -3173,7 +3189,7 @@ rsmi_topo_get_link_weight(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t io_link_weight; ret = smi.get_io_link_weight(numa_number_src, numa_number_dst, &io_link_weight); - if (!ret) { + if (ret == 0) { *weight = *weight + io_link_weight; // from src numa 
CPU node to dst numa CPU node } else { @@ -3183,11 +3199,11 @@ rsmi_topo_get_link_weight(uint32_t dv_ind_src, uint32_t dv_ind_dst, } status = RSMI_STATUS_SUCCESS; } else { - assert(!"Error to read numa node number"); + assert(false); // Error to read numa node number status = RSMI_STATUS_INIT_ERROR; } } else { - assert(!"Error to read numa node weight"); + assert(false); // Error to read numa node weight status = RSMI_STATUS_INIT_ERROR; } } @@ -3217,9 +3233,9 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, rsmi_status_t status; uint32_t node_ind_dst; - uint32_t ret = smi.get_node_index(dv_ind_dst, &node_ind_dst); + int ret = smi.get_node_index(dv_ind_dst, &node_ind_dst); - if (!ret) { + if (ret == 0) { amd::smi::IO_LINK_TYPE io_link_type; ret = kfd_node->get_io_link_type(node_ind_dst, &io_link_type); if (!ret) { @@ -3228,7 +3244,7 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, *hops = 1; status = RSMI_STATUS_SUCCESS; } else { - assert(!"Unexpected IO Link type read"); + assert(false); // Unexpected IO Link type read status = RSMI_STATUS_NOT_SUPPORTED; } } else { @@ -3242,7 +3258,7 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, uint64_t io_link_weight; ret = smi.get_io_link_weight(numa_number_src, numa_number_dst, &io_link_weight); - if (!ret) + if (ret == 0) *hops = 3; // from src CPU node to dst CPU node else *hops = 4; // More than one CPU hops, hard coded as 4 @@ -3259,7 +3275,7 @@ rsmi_topo_get_link_type(uint32_t dv_ind_src, uint32_t dv_ind_dst, status = RSMI_STATUS_INIT_ERROR; } } else { - assert(!"Error to get numa node number"); + assert(false); // Error to get numa node number status = RSMI_STATUS_INIT_ERROR; } } @@ -3379,7 +3395,7 @@ rsmi_dev_supported_variant_iterator_open( break; default: - assert(!"Unexpected iterator type"); + assert(false); // Unexpected iterator type return RSMI_STATUS_INVALID_ARGS; } return RSMI_STATUS_SUCCESS; @@ -3566,7 +3582,7 @@ 
rsmi_event_notification_init(uint32_t dv_ind) { } dev->set_evt_notif_anon_fd(args.anon_fd); - FILE *anon_file_ptr = fdopen(args.anon_fd, "r"); + FILE *anon_file_ptr = fdopen(static_cast(args.anon_fd), "r"); if (anon_file_ptr == nullptr) { close(dev->evt_notif_anon_fd()); return amd::smi::ErrnoToRsmiStatus(errno); @@ -3749,5 +3765,5 @@ rsmi_test_refcount(uint64_t refcnt_type) { return -1; } - return smi.ref_count(); + return static_cast(smi.ref_count()); } diff --git a/projects/rocm-smi-lib/src/rocm_smi_counters.cc b/projects/rocm-smi-lib/src/rocm_smi_counters.cc index 03b869ea32..60ab72c58b 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_counters.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_counters.cc @@ -138,7 +138,7 @@ GetSupportedEventGroups(uint32_t dev_num, dev_evt_grp_set_t *supported_grps) { std::string grp_path_base; std::string grp_path; - uint32_t ret; + int32_t ret; grp_path_base = kPathDeviceEventRoot; grp_path_base += '/'; @@ -225,9 +225,9 @@ parse_field_config(std::string fstr, evnt_info_t *val) { val->field_size = static_cast(end_bit - start_bit + 1); } -static uint32_t +static int32_t get_event_bitfield_info(std::string *config_path, evnt_info_t *val) { - uint32_t err; + int32_t err; std::string fstr; @@ -240,9 +240,9 @@ get_event_bitfield_info(std::string *config_path, evnt_info_t *val) { return 0; } -uint32_t +int32_t Event::get_event_file_info(void) { - uint32_t err; + int32_t err; std::string fn = evt_path_root_; std::string fstr; @@ -282,7 +282,7 @@ Event::get_event_file_info(void) { return 0; } -uint32_t +int32_t Event::get_event_type(uint32_t *ev_type) { assert(ev_type != nullptr); if (ev_type == nullptr) { @@ -316,9 +316,9 @@ get_perf_attr_config(std::vector *ev_info) { return ret_val; } -uint32_t +int32_t amd::smi::evt::Event::openPerfHandle(void) { - uint32_t ret; + int32_t ret; memset(&attr_, 0, sizeof(struct perf_event_attr)); @@ -350,7 +350,7 @@ amd::smi::evt::Event::openPerfHandle(void) { return 0; } -uint32_t +int32_t 
amd::smi::evt::Event::startCounter(void) { int32_t ret; @@ -370,7 +370,7 @@ amd::smi::evt::Event::startCounter(void) { return 0; } -uint32_t +int32_t amd::smi::evt::Event::stopCounter(void) { int32_t ret; @@ -389,23 +389,25 @@ amd::smi::evt::Event::stopCounter(void) { static ssize_t readn(int fd, void *buf, size_t n) { - ssize_t left = n; + size_t left = n; ssize_t bytes; while (left) { bytes = read(fd, buf, left); - if (!bytes) /* reach EOF */ - return (n - left); + if (!bytes) { /* reach EOF */ + return static_cast(n - left); + } if (bytes < 0) { if (errno == EINTR) /* read got interrupted */ continue; else return -errno; } - left -= bytes; + + left -= static_cast(bytes); buf = reinterpret_cast((reinterpret_cast(buf) + bytes)); } - return n; + return static_cast(n); } uint32_t diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index 3b00390ce3..ce8fe3797f 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -683,6 +683,8 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) { std::string tempStr; int ret; + int tmp_val; + switch (type) { case kDevDevID: case kDevSubSysDevID: @@ -695,7 +697,11 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) { if (tempStr == "") { return EINVAL; } - *val = std::stoi(tempStr, 0, 16); + tmp_val = std::stoi(tempStr, 0, 16); + if (tmp_val < 0) { + return EINVAL; + } + *val = static_cast(tmp_val); break; case kDevUsage: @@ -942,7 +948,6 @@ bool Device::DeviceAPISupported(std::string name, uint64_t variant, uint64_t sub_variant) { SupportedFuncMapIt func_it; VariantMapIt var_it; - SubVariantIt sub_var_it; fillSupportedFuncs(); func_it = supported_funcs_.find(name); @@ -981,7 +986,8 @@ bool Device::DeviceAPISupported(std::string name, uint64_t variant, return subvariant_match(&(var_it->second), sub_variant); } } - assert(!"We should not reach here"); + assert(false); // We should not reach here + return 
false; } diff --git a/projects/rocm-smi-lib/src/rocm_smi_io_link.cc b/projects/rocm-smi-lib/src/rocm_smi_io_link.cc index 83530a2cba..ddcd0d87ec 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_io_link.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_io_link.cc @@ -191,7 +191,7 @@ int DiscoverIOLinks(std::map, continue; } - uint32_t node_indx = std::stoi(dentry_kfd->d_name); + uint32_t node_indx = static_cast(std::stoi(dentry_kfd->d_name)); std::shared_ptr link; uint32_t link_indx; std::string io_link_path_root = IOLinkPathRoot(node_indx); @@ -211,7 +211,7 @@ int DiscoverIOLinks(std::map, continue; } - link_indx = std::stoi(dentry_io_link->d_name); + link_indx = static_cast(std::stoi(dentry_io_link->d_name)); link = std::shared_ptr(new IOLink(node_indx, link_indx)); link->Initialize(); @@ -263,7 +263,7 @@ int DiscoverIOLinksPerNode(uint32_t node_indx, std::mapd_name); + link_indx = static_cast(std::stoi(dentry->d_name)); link = std::shared_ptr(new IOLink(node_indx, link_indx)); link->Initialize(); diff --git a/projects/rocm-smi-lib/src/rocm_smi_kfd.cc b/projects/rocm-smi-lib/src/rocm_smi_kfd.cc index 70939c5b56..063917227b 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_kfd.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_kfd.cc @@ -86,10 +86,12 @@ static const char *kKFDPasidFName = "pasid"; // static const char *kKFDNodePropWAVE_FRONT_SIZEStr = "wave_front_size"; static const char *kKFDNodePropARRAY_COUNTStr = "array_count"; -static const char *kKFDNodePropSIMD_ARRAYS_PER_ENGINEStr = "simd_arrays_per_engine"; +static const char *kKFDNodePropSIMD_ARRAYS_PER_ENGINEStr = + "simd_arrays_per_engine"; static const char *kKFDNodePropCU_PER_SIMD_ARRAYStr = "cu_per_simd_array"; // static const char *kKFDNodePropSIMD_PER_CUStr = "simd_per_cu"; -// static const char *kKFDNodePropMAX_SLOTS_SCRATCH_CUStr = "max_slots_scratch_cu"; +// static const char *kKFDNodePropMAX_SLOTS_SCRATCH_CUStr = +// "max_slots_scratch_cu"; // static const char *kKFDNodePropVENDOR_IDStr = "vendor_id"; // 
static const char *kKFDNodePropDEVICE_IDStr = "device_id"; @@ -231,7 +233,7 @@ static int ReadKFDGpuId(uint32_t kfd_node_id, uint64_t *gpu_id) { return ENXIO; } - *gpu_id = std::stoi(gpu_id_str); + *gpu_id = static_cast(std::stoi(gpu_id_str)); return 0; } @@ -294,7 +296,8 @@ int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated, int err; std::string tmp; - procs[*num_procs_found].process_id = std::stoi(proc_id_str); + procs[*num_procs_found].process_id = + static_cast(std::stoi(proc_id_str)); std::string pasid_str_path = kKFDProcPathRoot; pasid_str_path += "/"; @@ -312,7 +315,7 @@ int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated, closedir(proc_dir); return EINVAL; } - procs[*num_procs_found].pasid = std::stoi(tmp); + procs[*num_procs_found].pasid = static_cast(std::stoi(tmp)); } ++(*num_procs_found); @@ -382,7 +385,7 @@ int GetProcessGPUs(uint32_t pid, std::unordered_set *gpu_set) { uint64_t val; try { - val = std::stoi(tmp); + val = static_cast(std::stoi(tmp)); } catch (...) 
{ std::cerr << "Error; read invalid data: " << tmp << " from " << q_gpu_id_str << std::endl; @@ -431,7 +434,7 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc, if (!is_number(tmp)) { return EINVAL; } - proc->pasid = std::stoi(tmp); + proc->pasid = static_cast(std::stoi(tmp)); proc->vram_usage = 0; proc->sdma_usage = 0; @@ -439,7 +442,8 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc, uint32_t cu_count = 0; static amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); - static std::map>& kfd_node_map = smi.kfd_node_map(); + static std::map>& kfd_node_map = + smi.kfd_node_map(); for (itr = gpu_set->begin(); itr != gpu_set->end(); itr++) { uint64_t gpu_id = (*itr); @@ -531,7 +535,7 @@ int DiscoverKFDNodes(std::map> *nodes) { continue; } - node_indx = std::stoi(dentry->d_name); + node_indx = static_cast(std::stoi(dentry->d_name)); if (!KFDNodeSupported(node_indx)) { dentry = readdir(kfd_node_dir); @@ -675,7 +679,8 @@ KFDNode::Initialize(void) { ret = get_property_value(kKFDNodePropSIMD_ARRAYS_PER_ENGINEStr, &tmp_val); if (ret != 0) { throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR, - "Failed to initialize rocm_smi library (get number of shader arrays per engine)."); + "Failed to initialize rocm_smi library " + "(get number of shader arrays per engine)."); } cu_count_ = uint32_t(tmp_val); ret = get_property_value(kKFDNodePropARRAY_COUNTStr, &tmp_val); diff --git a/projects/rocm-smi-lib/src/rocm_smi_main.cc b/projects/rocm-smi-lib/src/rocm_smi_main.cc index 28d5bf60be..06ddd6c73f 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_main.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_main.cc @@ -80,7 +80,8 @@ static uint32_t GetDeviceIndex(const std::string s) { size_t tmp = t.find_last_not_of("0123456789"); t.erase(0, tmp+1); - return stoi(t); + assert(stoi(t) >= 0); + return static_cast(stoi(t)); } // Find the drm minor from from sysfs path "/sys/class/drm/cardX/device/drm". 
@@ -110,10 +111,10 @@ static uint32_t GetDrmRenderMinor(const std::string s) { dentry = readdir(drm_dir); } - if (closedir(drm_dir)) + if (closedir(drm_dir)) { return 0; - - return drm_minor; + } + return static_cast(drm_minor); } static int SameDevice(const std::string fileA, const std::string fileB) { @@ -192,7 +193,7 @@ static uint32_t ConstructBDFID(std::string path, uint64_t *bdfid) { assert(ret < 256); if (ret <= 0 || ret >= 256) { - return -1; + return 1; } // We are looking for the last element in the path that has the form @@ -229,7 +230,7 @@ static uint32_t GetMonitorDevices(const std::shared_ptr &d, if (d->monitor() != nullptr) { // Calculate BDFID and set for this device if (ConstructBDFID(d->path(), &bdfid) != 0) { - return -1; + return 1; } d->set_bdfid(bdfid); device_list->push_back(d); @@ -241,6 +242,7 @@ void RocmSMI::Initialize(uint64_t flags) { auto i = 0; uint32_t ret; + int i_ret; assert(ref_count_ == 1); if (ref_count_ != 1) { @@ -278,16 +280,16 @@ RocmSMI::Initialize(uint64_t flags) { } std::map> tmp_map; - ret = DiscoverKFDNodes(&tmp_map); - if (ret != 0) { + i_ret = DiscoverKFDNodes(&tmp_map); + if (i_ret != 0) { throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR, "Failed to initialize rocm_smi library (KFD node discovery)."); } std::map, std::shared_ptr> io_link_map_tmp; - ret = DiscoverIOLinks(&io_link_map_tmp); - if (ret != 0) { + i_ret = DiscoverIOLinks(&io_link_map_tmp); + if (i_ret != 0) { throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR, "Failed to initialize rocm_smi library (IO Links discovery)."); } @@ -358,14 +360,16 @@ RocmSMI& RocmSMI::getInstance(uint64_t flags) { return singleton; } -static int GetEnvVarInteger(const char *ev_str) { +static uint32_t GetEnvVarUInteger(const char *ev_str) { #ifdef NDEBUG (void)ev_str; #else ev_str = getenv(ev_str); if (ev_str) { - return atoi(ev_str); + int ret = atoi(ev_str); + assert(ret >= 0); + return static_cast(ret); } #endif return 0; @@ -374,18 +378,18 @@ static int 
GetEnvVarInteger(const char *ev_str) { // Get and store env. variables in this method void RocmSMI::GetEnvVariables(void) { #ifdef NDEBUG - (void)GetEnvVarInteger(nullptr); // This is to quiet release build warning. + (void)GetEnvVarUInteger(nullptr); // This is to quiet release build warning. env_vars_.debug_output_bitfield = 0; env_vars_.path_DRM_root_override = nullptr; env_vars_.path_HWMon_root_override = nullptr; env_vars_.path_power_root_override = nullptr; env_vars_.enum_override = 0; #else - env_vars_.debug_output_bitfield = GetEnvVarInteger("RSMI_DEBUG_BITFIELD"); + env_vars_.debug_output_bitfield = GetEnvVarUInteger("RSMI_DEBUG_BITFIELD"); env_vars_.path_DRM_root_override = getenv("RSMI_DEBUG_DRM_ROOT_OVERRIDE"); env_vars_.path_HWMon_root_override = getenv("RSMI_DEBUG_HWMON_ROOT_OVERRIDE"); env_vars_.path_power_root_override = getenv("RSMI_DEBUG_PP_ROOT_OVERRIDE"); - env_vars_.enum_override = GetEnvVarInteger("RSMI_DEBUG_ENUM_OVERRIDE"); + env_vars_.enum_override = GetEnvVarUInteger("RSMI_DEBUG_ENUM_OVERRIDE"); #endif } @@ -453,7 +457,7 @@ static bool isAMDGPU(std::string dev_path) { } uint32_t RocmSMI::DiscoverAmdgpuDevices(void) { - auto ret = 0; + uint32_t ret = 0; // If this gets called more than once, clear previous findings. devices_.clear(); @@ -545,7 +549,7 @@ uint32_t RocmSMI::DiscoverAMDMonitors(void) { // Instead, we will discover() all the power monitors the first time // they are needed and then check for previous discovery on each subsequent // call. 
-uint32_t RocmSMI::DiscoverAMDPowerMonitors(bool force_update) { +int RocmSMI::DiscoverAMDPowerMonitors(bool force_update) { if (force_update) { power_mons_.clear(); } @@ -606,7 +610,7 @@ uint32_t RocmSMI::DiscoverAMDPowerMonitors(bool force_update) { uint32_t RocmSMI::IterateSMIDevices( std::function&, void *)> func, void *p) { if (func == nullptr) { - return -1; + return 1; } auto d = devices_.begin(); diff --git a/projects/rocm-smi-lib/src/rocm_smi_monitor.cc b/projects/rocm-smi-lib/src/rocm_smi_monitor.cc index f7ca2631c2..1d6de35aba 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_monitor.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_monitor.cc @@ -281,7 +281,7 @@ Monitor::~Monitor(void) { } std::string -Monitor::MakeMonitorPath(MonitorTypes type, int32_t sensor_id) { +Monitor::MakeMonitorPath(MonitorTypes type, uint32_t sensor_id) { std::string tempPath = path_; std::string fn = kMonitorNameMap.at(type); @@ -313,7 +313,7 @@ int Monitor::readMonitor(MonitorTypes type, uint32_t sensor_id, return ReadSysfsStr(sysfs_path, val); } -uint32_t +int32_t Monitor::setTempSensorLabelMap(void) { std::string type_str; int ret; @@ -352,7 +352,7 @@ Monitor::setTempSensorLabelMap(void) { return 0; } -uint32_t +int32_t Monitor::setVoltSensorLabelMap(void) { std::string type_str; int ret; @@ -406,7 +406,7 @@ static int get_supported_sensors(std::string dir_path, std::string fn_reg_ex, auto dentry = readdir(hwmon_dir); std::smatch match; - int64_t mon_val; + uint64_t mon_val; char *endptr; try { @@ -418,7 +418,7 @@ static int get_supported_sensors(std::string dir_path, std::string fn_reg_ex, if (std::regex_search(fn, match, re)) { assert(match.size() == 2); // 1 for whole match + 1 for sub-match errno = 0; - mon_val = strtol(match.str(1).c_str(), &endptr, 10); + mon_val = strtoul(match.str(1).c_str(), &endptr, 10); assert(errno == 0); assert(*endptr == '\0'); if (errno) { @@ -611,7 +611,7 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) { 
static_cast(getVoltSensorEnum(supported_monitors[i])) << MONITOR_TYPE_BIT_POSITION; } else { - assert(!"Unexpected monitor type"); + assert(false); // Unexpected monitor type } } (*supported_variants)[kMonInfoVarTypeToRSMIVariant.at(*var)] = diff --git a/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc b/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc index f843caa782..3e1d7e0d45 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc @@ -86,7 +86,7 @@ static int parse_power_str(std::string s, PowerMonTypes type, uint64_t *val) { break; default: - assert(!"Invalid search Power type requested"); + assert(false); // Invalid search Power type requested return EINVAL; } @@ -128,7 +128,7 @@ static int parse_power_str(std::string s, PowerMonTypes type, uint64_t *val) { break; default: - assert(!"Invalid search Power type requested"); + assert(false); // Invalid search Power type requested return EINVAL; } ss.clear(); diff --git a/projects/rocm-smi-lib/src/rocm_smi_utils.cc b/projects/rocm-smi-lib/src/rocm_smi_utils.cc index 273e547417..41c2559f22 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_utils.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_utils.cc @@ -216,7 +216,7 @@ rsmi_status_t GetDevBinaryVec(amd::smi::DevInfoTypes type, return ErrnoToRsmiStatus(ret); } -rsmi_status_t ErrnoToRsmiStatus(uint32_t err) { +rsmi_status_t ErrnoToRsmiStatus(int err) { switch (err) { case 0: return RSMI_STATUS_SUCCESS; case ESRCH: return RSMI_STATUS_NOT_FOUND; diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc index 05f735ae5e..6984736e75 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.cc @@ -80,6 +80,10 @@ void TestBase::MakeHeaderStr(const char *inStr, } } +void TestBase::SetUp(void) { + SetUp(0); +} + void TestBase::SetUp(uint64_t init_flags) { std::string label; rsmi_status_t 
err; diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h index 9e07bf3873..d2ced349bc 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_base.h @@ -59,7 +59,8 @@ class TestBase { // i.e. init runtime, prepare packet... // The init_flags option will override any flags set for the whole test // suite - virtual void SetUp(uint64_t init_flags = 0); + void SetUp(uint64_t init_flags); + virtual void SetUp(void); // @Brief: Core measurement codes executing here virtual void Run(void); diff --git a/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc b/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc index 22f0320e33..9d4916d056 100755 --- a/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc +++ b/projects/rocm-smi-lib/tests/rocm_smi_test/test_common.cc @@ -80,9 +80,8 @@ static const char * kRasErrStateStrings[] = { "Error Unknown", // RSMI_RAS_ERR_STATE_PARITY "Single, Correctable", // RSMI_RAS_ERR_STATE_SING_C "Multiple, Uncorrectable", // RSMI_RAS_ERR_STATE_MULT_UC - "Poison" // RSMI_RAS_ERR_STATE_POISON - "Off", // RSMI_RAS_ERR_STATE_DISABLED - "On", // RSMI_RAS_ERR_STATE_ENABLED + "Poison", // RSMI_RAS_ERR_STATE_POISON + "Enabled", // RSMI_RAS_ERR_STATE_ENABLED }; static_assert( sizeof(kRasErrStateStrings)/sizeof(char *) == (RSMI_RAS_ERR_STATE_LAST + 1),