From 47a8ea99d1fb2765b159f1b23894c9a988bcfc38 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Mon, 22 Oct 2018 17:26:38 -0500 Subject: [PATCH] Comment corrections, debug output and assert added [ROCm/rocm_smi_lib commit: b2f1ee17643c30a9ac4be502e67a85dfff54c3b3] --- projects/rocm-smi-lib/CMakeLists.txt | 4 +- .../rocm-smi-lib/include/rocm_smi/rocm_smi.h | 44 +++++++------ .../include/rocm_smi/rocm_smi_common.h | 63 +++++++++++++++++++ .../include/rocm_smi/rocm_smi_device.h | 4 +- .../include/rocm_smi/rocm_smi_main.h | 5 +- .../include/rocm_smi/rocm_smi_monitor.h | 5 +- .../include/rocm_smi/rocm_smi_power_mon.h | 7 ++- projects/rocm-smi-lib/src/rocm_smi.cc | 8 ++- projects/rocm-smi-lib/src/rocm_smi_device.cc | 9 ++- projects/rocm-smi-lib/src/rocm_smi_main.cc | 26 +++++++- projects/rocm-smi-lib/src/rocm_smi_monitor.cc | 6 +- .../rocm-smi-lib/src/rocm_smi_power_mon.cc | 6 +- 12 files changed, 147 insertions(+), 40 deletions(-) create mode 100755 projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h diff --git a/projects/rocm-smi-lib/CMakeLists.txt b/projects/rocm-smi-lib/CMakeLists.txt index 65e055bb29..4568c1e052 100755 --- a/projects/rocm-smi-lib/CMakeLists.txt +++ b/projects/rocm-smi-lib/CMakeLists.txt @@ -29,7 +29,6 @@ cmake_minimum_required(VERSION 3.5.0) set(ROCM_SMI "rocm_smi") -set(RSMI_PACKAGE "rsmi") set(ROCM_SMI_COMPONENT "lib${ROCM_SMI}") set(ROCM_SMI_TARGET "${ROCM_SMI}64") @@ -78,6 +77,7 @@ set(LIB_VERSION_STRING set(BUILD_VERSION_STRING "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_PATCH}") +set(CPACK_PACKAGE_FILE_NAME "rocm_smi_lib64-${LIB_VERSION_STRING}") ## Verbose output. 
set(CMAKE_VERBOSE_MAKEFILE on) @@ -109,6 +109,7 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_monitor.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h") +set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h") # rocm_smi_device.h @@ -149,7 +150,6 @@ install(TARGETS ${ROCM_SMI_TARGET} install(FILES ${SOURCE_DIR}/include/rocm_smi/rocm_smi.h DESTINATION rocm_smi/include/rocm_smi) ## Add the packaging directives for the runtime library. -set(CPACK_PACKAGE_NAME ${ROCM_SMI_PACKAGE}) # TODO set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ) set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index a6f98dc61a..4bbfd0d91f 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -192,13 +192,19 @@ typedef uint64_t rsmi_bit_field; * currently active. */ typedef struct { - //!< Which profiles are supported by this system + /** + * Which profiles are supported by this system + */ rsmi_bit_field available_profiles; - //!< Which power profile is currently active + /** + * Which power profile is currently active + */ rsmi_power_profile_preset_masks current; - //!< How many power profiles are available + /** + * How many power profiles are available + */ uint32_t num_profiles; } rsmi_power_profile_status; @@ -448,10 +454,6 @@ rsmi_status_t rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len); * * @details Given a device index @p dv_ind, a 0-based sensor index * - * @param sensor_ind, a metric @p metric and a pointer to an int64_t - * @p temperature, this function will write the temperature value for that - * metric in millidegrees Celcius to the int64_t pointed to by @p temperature. 
- * * @param[in] dv_ind a device index * * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. @@ -505,30 +507,26 @@ rsmi_status_t rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed); /** - * @brief Set the fan speed for the specfied device with the provided speed, - * in RPMs. + * @brief Get the fan speed for the specfied device in RPMs. * - * @details Given a device index @p dv_ind and a integer value indicating - * speed @p speed, this function will attempt to set the fan speed to @p speed. - * An error will be returned if the specified speed is outside the allowable - * range for the device. The maximum value is RSMI_MAX_FAN_SPEED and the - * minimum is 0. + * @details Given a device index @p dv_ind + * this function will get the fan speed. * * @param[in] dv_ind a device index * - * @details Given a device index @p dv_ind and a pointer to a uint32_t - * @p speed, this function will write the current fan speed (a value - * between 0 and 255) to the uint32_t pointed to by @p speed + * @details Given a device index @p dv_ind and a pointer to a uint32_t + * @p speed, this function will write the current fan speed (a value + * between 0 and 255) to the uint32_t pointed to by @p speed * - * @param[in] dv_ind a device index + * @param[in] dv_ind a device index * - * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. - * If a device has more than one sensor, it could be greater than 0. + * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. + * If a device has more than one sensor, it could be greater than 0. * - * @param[inout] speed a pointer to uint32_t to which the speed will be - * written + * @param[inout] speed a pointer to uint32_t to which the speed will be + * written * - * @retval RSMI_STATUS_SUCCESS is returned upon successful call. + * @retval RSMI_STATUS_SUCCESS is returned upon successful call. 
* */ rsmi_status_t rsmi_dev_fan_speed_get(uint32_t dv_ind, diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h new file mode 100755 index 0000000000..057de8d90d --- /dev/null +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_common.h @@ -0,0 +1,63 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2018, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. + * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. + * + */ +#ifndef ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_COMMON_H_ +#define ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_COMMON_H_ + +#define DBG_FILE_ERROR(STR) \ + if (env_->debug_output_bitfield & RSMI_DEBUG_SYSFS_FILE_PATHS) { \ + std::cout << "*****" << __FUNCTION__ << std::endl; \ + std::cout << "*****Opening file: " << (STR) << std::endl; \ + std::cout << " at " << __FILE__ << ":" << __LINE__ << std::endl;\ + } + +// Add different debug filters here, as powers of 2; e.g, 1, 2, 4, 8, ... +#define RSMI_DEBUG_SYSFS_FILE_PATHS 1 + +struct RocmSMI_env_vars { + // Store env. 
variables here + uint32_t debug_output_bitfield; +}; + +#endif // ROCM_SMI_LIB_INCLUDE_ROCM_SMI_ROCM_SMI_COMMON_H_ diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index 3045e6f080..f7ec051b4d 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -52,6 +52,7 @@ #include "rocm_smi/rocm_smi_monitor.h" #include "rocm_smi/rocm_smi_power_mon.h" +#include "rocm_smi/rocm_smi_common.h" namespace amd { namespace smi { @@ -67,7 +68,7 @@ enum DevInfoTypes { class Device { public: - explicit Device(std::string path); + explicit Device(std::string path, RocmSMI_env_vars const *e); ~Device(void); void set_monitor(std::shared_ptr m) {monitor_ = m;} @@ -89,6 +90,7 @@ class Device { std::shared_ptr power_monitor_; std::string path_; uint32_t index_; + const RocmSMI_env_vars *env_; int readDevInfoStr(DevInfoTypes type, std::string *retStr); int readDevInfoMultiLineStr(DevInfoTypes type, std::vector *retVec); diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h index c759cda253..cf5ecba0e6 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_main.h @@ -55,6 +55,7 @@ #include "rocm_smi/rocm_smi_device.h" #include "rocm_smi/rocm_smi_monitor.h" #include "rocm_smi/rocm_smi_power_mon.h" +#include "rocm_smi/rocm_smi_common.h" namespace amd { namespace smi { @@ -78,17 +79,17 @@ class RocmSMI { std::function&, void *)> func, void *); private: - // temporarily make public RocmSMI(void); // force use getInstance() - std::vector> devices_; std::vector> monitors_; std::vector> power_mons_; std::set amd_monitor_types_; void AddToDeviceList(std::string dev_name); + void GetEnvVariables(void); uint32_t DiscoverAMDMonitors(void); static std::vector> s_monitor_devices; + RocmSMI_env_vars env_vars_; }; } 
// namespace smi diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h index 9df5a1219c..0e60b789a2 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_monitor.h @@ -48,6 +48,8 @@ #include #include +#include "rocm_smi/rocm_smi_common.h" + namespace amd { namespace smi { @@ -81,7 +83,7 @@ enum MonitorTypes { class Monitor { public: - explicit Monitor(std::string path); + explicit Monitor(std::string path, RocmSMI_env_vars const *e); ~Monitor(void); const std::string path(void) const {return path_;} int readMonitor(MonitorTypes type, uint32_t sensor_ind, std::string *val); @@ -89,6 +91,7 @@ class Monitor { private: std::string MakeMonitorPath(MonitorTypes type, int32_t sensor_id); std::string path_; + const RocmSMI_env_vars *env_; }; } // namespace smi diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_power_mon.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_power_mon.h index ffbb1567af..ea693d3878 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_power_mon.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_power_mon.h @@ -48,6 +48,8 @@ #include #include +#include "rocm_smi/rocm_smi_common.h" + namespace amd { namespace smi { @@ -59,7 +61,7 @@ enum PowerMonTypes { class PowerMon { public: - explicit PowerMon(std::string path); + explicit PowerMon(std::string path, RocmSMI_env_vars const *e); ~PowerMon(void); const std::string path(void) const {return path_;} @@ -68,8 +70,9 @@ class PowerMon { int readPowerValue(PowerMonTypes type, uint64_t *power); private: - uint32_t dev_index_; std::string path_; + const RocmSMI_env_vars *env_; + uint32_t dev_index_; }; } // namespace smi diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index e9ce432c7a..3b15b12d40 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ 
-441,6 +441,12 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, for (uint32_t i = 0; i < f->num_supported; ++i) { f->frequency[i] = freq_string_to_int(val_vec[i], &current); + + // Our assumption is that frequencies are read in from lowest to highest. + // Check that that is true. + if (i > 0) { + assert(f->frequency[i-1] <= f->frequency[i]); + } if (current) { // Should only be 1 current frequency assert(f->current == RSMI_MAX_NUM_FREQUENCIES + 1); @@ -511,8 +517,6 @@ static rsmi_status_t set_power_profile(uint32_t dv_ind, rsmi_status_t ret; rsmi_power_profile_status avail_profiles = {0, RSMI_PWR_PROF_PRST_INVALID, 0}; - // TODO(cf): test if it is valid to OR profiles; if not the following is - // not necessary: // Determine if the provided profile is valid if (!is_power_of_2(profile)) { return RSMI_STATUS_INPUT_OUT_OF_BOUNDS; diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index 3819572fcd..853635da55 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -98,7 +98,7 @@ static bool isRegularFile(std::string fname) { if (X) return X; \ } -Device::Device(std::string p) : path_(p) { +Device::Device(std::string p, RocmSMI_env_vars const *e) : path_(p), env_(e) { monitor_ = nullptr; } @@ -114,6 +114,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { tempPath += "/device/"; tempPath += kDevAttribNameMap.at(type); + DBG_FILE_ERROR(tempPath); if (!isRegularFile(tempPath)) { return EISDIR; } @@ -121,6 +122,7 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) { std::ifstream fs; fs.open(tempPath); + DBG_FILE_ERROR(tempPath); if (!fs.is_open()) { return errno; } @@ -139,12 +141,14 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) { std::ofstream fs; fs.open(tempPath); + DBG_FILE_ERROR(tempPath); if (!isRegularFile(tempPath)) { return EISDIR; } + DBG_FILE_ERROR(tempPath); if 
(!fs.is_open()) { - return errno; + return errno; } fs << valStr; @@ -206,6 +210,7 @@ int Device::readDevInfoMultiLineStr(DevInfoTypes type, std::stringstream buffer; + DBG_FILE_ERROR(tempPath); if (!isRegularFile(tempPath)) { return EISDIR; } diff --git a/projects/rocm-smi-lib/src/rocm_smi_main.cc b/projects/rocm-smi-lib/src/rocm_smi_main.cc index dd8716b956..409e4687aa 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_main.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_main.cc @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -138,6 +139,8 @@ std::vector> RocmSMI::s_monitor_devices; RocmSMI::RocmSMI(void) { auto i = 0; + GetEnvVariables(); + while (std::string(kAMDMonitorTypes[i]) != "") { amd_monitor_types_.insert(kAMDMonitorTypes[i]); ++i; @@ -165,6 +168,20 @@ RocmSMI& RocmSMI::getInstance(void) { return singleton; } +static int GetEnvVarInteger(const char *ev_str) { + ev_str = getenv(ev_str); + + if (ev_str) { + return atoi(ev_str); + } + return 0; +} + +// Get and store env. 
variables in this method +void RocmSMI::GetEnvVariables(void) { + env_vars_.debug_output_bitfield = GetEnvVarInteger("RSMI_DEBUG_BITFIELD"); +} + void RocmSMI::AddToDeviceList(std::string dev_name) { auto ret = 0; @@ -173,7 +190,7 @@ RocmSMI::AddToDeviceList(std::string dev_name) { dev_path += "/"; dev_path += dev_name; - auto dev = std::shared_ptr<Device>(new Device(dev_path)); + auto dev = std::shared_ptr<Device>(new Device(dev_path, &env_vars_)); auto m = monitors_.begin(); @@ -214,6 +231,8 @@ uint32_t RocmSMI::DiscoverDevices(void) { } auto drm_dir = opendir(kPathDRMRoot); + assert(drm_dir != nullptr); + auto dentry = readdir(drm_dir); while (dentry != nullptr) { @@ -261,7 +280,8 @@ uint32_t RocmSMI::DiscoverAMDMonitors(void) { fs.close(); if (amd_monitor_types_.find(mon_type) != amd_monitor_types_.end()) { - monitors_.push_back(std::shared_ptr<Monitor>(new Monitor(mon_name))); + monitors_.push_back(std::shared_ptr<Monitor>( + new Monitor(mon_name, &env_vars_))); } } dentry = readdir(mon_dir); @@ -311,7 +331,7 @@ uint32_t RocmSMI::DiscoverAMDPowerMonitors(bool force_update) { if (FileExists(tmp.c_str())) { std::shared_ptr<PowerMon> mon = - std::shared_ptr<PowerMon>(new PowerMon(mon_name)); + std::shared_ptr<PowerMon>(new PowerMon(mon_name, &env_vars_)); power_mons_.push_back(mon); mon->set_dev_index(GetDeviceIndex(dentry->d_name)); } diff --git a/projects/rocm-smi-lib/src/rocm_smi_monitor.cc b/projects/rocm-smi-lib/src/rocm_smi_monitor.cc index b9bb298c5d..81dc0f097f 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_monitor.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_monitor.cc @@ -114,7 +114,8 @@ static const std::map kMonitorNameMap = { {kMonTempHighest, kMonTempHighestName}, }; -Monitor::Monitor(std::string path) : path_(path) { +Monitor::Monitor(std::string path, RocmSMI_env_vars const *e) : + path_(path), env_(e) { } Monitor::~Monitor(void) { } @@ -135,6 +136,8 @@ Monitor::MakeMonitorPath(MonitorTypes type, int32_t sensor_id) { int Monitor::writeMonitor(MonitorTypes type, uint32_t sensor_id, std::string val) { 
std::string sysfs_path = MakeMonitorPath(type, sensor_id); + + DBG_FILE_ERROR(sysfs_path) return WriteSysfsStr(sysfs_path, val); } @@ -146,6 +149,7 @@ int Monitor::readMonitor(MonitorTypes type, uint32_t sensor_id, std::string temp_str; std::string sysfs_path = MakeMonitorPath(type, sensor_id); + DBG_FILE_ERROR(sysfs_path) return ReadSysfsStr(sysfs_path, val); } diff --git a/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc b/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc index 6a4a11bb88..27dec0d7a9 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_power_mon.cc @@ -55,6 +55,7 @@ #include "rocm_smi/rocm_smi_main.h" #include "rocm_smi/rocm_smi_monitor.h" #include "rocm_smi/rocm_smi_utils.h" +#include "rocm_smi/rocm_smi_common.h" namespace amd { namespace smi { @@ -68,7 +69,8 @@ static const std::map kMonitorNameMap = { {kPowerAveGPUPower, kPowerMonPMName}, }; -PowerMon::PowerMon(std::string path) : path_(path) { +PowerMon::PowerMon(std::string path, RocmSMI_env_vars const *e) : + path_(path), env_(e) { } PowerMon::~PowerMon(void) { } @@ -139,6 +141,8 @@ int PowerMon::readPowerValue(PowerMonTypes type, uint64_t *power) { tempPath += "/"; tempPath += kMonitorNameMap.at(type); + + DBG_FILE_ERROR(tempPath) int ret = ReadSysfsStr(tempPath, &fstr); if (ret) {