From 0fe747d2cd8de4d0a9d7fa8a3d7db521f32fa51b Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Sat, 4 May 2019 15:10:58 -0500 Subject: [PATCH] Add mutex for multi-process access to device sysfs files This commit uses a pthread mutex in shared memory to prevent almost all cases of multiple processes simultaneously reading/writing to device sysfs files. The main existing race condition is when 2 processes are starting at the same time, setting up their shared memory and mutexes. Since this is meant to prevent collisions among thread and processes, the small shared memory segments (big enough for a pthread_mutex) will persist until reboot. [ROCm/rocm_smi_lib commit: 5e24a77193cb3858492f5fc8ae615821e01b016b] --- projects/rocm-smi-lib/CMakeLists.txt | 6 +- .../include/rocm_smi/rocm_smi_device.h | 9 +- .../include/rocm_smi/rocm_smi_utils.h | 24 ++++ projects/rocm-smi-lib/src/rocm_smi.cc | 91 +++++++++++- projects/rocm-smi-lib/src/rocm_smi_device.cc | 26 ++++ .../rocm-smi-lib/src/shared_mutex/LICENSE | 21 +++ .../src/shared_mutex/shared_mutex.c | 131 ++++++++++++++++++ .../src/shared_mutex/shared_mutex.h | 67 +++++++++ 8 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 projects/rocm-smi-lib/src/shared_mutex/LICENSE create mode 100755 projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c create mode 100755 projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h diff --git a/projects/rocm-smi-lib/CMakeLists.txt b/projects/rocm-smi-lib/CMakeLists.txt index 01b00d62bc..53c8d5977d 100755 --- a/projects/rocm-smi-lib/CMakeLists.txt +++ b/projects/rocm-smi-lib/CMakeLists.txt @@ -106,13 +106,15 @@ endif () set(SRC_DIR "src") set(INC_DIR "include/rocm_smi") -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/src/shared_mutex) set(SMI_SRC_LIST "${SRC_DIR}/rocm_smi_device.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_main.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_monitor.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc") +set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.c") set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h") @@ -121,12 +123,14 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h") +set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h") set(SMI_EXAMPLE_EXE "rocm_smi_ex") add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc") target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET}) add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST}) +target_link_libraries(${ROCM_SMI_TARGET} pthread rt) ## Set the VERSION and SOVERSION values set_property(TARGET ${ROCM_SMI_TARGET} diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index 94557204c0..7197814209 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -42,6 +42,9 @@ */ #ifndef INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_ #define INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_ + +#include + #include #include #include @@ -52,6 +55,9 @@ #include "rocm_smi/rocm_smi_power_mon.h" #include "rocm_smi/rocm_smi_common.h" #include "rocm_smi/rocm_smi.h" +extern "C" { +#include "shared_mutex.h" +}; namespace amd { namespace smi { @@ -109,11 +115,12 @@ class Device { uint64_t bdfid(void) const {return bdfid_;} void set_bdfid(uint64_t val) {bdfid_ = val;} uint64_t get_bdfid(void) const {return bdfid_;} - + pthread_mutex_t *mutex(void) {return mutex_.ptr;} private: std::shared_ptr monitor_; std::shared_ptr power_monitor_; std::string path_; + shared_mutex_t mutex_; uint32_t index_; const RocmSMI_env_vars *env_; template int openSysfsFileStream(DevInfoTypes type, T *fs, diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h index 15bae95e14..8949eaac7a 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h @@ -43,6 +43,8 @@ #ifndef INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_ #define INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_ +#include + #include #include @@ -63,6 +65,28 @@ namespace smi { int ReadSysfsStr(std::string path, std::string *retStr); int WriteSysfsStr(std::string path, std::string val); +struct pthread_wrap { + public: + pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {} + + void Acquire() { pthread_mutex_lock(&mutex_); } + void Release() { pthread_mutex_unlock(&mutex_); } + private: + pthread_mutex_t& mutex_; +}; +struct ScopedPthread { + ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) { + pthrd_ref_.Acquire(); + }; + + ~ScopedPthread() { + pthrd_ref_.Release(); + } + private: + ScopedPthread(const ScopedPthread&); + + pthread_wrap& pthrd_ref_; +}; } // namespace smi } // namespace amd diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc b/projects/rocm-smi-lib/src/rocm_smi.cc index 2da8a54ba4..3a8cec8e31 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -100,6 +101,21 @@ static rsmi_status_t handleException() { std::shared_ptr dev = smi.monitor_devices()[dv_ind]; \ assert(dev != nullptr); +#define DEVICE_MUTEX \ + amd::smi::pthread_wrap _pw(*get_mutex(dv_ind)); \ + amd::smi::ScopedPthread _lock(_pw); + +static pthread_mutex_t *get_mutex(uint32_t dv_ind) { + amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance(); + if (dv_ind >= smi.monitor_devices().size()) { + return nullptr; + } + std::shared_ptr dev = smi.monitor_devices()[dv_ind]; + assert(dev != nullptr); + + return dev->mutex(); +} + static rsmi_status_t errno_to_rsmi_status(uint32_t err) { switch (err) { case 0: return RSMI_STATUS_SUCCESS; @@ -440,6 +456,9 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind, if (enabled_mask == nullptr) { return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX + std::vector val_vec; ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec); @@ -508,6 +527,8 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_status_t ret; std::vector val_vec; + DEVICE_MUTEX + ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec); if (ret == RSMI_STATUS_FILE_ERROR) { @@ -569,6 +590,9 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block, default: return RSMI_STATUS_NOT_SUPPORTED; } + + DEVICE_MUTEX + ret = get_dev_value_vec(type, dv_ind, &val_vec); if (ret == RSMI_STATUS_FILE_ERROR) { @@ -605,6 +629,8 @@ rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid) { } GET_DEV_FROM_INDX + DEVICE_MUTEX + *bdfid = dev->get_bdfid(); return RSMI_STATUS_SUCCESS; CATCH @@ -614,6 +640,9 @@ static rsmi_status_t get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) { TRY std::string val_str; + + DEVICE_MUTEX + rsmi_status_t ret = get_dev_value_str(typ, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -630,21 +659,25 @@ get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) { rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevDevID, id); } rsmi_status_t rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevSubSysDevID, id); } rsmi_status_t rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevVendorID, id); } rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevSubSysVendorID, id); } @@ -652,6 +685,8 @@ rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf) { TRY std::string val_str; + DEVICE_MUTEX + rsmi_status_t ret = get_dev_value_str(amd::smi::kDevPerfLevel, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -668,6 +703,8 @@ rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { TRY std::string val_str; + DEVICE_MUTEX + rsmi_status_t ret = get_dev_value_str(amd::smi::kDevOverDriveLevel, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -688,7 +725,7 @@ rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) { if (od > kMaxOverdriveLevel) { return RSMI_STATUS_INVALID_ARGS; } - + DEVICE_MUTEX return set_dev_value(amd::smi::kDevOverDriveLevel, dv_ind, od); CATCH } @@ -700,6 +737,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_level) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX return set_dev_value(amd::smi::kDevPerfLevel, dv_ind, perf_level); CATCH } @@ -1009,6 +1047,8 @@ rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type, return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + return get_frequencies(dev_type, dv_ind, f); CATCH @@ -1035,6 +1075,9 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, rsmi_frequencies_t freqs; TRY + + DEVICE_MUTEX + ret = rsmi_dev_gpu_clk_freq_get(dv_ind, clk_type, &freqs); if (ret != RSMI_STATUS_SUCCESS) { @@ -1247,6 +1290,8 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_DEVICE); if (ret != RSMI_STATUS_SUCCESS) { return ret; @@ -1265,6 +1310,8 @@ rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_SUBSYS); return ret; CATCH @@ -1279,6 +1326,7 @@ rsmi_dev_vendor_name_get(uint32_t dv_ind, char *name, size_t len) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_VENDOR); return ret; CATCH @@ -1294,6 +1342,8 @@ rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *b) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + return get_frequencies(amd::smi::kDevPCIEClk, dv_ind, &b->transfer_rate, b->lanes); @@ -1306,6 +1356,8 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) { rsmi_pcie_bandwidth_t bws; TRY + + DEVICE_MUTEX ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bws); if (ret != RSMI_STATUS_SUCCESS) { @@ -1346,6 +1398,9 @@ rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent, rsmi_status_t ret; std::string val_str; + + DEVICE_MUTEX + ret = get_dev_value_line(amd::smi::kDevPCIEThruPut, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -1435,6 +1490,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind, mon_type = amd::smi::kMonInvalid; } + DEVICE_MUTEX + ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature); return ret; @@ -1453,6 +1510,8 @@ rsmi_dev_fan_speed_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) { ++sensor_ind; // fan sysfs files have 1-based indices + DEVICE_MUTEX + ret = get_dev_mon_value(amd::smi::kMonFanSpeed, dv_ind, sensor_ind, speed); return ret; @@ -1470,6 +1529,8 @@ rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) { rsmi_status_t ret; + DEVICE_MUTEX + ret = get_dev_mon_value(amd::smi::kMonFanRPMs, dv_ind, sensor_ind, speed); return ret; @@ -1484,6 +1545,8 @@ rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) { ++sensor_ind; // fan sysfs files have 1-based indices + DEVICE_MUTEX + ret = set_dev_mon_value(amd::smi::kMonFanCntrlEnable, dv_ind, sensor_ind, 2); @@ -1499,6 +1562,7 @@ rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) { rsmi_status_t ret; uint64_t max_speed; + DEVICE_MUTEX ret = rsmi_dev_fan_speed_max_get(dv_ind, sensor_ind, &max_speed); @@ -1541,6 +1605,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t ret; + DEVICE_MUTEX + ret = get_dev_mon_value(amd::smi::kMonMaxFanSpeed, dv_ind, sensor_ind, reinterpret_cast(max_speed)); @@ -1551,6 +1617,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) { TRY + DEVICE_MUTEX + rsmi_status_t ret = get_od_clk_volt_info(dv_ind, odv); return ret; @@ -1564,6 +1632,8 @@ rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind, if (buffer == nullptr || num_regions == nullptr || *num_regions == 0) { return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX rsmi_status_t ret = get_od_clk_volt_curve_regions(dv_ind, num_regions, buffer); return ret; @@ -1582,6 +1652,8 @@ rsmi_dev_power_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) { // ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_power_mon_value(amd::smi::kPowerMaxGPUPower, dv_ind, power); return ret; @@ -1598,6 +1670,8 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) { ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_dev_mon_value(amd::smi::kMonPowerAve, dv_ind, sensor_ind, power); return ret; @@ -1615,6 +1689,8 @@ rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap) { ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_dev_mon_value(amd::smi::kMonPowerCap, dv_ind, sensor_ind, cap); return ret; @@ -1633,6 +1709,8 @@ rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind, ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_dev_mon_value(amd::smi::kMonPowerCapMax, dv_ind, sensor_ind, max); if (ret == RSMI_STATUS_SUCCESS) { @@ -1650,6 +1728,7 @@ rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap) { rsmi_status_t ret; uint64_t min, max; + DEVICE_MUTEX ret = rsmi_dev_power_cap_range_get(dv_ind, sensor_ind, &max, &min); if (ret != RSMI_STATUS_SUCCESS) { @@ -1678,6 +1757,7 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, ++sensor_ind; // power sysfs files have 1-based indices + DEVICE_MUTEX rsmi_status_t ret = get_power_profiles(dv_ind, status, nullptr); return ret; CATCH @@ -1689,6 +1769,7 @@ rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind, TRY ++sensor_ind; // power sysfs files have 1-based indices + DEVICE_MUTEX rsmi_status_t ret = set_power_profile(dv_ind, profile); return ret; CATCH @@ -1722,6 +1803,8 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, assert(!"Unexpected memory type"); return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX ret = get_dev_value_int(mem_type_file, dv_ind, total); return ret; @@ -1755,6 +1838,8 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, assert(!"Unexpected memory type"); return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX ret = get_dev_value_int(mem_type_file, dv_ind, used); return ret; @@ -1847,6 +1932,8 @@ rsmi_status_t rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent) { TRY std::string val_str; + + DEVICE_MUTEX rsmi_status_t ret = get_dev_value_str(amd::smi::kDevUsage, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -1871,6 +1958,8 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) { TRY GET_DEV_FROM_INDX std::string val_str; + + DEVICE_MUTEX int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str); if (ret != 0) { diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index 2a8cbc3f23..f50fc319ee 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -41,6 +41,10 @@ * */ +#include +#include +#include + #include #include #include @@ -55,6 +59,11 @@ #include "rocm_smi/rocm_smi_main.h" #include "rocm_smi/rocm_smi_device.h" #include "rocm_smi/rocm_smi.h" +#include "rocm_smi/rocm_smi_exception.h" + +extern "C" { +#include "shared_mutex.h" // NOLINT +}; namespace amd { namespace smi { @@ -154,9 +163,26 @@ static bool isRegularFile(std::string fname) { Device::Device(std::string p, RocmSMI_env_vars const *e) : path_(p), env_(e) { monitor_ = nullptr; + + // Get the device name + size_t i = path_.rfind('/', path_.length()); + std::string dev = path_.substr(i + 1, path_.length() - i); + + std::string m_name("/rocm_smi_"); + m_name += dev; + m_name += '_'; + m_name += std::to_string(geteuid()); + + mutex_ = shared_mutex_init(m_name.c_str(), 0777); + + if (mutex_.ptr == nullptr) { + throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR, + "Failed to create shared mem. mutex."); + } } Device:: ~Device() { + shared_mutex_close(mutex_); } template diff --git a/projects/rocm-smi-lib/src/shared_mutex/LICENSE b/projects/rocm-smi-lib/src/shared_mutex/LICENSE new file mode 100644 index 0000000000..d85e0d6d9f --- /dev/null +++ b/projects/rocm-smi-lib/src/shared_mutex/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Oleg Yamnikov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c new file mode 100755 index 0000000000..33c4d38729 --- /dev/null +++ b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c @@ -0,0 +1,131 @@ +#include "shared_mutex.h" +#include // errno, ENOENT +#include // O_RDWR, O_CREATE +#include // NAME_MAX +#include // shm_open, shm_unlink, mmap, munmap, + // PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED +#include // ftruncate, close +#include // perror +#include // malloc, free +#include // strcpy + +shared_mutex_t shared_mutex_init(const char *name, mode_t mode) { + shared_mutex_t mutex = {NULL, 0, NULL, 0}; + errno = 0; + + // Open existing shared memory object, or create one. + // Two separate calls are needed here, to mark fact of creation + // for later initialization of pthread mutex. + mutex.shm_fd = shm_open(name, O_RDWR, mode); + if (errno == ENOENT) { + mutex.shm_fd = shm_open(name, O_RDWR|O_CREAT, mode); + mutex.created = 1; + // Change permissions of shared memory, so every body can access it. Avoiding the umask of shm_open + if (fchmod(mutex.shm_fd, mode) != 0) { + perror("fchmod"); + } + } + if (mutex.shm_fd == -1) { + perror("shm_open"); + return mutex; + } + + // Truncate shared memory segment so it would contain + // pthread_mutex_t AND the ref. count + if (ftruncate(mutex.shm_fd, sizeof(pthread_mutex_t)) != 0) { + perror("ftruncate"); + return mutex; + } + + // Map pthread mutex into the shared memory. + void *addr = mmap( + NULL, + sizeof(pthread_mutex_t), + PROT_READ|PROT_WRITE, + MAP_SHARED, + mutex.shm_fd, + 0 + ); + if (addr == MAP_FAILED) { + perror("mmap"); + return mutex; + } + + if (mutex.created == 0 && ((shared_mutex_t *)addr)->ptr == NULL) { + // Something is out of sync. Unlink shm and start over. + if (shm_unlink(name)) { + mutex.shm_fd = 0; + perror("shm_unlink"); + } + free(mutex.name); + + return shared_mutex_init(name, mode); + } + + pthread_mutex_t *mutex_ptr = (pthread_mutex_t *)addr; + + if (mutex.created) { + pthread_mutexattr_t attr; + if (pthread_mutexattr_init(&attr)) { + perror("pthread_mutexattr_init"); + return mutex; + } + if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) { + perror("pthread_mutexattr_setpshared"); + return mutex; + } + + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) { + perror("pthread_mutexattr_settype"); + return mutex; + } + if (pthread_mutex_init(mutex_ptr, &attr)) { + perror("pthread_mutex_init"); + return mutex; + } + } + + mutex.ptr = mutex_ptr; + mutex.name = (char *)malloc(NAME_MAX+1); + strcpy(mutex.name, name); + return mutex; +} + +int shared_mutex_close(shared_mutex_t mutex) { + if (munmap((void *)mutex.ptr, sizeof(pthread_mutex_t))) { + perror("munmap"); + return -1; + } + mutex.ptr = NULL; + if (close(mutex.shm_fd)) { + perror("close"); + return -1; + } + mutex.shm_fd = 0; + free(mutex.name); + + return 0; +} + +int shared_mutex_destroy(shared_mutex_t mutex) { + if ((errno = pthread_mutex_destroy(mutex.ptr))) { + perror("pthread_mutex_destroy"); + return -1; + } + if (munmap((void *)mutex.ptr, sizeof(pthread_mutex_t))) { + perror("munmap"); + return -1; + } + mutex.ptr = NULL; + if (close(mutex.shm_fd)) { + perror("close"); + return -1; + } + mutex.shm_fd = 0; + if (shm_unlink(mutex.name)) { + perror("shm_unlink"); + return -1; + } + free(mutex.name); + return 0; +} diff --git a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h new file mode 100755 index 0000000000..18e70bd6de --- /dev/null +++ b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h @@ -0,0 +1,67 @@ +#ifndef SRC_SHARED_MUTEX_SHARED_MUTEX_H_ +#define SRC_SHARED_MUTEX_SHARED_MUTEX_H_ + +#include + +#include // pthread_mutex_t, pthread_mutexattr_t, + // pthread_mutexattr_init, pthread_mutexattr_setpshared, + // pthread_mutex_init, pthread_mutex_destroy + +// Structure of a shared mutex. +typedef struct shared_mutex_t { + pthread_mutex_t *ptr; // Pointer to the pthread mutex and + // shared memory segment. + int shm_fd; // Descriptor of shared memory object. + char* name; // Name of the mutex and associated + // shared memory object. + int created; // Equals 1 (true) if initialization + // of this structure caused creation + // of a new shared mutex. + // Equals 0 (false) if this mutex was + // just retrieved from shared memory. +} shared_mutex_t; + +// Initialize a new shared mutex with given `name`. If a mutex +// with such name exists in the system, it will be loaded. +// Otherwise a new mutes will by created. +// +// In case of any error, it will be printed into the standard output +// and the returned structure will have `ptr` equal `NULL`. +// `errno` wil not be reset in such case, so you may used it. +// +// **NOTE:** In case when the mutex appears to be uncreated, +// this function becomes *non-thread-safe*. If multiple threads +// call it at one moment, there occur several race conditions, +// in which one call might recreate another's shared memory +// object or rewrite another's pthread mutex in the shared memory. +// There is no workaround currently, except to run first +// initialization only before multi-threaded or multi-process +// functionality. +shared_mutex_t shared_mutex_init(const char *name, mode_t mode); + +// Close access to the shared mutex and free all the resources, +// used by the structure. +// +// Returns 0 in case of success. If any error occurs, it will be +// printed into the standard output and the function will return -1. +// `errno` wil not be reset in such case, so you may used it. +// +// **NOTE:** It will not destroy the mutex. The mutex would not +// only be available to other processes using it right now, +// but also to any process which might want to use it later on. +// For complete desctruction use `shared_mutex_destroy` instead. +// +// **NOTE:** It will not unlock locked mutex. +int shared_mutex_close(shared_mutex_t mutex); + +// Close and destroy shared mutex. +// Any open pointers to it will be invalidated. +// +// Returns 0 in case of success. If any error occurs, it will be +// printed into the standard output and the function will return -1. +// `errno` wil not be reset in such case, so you may used it. +// +// **NOTE:** It will not unlock locked mutex. +int shared_mutex_destroy(shared_mutex_t mutex); + +#endif // SRC_SHARED_MUTEX_SHARED_MUTEX_H_