diff --git a/projects/rocm-smi-lib/CMakeLists.txt b/projects/rocm-smi-lib/CMakeLists.txt index 01b00d62bc..53c8d5977d 100755 --- a/projects/rocm-smi-lib/CMakeLists.txt +++ b/projects/rocm-smi-lib/CMakeLists.txt @@ -106,13 +106,15 @@ endif () set(SRC_DIR "src") set(INC_DIR "include/rocm_smi") -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_SOURCE_DIR}/src/shared_mutex) set(SMI_SRC_LIST "${SRC_DIR}/rocm_smi_device.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_main.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_monitor.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc") set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc") +set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.c") set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h") @@ -121,12 +123,14 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h") set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h") +set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h") set(SMI_EXAMPLE_EXE "rocm_smi_ex") add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc") target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET}) add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST}) +target_link_libraries(${ROCM_SMI_TARGET} pthread rt) ## Set the VERSION and SOVERSION values set_property(TARGET ${ROCM_SMI_TARGET} diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h index 94557204c0..7197814209 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h +++ 
b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_device.h @@ -42,6 +42,9 @@ */ #ifndef INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_ #define INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_ + +#include + #include #include #include @@ -52,6 +55,9 @@ #include "rocm_smi/rocm_smi_power_mon.h" #include "rocm_smi/rocm_smi_common.h" #include "rocm_smi/rocm_smi.h" +extern "C" { +#include "shared_mutex.h" +}; namespace amd { namespace smi { @@ -109,11 +115,12 @@ class Device { uint64_t bdfid(void) const {return bdfid_;} void set_bdfid(uint64_t val) {bdfid_ = val;} uint64_t get_bdfid(void) const {return bdfid_;} - + pthread_mutex_t *mutex(void) {return mutex_.ptr;} private: std::shared_ptr monitor_; std::shared_ptr power_monitor_; std::string path_; + shared_mutex_t mutex_; uint32_t index_; const RocmSMI_env_vars *env_; template int openSysfsFileStream(DevInfoTypes type, T *fs, diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h index 15bae95e14..8949eaac7a 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_utils.h @@ -43,6 +43,8 @@ #ifndef INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_ #define INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_ +#include + #include #include @@ -63,6 +65,38 @@ namespace smi { int ReadSysfsStr(std::string path, std::string *retStr); int WriteSysfsStr(std::string path, std::string val);
+// Thin non-owning adapter over a caller-provided pthread mutex. Only a
+// reference is stored, so the wrapped mutex must outlive this object.
+struct pthread_wrap {
+ public:
+  explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}
+
+  // Lock the wrapped mutex (pthread_mutex_lock).
+  void Acquire() { pthread_mutex_lock(&mutex_); }
+  // Unlock the wrapped mutex (pthread_mutex_unlock).
+  void Release() { pthread_mutex_unlock(&mutex_); }
+ private:
+  pthread_mutex_t& mutex_;
+};
+
+// Scope guard: acquires the wrapped mutex on construction and releases it
+// when the guard is destroyed.
+struct ScopedPthread {
+  explicit ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) {
+    pthrd_ref_.Acquire();
+  }
+
+  ~ScopedPthread() {
+    pthrd_ref_.Release();
+  }
+
+  // A guard stands for exactly one acquisition; a copy would release twice.
+  ScopedPthread(const ScopedPthread&) = delete;
+  ScopedPthread& operator=(const ScopedPthread&) = delete;
+
+ private:
+  pthread_wrap& pthrd_ref_;
+};
 } // namespace smi } // namespace amd diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc 
b/projects/rocm-smi-lib/src/rocm_smi.cc index 2da8a54ba4..3a8cec8e31 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -100,6 +101,33 @@ static rsmi_status_t handleException() { std::shared_ptr dev = smi.monitor_devices()[dv_ind]; \ assert(dev != nullptr);
+// Take the cross-process lock of device dv_ind for the rest of the enclosing
+// scope. Expands to a return statement, so it may only be used in functions
+// that return rsmi_status_t: an out-of-range dv_ind yields
+// RSMI_STATUS_INVALID_ARGS instead of dereferencing a null mutex pointer.
+#define DEVICE_MUTEX \
+    pthread_mutex_t *_dev_mutex = get_mutex(dv_ind); \
+    if (_dev_mutex == nullptr) { \
+      return RSMI_STATUS_INVALID_ARGS; \
+    } \
+    amd::smi::pthread_wrap _pw(*_dev_mutex); \
+    amd::smi::ScopedPthread _lock(_pw);
+
+// Return the mutex guarding device dv_ind, or nullptr when dv_ind is not a
+// valid index into the monitored-device list.
+static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
+  // Bind by reference rather than copy-constructing a RocmSMI object on
+  // every call (assumes getInstance() returns a reference -- TODO confirm).
+  amd::smi::RocmSMI &smi = amd::smi::RocmSMI::getInstance();
+  if (dv_ind >= smi.monitor_devices().size()) {
+    return nullptr;
+  }
+  std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
+  assert(dev != nullptr);
+
+  return dev->mutex();
+}
+
 static rsmi_status_t errno_to_rsmi_status(uint32_t err) { switch (err) { case 0: return RSMI_STATUS_SUCCESS; @@ -440,6 +456,9 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind, if (enabled_mask == nullptr) { return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX + std::vector val_vec; ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec); @@ -508,6 +527,8 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block, rsmi_status_t ret; std::vector val_vec; + DEVICE_MUTEX + ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec); if (ret == RSMI_STATUS_FILE_ERROR) { @@ -569,6 +590,9 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block, default: return RSMI_STATUS_NOT_SUPPORTED; } + + DEVICE_MUTEX + ret = get_dev_value_vec(type, dv_ind, &val_vec); if (ret == RSMI_STATUS_FILE_ERROR) { @@ -605,6 +629,8 @@ rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid) { } GET_DEV_FROM_INDX + DEVICE_MUTEX + *bdfid = dev->get_bdfid(); return RSMI_STATUS_SUCCESS; CATCH @@ -614,6 +640,9 @@ static rsmi_status_t get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) { TRY std::string val_str; + + DEVICE_MUTEX + rsmi_status_t ret = 
get_dev_value_str(typ, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -630,21 +659,25 @@ get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) { rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevDevID, id); } rsmi_status_t rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevSubSysDevID, id); } rsmi_status_t rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevVendorID, id); } rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id) { + DEVICE_MUTEX return get_id(dv_ind, amd::smi::kDevSubSysVendorID, id); } @@ -652,6 +685,8 @@ rsmi_status_t rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf) { TRY std::string val_str; + DEVICE_MUTEX + rsmi_status_t ret = get_dev_value_str(amd::smi::kDevPerfLevel, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -668,6 +703,8 @@ rsmi_status_t rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) { TRY std::string val_str; + DEVICE_MUTEX + rsmi_status_t ret = get_dev_value_str(amd::smi::kDevOverDriveLevel, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -688,7 +725,7 @@ rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) { if (od > kMaxOverdriveLevel) { return RSMI_STATUS_INVALID_ARGS; } - + DEVICE_MUTEX return set_dev_value(amd::smi::kDevOverDriveLevel, dv_ind, od); CATCH } @@ -700,6 +737,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_level) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX return set_dev_value(amd::smi::kDevPerfLevel, dv_ind, perf_level); CATCH } @@ -1009,6 +1047,8 @@ rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type, return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + return get_frequencies(dev_type, dv_ind, f); CATCH @@ -1035,6 +1075,9 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, rsmi_frequencies_t freqs; TRY + 
+ DEVICE_MUTEX + ret = rsmi_dev_gpu_clk_freq_get(dv_ind, clk_type, &freqs); if (ret != RSMI_STATUS_SUCCESS) { @@ -1247,6 +1290,8 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_DEVICE); if (ret != RSMI_STATUS_SUCCESS) { return ret; @@ -1265,6 +1310,8 @@ rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_SUBSYS); return ret; CATCH @@ -1279,6 +1326,7 @@ rsmi_dev_vendor_name_get(uint32_t dv_ind, char *name, size_t len) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_VENDOR); return ret; CATCH @@ -1294,6 +1342,8 @@ rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *b) { return RSMI_STATUS_INVALID_ARGS; } + DEVICE_MUTEX + return get_frequencies(amd::smi::kDevPCIEClk, dv_ind, &b->transfer_rate, b->lanes); @@ -1306,6 +1356,8 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) { rsmi_pcie_bandwidth_t bws; TRY + + DEVICE_MUTEX ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bws); if (ret != RSMI_STATUS_SUCCESS) { @@ -1346,6 +1398,9 @@ rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent, rsmi_status_t ret; std::string val_str; + + DEVICE_MUTEX + ret = get_dev_value_line(amd::smi::kDevPCIEThruPut, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -1435,6 +1490,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind, mon_type = amd::smi::kMonInvalid; } + DEVICE_MUTEX + ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature); return ret; @@ -1453,6 +1510,8 @@ rsmi_dev_fan_speed_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) { ++sensor_ind; // fan sysfs files have 1-based indices + DEVICE_MUTEX + ret = get_dev_mon_value(amd::smi::kMonFanSpeed, dv_ind, sensor_ind, speed); return ret; @@ -1470,6 +1529,8 @@ 
rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) { rsmi_status_t ret; + DEVICE_MUTEX + ret = get_dev_mon_value(amd::smi::kMonFanRPMs, dv_ind, sensor_ind, speed); return ret; @@ -1484,6 +1545,8 @@ rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) { ++sensor_ind; // fan sysfs files have 1-based indices + DEVICE_MUTEX + ret = set_dev_mon_value(amd::smi::kMonFanCntrlEnable, dv_ind, sensor_ind, 2); @@ -1499,6 +1562,7 @@ rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) { rsmi_status_t ret; uint64_t max_speed; + DEVICE_MUTEX ret = rsmi_dev_fan_speed_max_get(dv_ind, sensor_ind, &max_speed); @@ -1541,6 +1605,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t ret; + DEVICE_MUTEX + ret = get_dev_mon_value(amd::smi::kMonMaxFanSpeed, dv_ind, sensor_ind, reinterpret_cast(max_speed)); @@ -1551,6 +1617,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind, rsmi_status_t rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) { TRY + DEVICE_MUTEX + rsmi_status_t ret = get_od_clk_volt_info(dv_ind, odv); return ret; @@ -1564,6 +1632,8 @@ rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind, if (buffer == nullptr || num_regions == nullptr || *num_regions == 0) { return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX rsmi_status_t ret = get_od_clk_volt_curve_regions(dv_ind, num_regions, buffer); return ret; @@ -1582,6 +1652,8 @@ rsmi_dev_power_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) { // ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_power_mon_value(amd::smi::kPowerMaxGPUPower, dv_ind, power); return ret; @@ -1598,6 +1670,8 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) { ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_dev_mon_value(amd::smi::kMonPowerAve, dv_ind, sensor_ind, power); 
return ret; @@ -1615,6 +1689,8 @@ rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap) { ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_dev_mon_value(amd::smi::kMonPowerCap, dv_ind, sensor_ind, cap); return ret; @@ -1633,6 +1709,8 @@ rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind, ++sensor_ind; // power sysfs files have 1-based indices rsmi_status_t ret; + + DEVICE_MUTEX ret = get_dev_mon_value(amd::smi::kMonPowerCapMax, dv_ind, sensor_ind, max); if (ret == RSMI_STATUS_SUCCESS) { @@ -1650,6 +1728,7 @@ rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap) { rsmi_status_t ret; uint64_t min, max; + DEVICE_MUTEX ret = rsmi_dev_power_cap_range_get(dv_ind, sensor_ind, &max, &min); if (ret != RSMI_STATUS_SUCCESS) { @@ -1678,6 +1757,7 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind, ++sensor_ind; // power sysfs files have 1-based indices + DEVICE_MUTEX rsmi_status_t ret = get_power_profiles(dv_ind, status, nullptr); return ret; CATCH @@ -1689,6 +1769,7 @@ rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind, TRY ++sensor_ind; // power sysfs files have 1-based indices + DEVICE_MUTEX rsmi_status_t ret = set_power_profile(dv_ind, profile); return ret; CATCH @@ -1722,6 +1803,8 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, assert(!"Unexpected memory type"); return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX ret = get_dev_value_int(mem_type_file, dv_ind, total); return ret; @@ -1755,6 +1838,8 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type, assert(!"Unexpected memory type"); return RSMI_STATUS_INVALID_ARGS; } + + DEVICE_MUTEX ret = get_dev_value_int(mem_type_file, dv_ind, used); return ret; @@ -1847,6 +1932,8 @@ rsmi_status_t rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent) { TRY std::string val_str; + + DEVICE_MUTEX rsmi_status_t ret = 
get_dev_value_str(amd::smi::kDevUsage, dv_ind, &val_str); if (ret != RSMI_STATUS_SUCCESS) { @@ -1871,6 +1958,8 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) { TRY GET_DEV_FROM_INDX std::string val_str; + + DEVICE_MUTEX int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str); if (ret != 0) { diff --git a/projects/rocm-smi-lib/src/rocm_smi_device.cc b/projects/rocm-smi-lib/src/rocm_smi_device.cc index 2a8cbc3f23..f50fc319ee 100755 --- a/projects/rocm-smi-lib/src/rocm_smi_device.cc +++ b/projects/rocm-smi-lib/src/rocm_smi_device.cc @@ -41,6 +41,10 @@ * */ +#include +#include +#include + #include #include #include @@ -55,6 +59,11 @@ #include "rocm_smi/rocm_smi_main.h" #include "rocm_smi/rocm_smi_device.h" #include "rocm_smi/rocm_smi.h" +#include "rocm_smi/rocm_smi_exception.h" + +extern "C" { +#include "shared_mutex.h" // NOLINT +}; namespace amd { namespace smi { @@ -154,9 +163,26 @@ static bool isRegularFile(std::string fname) { Device::Device(std::string p, RocmSMI_env_vars const *e) : path_(p), env_(e) { monitor_ = nullptr; + + // Get the device name + size_t i = path_.rfind('/', path_.length()); + std::string dev = path_.substr(i + 1, path_.length() - i); + + std::string m_name("/rocm_smi_"); + m_name += dev; + m_name += '_'; + m_name += std::to_string(geteuid()); + + mutex_ = shared_mutex_init(m_name.c_str(), 0777); + + if (mutex_.ptr == nullptr) { + throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR, + "Failed to create shared mem. 
mutex."); + } } Device:: ~Device() { + shared_mutex_close(mutex_); } template diff --git a/projects/rocm-smi-lib/src/shared_mutex/LICENSE b/projects/rocm-smi-lib/src/shared_mutex/LICENSE new file mode 100644 index 0000000000..d85e0d6d9f --- /dev/null +++ b/projects/rocm-smi-lib/src/shared_mutex/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Oleg Yamnikov + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c
new file mode 100755
index 0000000000..33c4d38729
--- /dev/null
+++ b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.c
@@ -0,0 +1,241 @@
+#include "shared_mutex.h"
+#include <errno.h>     // errno, EAGAIN, EEXIST, EINVAL, ENOENT
+#include <fcntl.h>     // O_RDWR, O_CREAT, O_EXCL
+#include <sys/mman.h>  // shm_open, shm_unlink, mmap, munmap,
+                       // PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
+#include <sys/stat.h>  // fchmod
+#include <unistd.h>    // ftruncate, close
+#include <stdio.h>     // perror
+#include <stdlib.h>    // malloc, free
+#include <string.h>    // strlen, memcpy
+
+// Layout of the shared memory object. The mutex must stay the first member:
+// shared_mutex_t.ptr points at the start of the mapping.
+typedef struct shm_region_t {
+  pthread_mutex_t mutex;
+  int initialized;  // 0 in a fresh (zero-filled) object; 1 once `mutex` has
+                    // been initialized by the process that created it.
+} shm_region_t;
+
+// Upper bound on "discard a stale object and start over" rounds, so that a
+// persistent failure cannot loop forever.
+#define SHARED_MUTEX_MAX_ATTEMPTS 3
+
+// Report a failed step of shared_mutex_init, release whatever was acquired
+// so far and build the failure result (`ptr` == NULL). `what` is the perror
+// prefix, or NULL when the error was already printed. errno is preserved.
+static shared_mutex_t init_failed(const char *what, const char *name, int fd,
+                                  void *addr, int unlink_object) {
+  shared_mutex_t failed = {NULL, -1, NULL, 0};
+  int saved_errno = errno;
+
+  if (what != NULL) {
+    perror(what);
+  }
+  if (addr != MAP_FAILED) {
+    munmap(addr, sizeof(shm_region_t));
+  }
+  if (fd != -1) {
+    close(fd);
+  }
+  if (unlink_object) {
+    shm_unlink(name);
+  }
+  errno = saved_errno;
+  return failed;
+}
+
+// Open the shared memory object `name`, creating it when it does not exist.
+// O_EXCL makes exactly one caller the creator; `*created` reports whether it
+// was this one. Returns the descriptor, or -1 with errno set.
+static int open_or_create(const char *name, mode_t mode, int *created) {
+  int attempt;
+
+  for (attempt = 0; attempt < SHARED_MUTEX_MAX_ATTEMPTS; ++attempt) {
+    int fd = shm_open(name, O_RDWR|O_CREAT|O_EXCL, mode);
+    if (fd != -1) {
+      *created = 1;
+      // shm_open applies the umask; set the requested permissions explicitly.
+      if (fchmod(fd, mode) != 0) {
+        perror("fchmod");
+      }
+      return fd;
+    }
+    if (errno != EEXIST) {
+      return -1;
+    }
+
+    fd = shm_open(name, O_RDWR, mode);
+    if (fd != -1) {
+      *created = 0;
+      return fd;
+    }
+    if (errno != ENOENT) {
+      return -1;
+    }
+    // The object was unlinked between the two calls: try to create it again.
+  }
+  return -1;
+}
+
+// Initialize a process-shared, recursive mutex at `mutex_ptr`.
+// Returns 0 on success; on failure prints the error and returns -1 with
+// errno set to the pthread error code.
+static int init_pshared_mutex(pthread_mutex_t *mutex_ptr) {
+  pthread_mutexattr_t attr;
+  const char *failed_call = NULL;
+  int err = pthread_mutexattr_init(&attr);
+
+  if (err != 0) {
+    failed_call = "pthread_mutexattr_init";
+  } else {
+    if ((err = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED))) {
+      failed_call = "pthread_mutexattr_setpshared";
+    } else if ((err = pthread_mutexattr_settype(&attr,
+                                                PTHREAD_MUTEX_RECURSIVE))) {
+      failed_call = "pthread_mutexattr_settype";
+    } else if ((err = pthread_mutex_init(mutex_ptr, &attr))) {
+      failed_call = "pthread_mutex_init";
+    }
+    pthread_mutexattr_destroy(&attr);
+  }
+
+  if (failed_call != NULL) {
+    // pthread functions return the error code instead of setting errno.
+    errno = err;
+    perror(failed_call);
+    errno = err;  // perror may have modified it
+    return -1;
+  }
+  return 0;
+}
+
+// Open (or create) the shared mutex `name`; see shared_mutex.h for the
+// contract. On failure every resource acquired here is released again and
+// the returned structure has `ptr` == NULL.
+shared_mutex_t shared_mutex_init(const char *name, mode_t mode) {
+  int attempt;
+
+  for (attempt = 0; attempt < SHARED_MUTEX_MAX_ATTEMPTS; ++attempt) {
+    shared_mutex_t mutex = {NULL, -1, NULL, 0};
+    shm_region_t *region;
+    size_t name_size;
+    void *addr;
+
+    mutex.shm_fd = open_or_create(name, mode, &mutex.created);
+    if (mutex.shm_fd == -1) {
+      return init_failed("shm_open", name, -1, MAP_FAILED, 0);
+    }
+
+    // Size the object for the mutex plus the `initialized` flag. Bytes added
+    // by ftruncate read as zero, so a new object starts out uninitialized.
+    if (ftruncate(mutex.shm_fd, sizeof(shm_region_t)) != 0) {
+      return init_failed("ftruncate", name, mutex.shm_fd, MAP_FAILED,
+                         mutex.created);
+    }
+
+    addr = mmap(NULL, sizeof(shm_region_t), PROT_READ|PROT_WRITE, MAP_SHARED,
+                mutex.shm_fd, 0);
+    if (addr == MAP_FAILED) {
+      return init_failed("mmap", name, mutex.shm_fd, MAP_FAILED,
+                         mutex.created);
+    }
+    region = (shm_region_t *)addr;
+
+    if (mutex.created) {
+      if (init_pshared_mutex(&region->mutex) != 0) {
+        return init_failed(NULL, name, mutex.shm_fd, addr, 1);
+      }
+      // Publish the mutex only once it is fully initialized
+      // (__atomic_* are GCC/Clang builtins).
+      __atomic_store_n(&region->initialized, 1, __ATOMIC_RELEASE);
+    } else if (!__atomic_load_n(&region->initialized, __ATOMIC_ACQUIRE)) {
+      // The object exists but its mutex was never initialized: its creator
+      // failed part-way or is still setting it up (see the race note in
+      // shared_mutex.h). Discard the object and start over.
+      munmap(addr, sizeof(shm_region_t));
+      close(mutex.shm_fd);
+      if (shm_unlink(name) != 0 && errno != ENOENT) {
+        return init_failed("shm_unlink", name, -1, MAP_FAILED, 0);
+      }
+      continue;
+    }
+
+    name_size = strlen(name) + 1;
+    mutex.name = (char *)malloc(name_size);
+    if (mutex.name == NULL) {
+      // Keep the shared object: other processes may already be using it.
+      return init_failed("malloc", name, mutex.shm_fd, addr, 0);
+    }
+    memcpy(mutex.name, name, name_size);
+    mutex.ptr = &region->mutex;
+    return mutex;
+  }
+
+  errno = EAGAIN;
+  return init_failed("shared_mutex_init", name, -1, MAP_FAILED, 0);
+}
+
+// Unmap and close this process's handle; the shared object itself stays.
+// Every step is attempted even if an earlier one fails, so that one error
+// does not leak the remaining resources.
+int shared_mutex_close(shared_mutex_t mutex) {
+  int result = 0;
+
+  if (mutex.ptr == NULL) {
+    // Handle of a failed shared_mutex_init: nothing is mapped or open.
+    errno = EINVAL;
+    perror("shared_mutex_close");
+    return -1;
+  }
+
+  if (munmap((void *)mutex.ptr, sizeof(shm_region_t))) {
+    perror("munmap");
+    result = -1;
+  }
+  if (close(mutex.shm_fd)) {
+    perror("close");
+    result = -1;
+  }
+  free(mutex.name);
+
+  return result;
+}
+
+// Destroy the mutex, release this process's handle and unlink the shared
+// object. Once the mutex is destroyed the remaining steps are all attempted,
+// even if one of them fails.
+int shared_mutex_destroy(shared_mutex_t mutex) {
+  int result = 0;
+
+  if (mutex.ptr == NULL) {
+    errno = EINVAL;
+    perror("shared_mutex_destroy");
+    return -1;
+  }
+
+  // Nothing has been released yet, so on failure the handle stays usable.
+  if ((errno = pthread_mutex_destroy(mutex.ptr))) {
+    perror("pthread_mutex_destroy");
+    return -1;
+  }
+  // Mark the object stale for any process that opens it before the unlink.
+  __atomic_store_n(&((shm_region_t *)mutex.ptr)->initialized, 0,
+                   __ATOMIC_RELEASE);
+
+  if (munmap((void *)mutex.ptr, sizeof(shm_region_t))) {
+    perror("munmap");
+    result = -1;
+  }
+  if (close(mutex.shm_fd)) {
+    perror("close");
+    result = -1;
+  }
+  if (shm_unlink(mutex.name)) {
+    perror("shm_unlink");
+    result = -1;
+  }
+  free(mutex.name);
+
+  return result;
+}
diff --git a/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h new file mode 100755 index 0000000000..18e70bd6de --- /dev/null +++ b/projects/rocm-smi-lib/src/shared_mutex/shared_mutex.h @@ -0,0 +1,67 @@ +#ifndef SRC_SHARED_MUTEX_SHARED_MUTEX_H_ +#define SRC_SHARED_MUTEX_SHARED_MUTEX_H_ + +#include + +#include // pthread_mutex_t, pthread_mutexattr_t, + // 
pthread_mutexattr_init, pthread_mutexattr_setpshared, + // pthread_mutex_init, pthread_mutex_destroy + +// Structure of a shared mutex. +typedef struct shared_mutex_t { + pthread_mutex_t *ptr; // Pointer to the pthread mutex and + // shared memory segment. + int shm_fd; // Descriptor of shared memory object. + char* name; // Name of the mutex and associated + // shared memory object. + int created; // Equals 1 (true) if initialization + // of this structure caused creation + // of a new shared mutex. + // Equals 0 (false) if this mutex was + // just retrieved from shared memory. +} shared_mutex_t; + +// Initialize a new shared mutex with given `name`. If a mutex +// with such name exists in the system, it will be loaded. +// Otherwise a new mutex will be created. +// +// In case of any error, it will be printed to the standard error +// stream and the returned structure will have `ptr` equal `NULL`. +// `errno` will not be reset in such case, so you may use it. +// +// **NOTE:** In case when the mutex appears to be uncreated, +// this function becomes *non-thread-safe*. If multiple threads +// call it at one moment, several race conditions can occur, +// in which one call might recreate another's shared memory +// object or rewrite another's pthread mutex in the shared memory. +// There is no workaround currently, except to run first +// initialization only before multi-threaded or multi-process +// functionality. +shared_mutex_t shared_mutex_init(const char *name, mode_t mode); + +// Close access to the shared mutex and free all the resources, +// used by the structure. +// +// Returns 0 in case of success. If any error occurs, it will be +// printed to the standard error stream and the function will return -1. +// `errno` will not be reset in such case, so you may use it. +// +// **NOTE:** It will not destroy the mutex. The mutex would not +// only be available to other processes using it right now, +// but also to any process which might want to use it later on. 
+// For complete destruction use `shared_mutex_destroy` instead. +// +// **NOTE:** It will not unlock a locked mutex. +int shared_mutex_close(shared_mutex_t mutex); + +// Close and destroy shared mutex. +// Any open pointers to it will be invalidated. +// +// Returns 0 in case of success. If any error occurs, it will be +// printed to the standard error stream and the function will return -1. +// `errno` will not be reset in such case, so you may use it. +// +// **NOTE:** It will not unlock a locked mutex. +int shared_mutex_destroy(shared_mutex_t mutex); + +#endif // SRC_SHARED_MUTEX_SHARED_MUTEX_H_