Add mutex for multi-process access to device sysfs files
This commit uses a pthread mutex in shared memory to prevent
almost all cases of multiple processes simultaneously
reading/writing to device sysfs files. The main remaining race
condition is when 2 processes are starting at the same time,
setting up their shared memory and mutexes. Since this is meant
to prevent collisions among threads and processes, the small
shared memory segments (big enough for a pthread_mutex_t) will
persist until reboot.
[ROCm/rocm_smi_lib commit: 5e24a77193]
Этот коммит содержится в:
@@ -106,13 +106,15 @@ endif ()
|
||||
set(SRC_DIR "src")
|
||||
set(INC_DIR "include/rocm_smi")
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/shared_mutex)
|
||||
set(SMI_SRC_LIST "${SRC_DIR}/rocm_smi_device.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_main.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_monitor.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.c")
|
||||
|
||||
set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h")
|
||||
@@ -121,12 +123,14 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h")
|
||||
|
||||
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
|
||||
|
||||
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
|
||||
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST})
|
||||
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property(TARGET ${ROCM_SMI_TARGET}
|
||||
|
||||
@@ -42,6 +42,9 @@
|
||||
*/
|
||||
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_
|
||||
#define INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
@@ -52,6 +55,9 @@
|
||||
#include "rocm_smi/rocm_smi_power_mon.h"
|
||||
#include "rocm_smi/rocm_smi_common.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
extern "C" {
|
||||
#include "shared_mutex.h"
|
||||
};
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -109,11 +115,12 @@ class Device {
|
||||
// Returns the stored bdfid_ value.
uint64_t bdfid(void) const {return bdfid_;}
|
||||
// Stores `val` as this device's bdfid_ value.
void set_bdfid(uint64_t val) {bdfid_ = val;}
|
||||
// Returns the stored bdfid_ value (same result as bdfid()).
uint64_t get_bdfid(void) const {return bdfid_;}
|
||||
|
||||
// Returns the pthread mutex pointer held in mutex_ (the `ptr` field of the
// shared_mutex_t handle). The pointer is not owned by the caller.
pthread_mutex_t *mutex(void) {return mutex_.ptr;}
|
||||
private:
|
||||
std::shared_ptr<Monitor> monitor_;
|
||||
std::shared_ptr<PowerMon> power_monitor_;
|
||||
std::string path_;
|
||||
shared_mutex_t mutex_;
|
||||
uint32_t index_;
|
||||
const RocmSMI_env_vars *env_;
|
||||
template <typename T> int openSysfsFileStream(DevInfoTypes type, T *fs,
|
||||
|
||||
@@ -43,6 +43,8 @@
|
||||
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
|
||||
#define INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
|
||||
@@ -63,6 +65,28 @@ namespace smi {
|
||||
int ReadSysfsStr(std::string path, std::string *retStr);
|
||||
int WriteSysfsStr(std::string path, std::string val);
|
||||
|
||||
// Thin adapter that gives an existing pthread_mutex_t an Acquire()/Release()
// interface. Only a reference to the mutex is stored, so the mutex must
// outlive the wrapper. The status codes returned by pthread_mutex_lock() and
// pthread_mutex_unlock() are not inspected.
struct pthread_wrap {
 public:
  // `explicit` keeps a pthread_mutex_t from being silently converted into a
  // wrapper.
  explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}

  // Locks the wrapped mutex, blocking until it is available.
  void Acquire() { pthread_mutex_lock(&mutex_); }

  // Unlocks the wrapped mutex.
  void Release() { pthread_mutex_unlock(&mutex_); }

 private:
  pthread_mutex_t &mutex_;
};
|
||||
struct ScopedPthread {
|
||||
ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) {
|
||||
pthrd_ref_.Acquire();
|
||||
};
|
||||
|
||||
~ScopedPthread() {
|
||||
pthrd_ref_.Release();
|
||||
}
|
||||
private:
|
||||
ScopedPthread(const ScopedPthread&);
|
||||
|
||||
pthread_wrap& pthrd_ref_;
|
||||
};
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
@@ -100,6 +101,21 @@ static rsmi_status_t handleException() {
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
|
||||
assert(dev != nullptr);
|
||||
|
||||
// Holds the mutex of device `dv_ind` from the point of use until the end of
// the enclosing scope. Requires a variable named `dv_ind` in scope and must
// only be used inside functions that return rsmi_status_t: get_mutex()
// returns nullptr for an out-of-range index, in which case the enclosing
// function returns RSMI_STATUS_INVALID_ARGS instead of dereferencing null.
#define DEVICE_MUTEX \
    pthread_mutex_t *_pm = get_mutex(dv_ind); \
    if (_pm == nullptr) { \
      return RSMI_STATUS_INVALID_ARGS; \
    } \
    amd::smi::pthread_wrap _pw(*_pm); \
    amd::smi::ScopedPthread _lock(_pw);
|
||||
|
||||
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
if (dv_ind >= smi.monitor_devices().size()) {
|
||||
return nullptr;
|
||||
}
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
|
||||
assert(dev != nullptr);
|
||||
|
||||
return dev->mutex();
|
||||
}
|
||||
|
||||
static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
|
||||
switch (err) {
|
||||
case 0: return RSMI_STATUS_SUCCESS;
|
||||
@@ -440,6 +456,9 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind,
|
||||
if (enabled_mask == nullptr) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
std::vector<std::string> val_vec;
|
||||
|
||||
ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec);
|
||||
@@ -508,6 +527,8 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
||||
rsmi_status_t ret;
|
||||
std::vector<std::string> val_vec;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec);
|
||||
|
||||
if (ret == RSMI_STATUS_FILE_ERROR) {
|
||||
@@ -569,6 +590,9 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
||||
default:
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_value_vec(type, dv_ind, &val_vec);
|
||||
|
||||
if (ret == RSMI_STATUS_FILE_ERROR) {
|
||||
@@ -605,6 +629,8 @@ rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid) {
|
||||
}
|
||||
GET_DEV_FROM_INDX
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
*bdfid = dev->get_bdfid();
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
CATCH
|
||||
@@ -614,6 +640,9 @@ static rsmi_status_t
|
||||
get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) {
|
||||
TRY
|
||||
std::string val_str;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
rsmi_status_t ret = get_dev_value_str(typ, dv_ind, &val_str);
|
||||
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -630,21 +659,25 @@ get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) {
|
||||
|
||||
// Thin wrapper over get_id() for amd::smi::kDevDevID. Takes the device mutex
// (via DEVICE_MUTEX) before delegating; the result is written to `*id`.
rsmi_status_t rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) {
  DEVICE_MUTEX
  const rsmi_status_t status = get_id(dv_ind, amd::smi::kDevDevID, id);
  return status;
}
|
||||
|
||||
// Thin wrapper over get_id() for amd::smi::kDevSubSysDevID. Takes the device
// mutex (via DEVICE_MUTEX) before delegating; the result is written to `*id`.
rsmi_status_t rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) {
  DEVICE_MUTEX
  const rsmi_status_t status = get_id(dv_ind, amd::smi::kDevSubSysDevID, id);
  return status;
}
|
||||
|
||||
// Thin wrapper over get_id() for amd::smi::kDevVendorID. Takes the device
// mutex (via DEVICE_MUTEX) before delegating; the result is written to `*id`.
rsmi_status_t rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id) {
  DEVICE_MUTEX
  const rsmi_status_t status = get_id(dv_ind, amd::smi::kDevVendorID, id);
  return status;
}
|
||||
|
||||
// Thin wrapper over get_id() for amd::smi::kDevSubSysVendorID. Takes the
// device mutex (via DEVICE_MUTEX) before delegating; the result is written to
// `*id`.
rsmi_status_t rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id) {
  DEVICE_MUTEX
  const rsmi_status_t status =
      get_id(dv_ind, amd::smi::kDevSubSysVendorID, id);
  return status;
}
|
||||
|
||||
@@ -652,6 +685,8 @@ rsmi_status_t
|
||||
rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf) {
|
||||
TRY
|
||||
std::string val_str;
|
||||
DEVICE_MUTEX
|
||||
|
||||
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevPerfLevel, dv_ind,
|
||||
&val_str);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -668,6 +703,8 @@ rsmi_status_t
|
||||
rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) {
|
||||
TRY
|
||||
std::string val_str;
|
||||
DEVICE_MUTEX
|
||||
|
||||
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevOverDriveLevel, dv_ind,
|
||||
&val_str);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -688,7 +725,7 @@ rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) {
|
||||
if (od > kMaxOverdriveLevel) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
return set_dev_value(amd::smi::kDevOverDriveLevel, dv_ind, od);
|
||||
CATCH
|
||||
}
|
||||
@@ -700,6 +737,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_level) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
return set_dev_value(amd::smi::kDevPerfLevel, dv_ind, perf_level);
|
||||
CATCH
|
||||
}
|
||||
@@ -1009,6 +1047,8 @@ rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type,
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
return get_frequencies(dev_type, dv_ind, f);
|
||||
|
||||
CATCH
|
||||
@@ -1035,6 +1075,9 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
|
||||
rsmi_frequencies_t freqs;
|
||||
|
||||
TRY
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, clk_type, &freqs);
|
||||
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -1247,6 +1290,8 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_DEVICE);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
@@ -1265,6 +1310,8 @@ rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_SUBSYS);
|
||||
return ret;
|
||||
CATCH
|
||||
@@ -1279,6 +1326,7 @@ rsmi_dev_vendor_name_get(uint32_t dv_ind, char *name, size_t len) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_VENDOR);
|
||||
return ret;
|
||||
CATCH
|
||||
@@ -1294,6 +1342,8 @@ rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *b) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
return get_frequencies(amd::smi::kDevPCIEClk, dv_ind,
|
||||
&b->transfer_rate, b->lanes);
|
||||
|
||||
@@ -1306,6 +1356,8 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) {
|
||||
rsmi_pcie_bandwidth_t bws;
|
||||
|
||||
TRY
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bws);
|
||||
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -1346,6 +1398,9 @@ rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
|
||||
rsmi_status_t ret;
|
||||
|
||||
std::string val_str;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_value_line(amd::smi::kDevPCIEThruPut, dv_ind, &val_str);
|
||||
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -1435,6 +1490,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
mon_type = amd::smi::kMonInvalid;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature);
|
||||
|
||||
return ret;
|
||||
@@ -1453,6 +1510,8 @@ rsmi_dev_fan_speed_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) {
|
||||
|
||||
++sensor_ind; // fan sysfs files have 1-based indices
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_mon_value(amd::smi::kMonFanSpeed, dv_ind, sensor_ind, speed);
|
||||
|
||||
return ret;
|
||||
@@ -1470,6 +1529,8 @@ rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) {
|
||||
|
||||
rsmi_status_t ret;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_mon_value(amd::smi::kMonFanRPMs, dv_ind, sensor_ind, speed);
|
||||
|
||||
return ret;
|
||||
@@ -1484,6 +1545,8 @@ rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) {
|
||||
|
||||
++sensor_ind; // fan sysfs files have 1-based indices
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable,
|
||||
dv_ind, sensor_ind, 2);
|
||||
|
||||
@@ -1499,6 +1562,7 @@ rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) {
|
||||
rsmi_status_t ret;
|
||||
uint64_t max_speed;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = rsmi_dev_fan_speed_max_get(dv_ind, sensor_ind, &max_speed);
|
||||
|
||||
@@ -1541,6 +1605,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
|
||||
rsmi_status_t ret;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_mon_value(amd::smi::kMonMaxFanSpeed, dv_ind, sensor_ind,
|
||||
reinterpret_cast<int64_t *>(max_speed));
|
||||
|
||||
@@ -1551,6 +1617,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
rsmi_status_t
|
||||
rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) {
|
||||
TRY
|
||||
DEVICE_MUTEX
|
||||
|
||||
rsmi_status_t ret = get_od_clk_volt_info(dv_ind, odv);
|
||||
|
||||
return ret;
|
||||
@@ -1564,6 +1632,8 @@ rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind,
|
||||
if (buffer == nullptr || num_regions == nullptr || *num_regions == 0) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
rsmi_status_t ret = get_od_clk_volt_curve_regions(dv_ind, num_regions,
|
||||
buffer);
|
||||
return ret;
|
||||
@@ -1582,6 +1652,8 @@ rsmi_dev_power_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
|
||||
// ++sensor_ind; // power sysfs files have 1-based indices
|
||||
|
||||
rsmi_status_t ret;
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_power_mon_value(amd::smi::kPowerMaxGPUPower, dv_ind, power);
|
||||
|
||||
return ret;
|
||||
@@ -1598,6 +1670,8 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
|
||||
++sensor_ind; // power sysfs files have 1-based indices
|
||||
|
||||
rsmi_status_t ret;
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_dev_mon_value(amd::smi::kMonPowerAve, dv_ind, sensor_ind, power);
|
||||
|
||||
return ret;
|
||||
@@ -1615,6 +1689,8 @@ rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap) {
|
||||
++sensor_ind; // power sysfs files have 1-based indices
|
||||
|
||||
rsmi_status_t ret;
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_dev_mon_value(amd::smi::kMonPowerCap, dv_ind, sensor_ind, cap);
|
||||
|
||||
return ret;
|
||||
@@ -1633,6 +1709,8 @@ rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
++sensor_ind; // power sysfs files have 1-based indices
|
||||
|
||||
rsmi_status_t ret;
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_dev_mon_value(amd::smi::kMonPowerCapMax, dv_ind, sensor_ind, max);
|
||||
|
||||
if (ret == RSMI_STATUS_SUCCESS) {
|
||||
@@ -1650,6 +1728,7 @@ rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap) {
|
||||
rsmi_status_t ret;
|
||||
uint64_t min, max;
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = rsmi_dev_power_cap_range_get(dv_ind, sensor_ind, &max, &min);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -1678,6 +1757,7 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
|
||||
++sensor_ind; // power sysfs files have 1-based indices
|
||||
|
||||
DEVICE_MUTEX
|
||||
rsmi_status_t ret = get_power_profiles(dv_ind, status, nullptr);
|
||||
return ret;
|
||||
CATCH
|
||||
@@ -1689,6 +1769,7 @@ rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind,
|
||||
TRY
|
||||
++sensor_ind; // power sysfs files have 1-based indices
|
||||
|
||||
DEVICE_MUTEX
|
||||
rsmi_status_t ret = set_power_profile(dv_ind, profile);
|
||||
return ret;
|
||||
CATCH
|
||||
@@ -1722,6 +1803,8 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
|
||||
assert(!"Unexpected memory type");
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_dev_value_int(mem_type_file, dv_ind, total);
|
||||
|
||||
return ret;
|
||||
@@ -1755,6 +1838,8 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
|
||||
assert(!"Unexpected memory type");
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
ret = get_dev_value_int(mem_type_file, dv_ind, used);
|
||||
|
||||
return ret;
|
||||
@@ -1847,6 +1932,8 @@ rsmi_status_t
|
||||
rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent) {
|
||||
TRY
|
||||
std::string val_str;
|
||||
|
||||
DEVICE_MUTEX
|
||||
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevUsage, dv_ind,
|
||||
&val_str);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
@@ -1871,6 +1958,8 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) {
|
||||
TRY
|
||||
GET_DEV_FROM_INDX
|
||||
std::string val_str;
|
||||
|
||||
DEVICE_MUTEX
|
||||
int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str);
|
||||
|
||||
if (ret != 0) {
|
||||
|
||||
@@ -41,6 +41,10 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdint.h>
|
||||
@@ -55,6 +59,11 @@
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
|
||||
extern "C" {
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
};
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -154,9 +163,26 @@ static bool isRegularFile(std::string fname) {
|
||||
|
||||
// Builds a Device for the sysfs path `p` with environment settings `e`, and
// attaches it to a shared-memory mutex whose name is derived from the device
// name and the effective user id.
//
// Throws amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR) when the shared
// mutex cannot be set up.
Device::Device(std::string p, RocmSMI_env_vars const *e)
    : monitor_(nullptr), path_(p), env_(e) {
  // The device name is whatever follows the last '/' of the path.
  const size_t last_slash = path_.rfind('/', path_.length());
  const std::string dev_name =
      path_.substr(last_slash + 1, path_.length() - last_slash);

  // Shared-memory object name: "/rocm_smi_<device>_<effective uid>".
  const std::string shm_name =
      "/rocm_smi_" + dev_name + '_' + std::to_string(geteuid());

  mutex_ = shared_mutex_init(shm_name.c_str(), 0777);

  // shared_mutex_init() signals failure through a null `ptr`.
  if (mutex_.ptr == nullptr) {
    throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
                                   "Failed to create shared mem. mutex.");
  }
}
|
||||
|
||||
// Releases this process's handle on the device's shared mutex. The status
// returned by shared_mutex_close() is not checked.
Device::~Device() {
  (void)shared_mutex_close(mutex_);
}
|
||||
|
||||
template <typename T>
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018 Oleg Yamnikov
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
Исполняемый файл
+131
@@ -0,0 +1,131 @@
|
||||
#include "shared_mutex.h"
|
||||
#include <errno.h> // errno, ENOENT
|
||||
#include <fcntl.h> // O_RDWR, O_CREATE
|
||||
#include <linux/limits.h> // NAME_MAX
|
||||
#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap,
|
||||
// PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
|
||||
#include <unistd.h> // ftruncate, close
|
||||
#include <stdio.h> // perror
|
||||
#include <stdlib.h> // malloc, free
|
||||
#include <string.h> // strcpy
|
||||
|
||||
shared_mutex_t shared_mutex_init(const char *name, mode_t mode) {
|
||||
shared_mutex_t mutex = {NULL, 0, NULL, 0};
|
||||
errno = 0;
|
||||
|
||||
// Open existing shared memory object, or create one.
|
||||
// Two separate calls are needed here, to mark fact of creation
|
||||
// for later initialization of pthread mutex.
|
||||
mutex.shm_fd = shm_open(name, O_RDWR, mode);
|
||||
if (errno == ENOENT) {
|
||||
mutex.shm_fd = shm_open(name, O_RDWR|O_CREAT, mode);
|
||||
mutex.created = 1;
|
||||
// Change permissions of shared memory, so every body can access it. Avoiding the umask of shm_open
|
||||
if (fchmod(mutex.shm_fd, mode) != 0) {
|
||||
perror("fchmod");
|
||||
}
|
||||
}
|
||||
if (mutex.shm_fd == -1) {
|
||||
perror("shm_open");
|
||||
return mutex;
|
||||
}
|
||||
|
||||
// Truncate shared memory segment so it would contain
|
||||
// pthread_mutex_t AND the ref. count
|
||||
if (ftruncate(mutex.shm_fd, sizeof(pthread_mutex_t)) != 0) {
|
||||
perror("ftruncate");
|
||||
return mutex;
|
||||
}
|
||||
|
||||
// Map pthread mutex into the shared memory.
|
||||
void *addr = mmap(
|
||||
NULL,
|
||||
sizeof(pthread_mutex_t),
|
||||
PROT_READ|PROT_WRITE,
|
||||
MAP_SHARED,
|
||||
mutex.shm_fd,
|
||||
0
|
||||
);
|
||||
if (addr == MAP_FAILED) {
|
||||
perror("mmap");
|
||||
return mutex;
|
||||
}
|
||||
|
||||
if (mutex.created == 0 && ((shared_mutex_t *)addr)->ptr == NULL) {
|
||||
// Something is out of sync. Unlink shm and start over.
|
||||
if (shm_unlink(name)) {
|
||||
mutex.shm_fd = 0;
|
||||
perror("shm_unlink");
|
||||
}
|
||||
free(mutex.name);
|
||||
|
||||
return shared_mutex_init(name, mode);
|
||||
}
|
||||
|
||||
pthread_mutex_t *mutex_ptr = (pthread_mutex_t *)addr;
|
||||
|
||||
if (mutex.created) {
|
||||
pthread_mutexattr_t attr;
|
||||
if (pthread_mutexattr_init(&attr)) {
|
||||
perror("pthread_mutexattr_init");
|
||||
return mutex;
|
||||
}
|
||||
if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
|
||||
perror("pthread_mutexattr_setpshared");
|
||||
return mutex;
|
||||
}
|
||||
|
||||
if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) {
|
||||
perror("pthread_mutexattr_settype");
|
||||
return mutex;
|
||||
}
|
||||
if (pthread_mutex_init(mutex_ptr, &attr)) {
|
||||
perror("pthread_mutex_init");
|
||||
return mutex;
|
||||
}
|
||||
}
|
||||
|
||||
mutex.ptr = mutex_ptr;
|
||||
mutex.name = (char *)malloc(NAME_MAX+1);
|
||||
strcpy(mutex.name, name);
|
||||
return mutex;
|
||||
}
|
||||
|
||||
// Releases this process's resources for `mutex`: unmaps the mutex, closes the
// shared-memory descriptor and frees the stored name. The shared-memory
// object itself is not unlinked and the pthread mutex is not destroyed.
//
// Every step is attempted even if an earlier one fails, so one failure does
// not leak the remaining resources. `mutex` is passed by value, so the
// caller's copy is left unchanged and must not be used afterwards.
//
// Returns 0 when every step succeeded; otherwise -1, after reporting each
// failure with perror().
int shared_mutex_close(shared_mutex_t mutex) {
  int status = 0;

  if (munmap((void *)mutex.ptr, sizeof(pthread_mutex_t))) {
    perror("munmap");
    status = -1;
  }
  if (close(mutex.shm_fd)) {
    perror("close");
    status = -1;
  }
  free(mutex.name);

  return status;
}
|
||||
|
||||
// Destroys the pthread mutex, then releases this process's resources for it
// and unlinks the shared-memory object.
//
// If pthread_mutex_destroy() fails, nothing else is touched and -1 is
// returned. Once the mutex has been destroyed, the remaining steps (unmap,
// close, unlink, free) are all attempted even if one of them fails, so a
// single failure does not leak the rest. `mutex` is passed by value, so the
// caller's copy is left unchanged and must not be used afterwards.
//
// Returns 0 when every step succeeded; otherwise -1, after reporting each
// failure with perror().
int shared_mutex_destroy(shared_mutex_t mutex) {
  // pthread_mutex_destroy() returns the error number; store it in errno so
  // that perror() prints the right message.
  if ((errno = pthread_mutex_destroy(mutex.ptr))) {
    perror("pthread_mutex_destroy");
    return -1;
  }

  int status = 0;

  if (munmap((void *)mutex.ptr, sizeof(pthread_mutex_t))) {
    perror("munmap");
    status = -1;
  }
  if (close(mutex.shm_fd)) {
    perror("close");
    status = -1;
  }
  if (shm_unlink(mutex.name)) {
    perror("shm_unlink");
    status = -1;
  }
  free(mutex.name);

  return status;
}
|
||||
Исполняемый файл
+67
@@ -0,0 +1,67 @@
|
||||
#ifndef SRC_SHARED_MUTEX_SHARED_MUTEX_H_
|
||||
#define SRC_SHARED_MUTEX_SHARED_MUTEX_H_
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <pthread.h> // pthread_mutex_t, pthread_mutexattr_t,
|
||||
// pthread_mutexattr_init, pthread_mutexattr_setpshared,
|
||||
// pthread_mutex_init, pthread_mutex_destroy
|
||||
|
||||
// Handle to a pthread mutex that is stored in a shared memory object.
typedef struct shared_mutex_t {
  // Pointer to the pthread mutex, i.e. to the mapped shared memory segment.
  pthread_mutex_t *ptr;

  // Descriptor of the shared memory object.
  int shm_fd;

  // Name of the mutex and of the associated shared memory object.
  char *name;

  // 1 (true) if initializing this structure created a new shared mutex;
  // 0 (false) if the mutex was only retrieved from existing shared memory.
  int created;
} shared_mutex_t;
|
||||
|
||||
// Initialize a new shared mutex with given `name`. If a mutex
|
||||
// with such name exists in the system, it will be loaded.
|
||||
// Otherwise a new mutex will be created.
|
||||
//
|
||||
// In case of any error, it will be printed to standard error (via perror)
|
||||
// and the returned structure will have `ptr` equal `NULL`.
|
||||
// `errno` will not be reset in such a case, so you may use it.
|
||||
//
|
||||
// **NOTE:** In case when the mutex appears to be uncreated,
|
||||
// this function becomes *non-thread-safe*. If multiple threads
|
||||
// call it at one moment, there occur several race conditions,
|
||||
// in which one call might recreate another's shared memory
|
||||
// object or rewrite another's pthread mutex in the shared memory.
|
||||
// There is no workaround currently, except to run first
|
||||
// initialization only before multi-threaded or multi-process
|
||||
// functionality.
|
||||
shared_mutex_t shared_mutex_init(const char *name, mode_t mode);
|
||||
|
||||
// Close access to the shared mutex and free all the resources,
|
||||
// used by the structure.
|
||||
//
|
||||
// Returns 0 in case of success. If any error occurs, it will be
|
||||
// printed to standard error (via perror) and the function will return -1.
|
||||
// `errno` will not be reset in such a case, so you may use it.
|
||||
//
|
||||
// **NOTE:** It will not destroy the mutex. The mutex would not
|
||||
// only be available to other processes using it right now,
|
||||
// but also to any process which might want to use it later on.
|
||||
// For complete destruction use `shared_mutex_destroy` instead.
|
||||
//
|
||||
// **NOTE:** It will not unlock locked mutex.
|
||||
int shared_mutex_close(shared_mutex_t mutex);
|
||||
|
||||
// Close and destroy shared mutex.
|
||||
// Any open pointers to it will be invalidated.
|
||||
//
|
||||
// Returns 0 in case of success. If any error occurs, it will be
|
||||
// printed to standard error (via perror) and the function will return -1.
|
||||
// `errno` will not be reset in such a case, so you may use it.
|
||||
//
|
||||
// **NOTE:** It will not unlock locked mutex.
|
||||
int shared_mutex_destroy(shared_mutex_t mutex);
|
||||
|
||||
#endif // SRC_SHARED_MUTEX_SHARED_MUTEX_H_
|
||||
Ссылка в новой задаче
Block a user