Merge 'master' into 'amd-master'

Change-Id: I4bf7ea780e1c74ece3ead10be1eb941128ce2d9e


[ROCm/amdsmi commit: 8ab76fd0cc]
This commit is contained in:
Jenkins
2019-05-06 12:02:02 -07:00
کامیت ff9011c8c7
13فایلهای تغییر یافته به همراه450 افزوده شده و 14 حذف شده
@@ -106,13 +106,15 @@ endif ()
set(SRC_DIR "src")
set(INC_DIR "include/rocm_smi")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src/shared_mutex)
set(SMI_SRC_LIST "${SRC_DIR}/rocm_smi_device.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_main.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_monitor.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.c")
set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h")
@@ -121,12 +123,14 @@ set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h")
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST})
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
## Set the VERSION and SOVERSION values
set_property(TARGET ${ROCM_SMI_TARGET}
@@ -744,6 +744,24 @@ rsmi_status_t rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid);
rsmi_status_t rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
uint64_t *received, uint64_t *max_pkt_sz);
/**
* @brief Get PCIe replay counter
*
* @details Given a device index @p dv_ind and a pointer to a uint64_t @p
* counter, this function will write the sum of the number of NAK's received
* by the GPU and the NAK's generated by the GPU to memory pointed to by @p
* counter.
*
* @param[in] dv_ind a device index
*
* @param[inout] counter a pointer to uint64_t to which the sum of the NAK's
* received and generated by the GPU is written
*
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
*/
rsmi_status_t rsmi_dev_pci_replay_counter_get(uint32_t dv_ind,
uint64_t *counter);
/** @} */ // end of PCIeQuer
/*****************************************************************************/
/** @defgroup PCIeCont PCIe Control
@@ -42,6 +42,9 @@
*/
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_
#define INCLUDE_ROCM_SMI_ROCM_SMI_DEVICE_H_
#include <pthread.h>
#include <string>
#include <memory>
#include <utility>
@@ -52,6 +55,9 @@
#include "rocm_smi/rocm_smi_power_mon.h"
#include "rocm_smi/rocm_smi_common.h"
#include "rocm_smi/rocm_smi.h"
extern "C" {
#include "shared_mutex.h" // NOLINT
};
namespace amd {
namespace smi {
@@ -84,6 +90,7 @@ enum DevInfoTypes {
kDevMemUsedGTT,
kDevMemUsedVisVRAM,
kDevMemUsedVRAM,
kDevPCIEReplayCount,
};
class Device {
@@ -109,11 +116,13 @@ class Device {
uint64_t bdfid(void) const {return bdfid_;}
void set_bdfid(uint64_t val) {bdfid_ = val;}
uint64_t get_bdfid(void) const {return bdfid_;}
pthread_mutex_t *mutex(void) {return mutex_.ptr;}
private:
std::shared_ptr<Monitor> monitor_;
std::shared_ptr<PowerMon> power_monitor_;
std::string path_;
shared_mutex_t mutex_;
uint32_t index_;
const RocmSMI_env_vars *env_;
template <typename T> int openSysfsFileStream(DevInfoTypes type, T *fs,
@@ -43,6 +43,8 @@
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
#define INCLUDE_ROCM_SMI_ROCM_SMI_UTILS_H_
#include <pthread.h>
#include <string>
#include <cstdint>
@@ -63,6 +65,28 @@ namespace smi {
int ReadSysfsStr(std::string path, std::string *retStr);
int WriteSysfsStr(std::string path, std::string val);
struct pthread_wrap {
public:
explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}
void Acquire() { pthread_mutex_lock(&mutex_); }
void Release() { pthread_mutex_unlock(&mutex_); }
private:
pthread_mutex_t& mutex_;
};
struct ScopedPthread {
explicit ScopedPthread(pthread_wrap& mutex) : pthrd_ref_(mutex) {
pthrd_ref_.Acquire();
}
~ScopedPthread() {
pthrd_ref_.Release();
}
private:
ScopedPthread(const ScopedPthread&);
pthread_wrap& pthrd_ref_;
};
} // namespace smi
} // namespace amd
+105 -3
مشاهده پرونده
@@ -44,6 +44,7 @@
#include <assert.h>
#include <errno.h>
#include <sys/utsname.h>
#include <pthread.h>
#include <sstream>
#include <algorithm>
@@ -100,12 +101,27 @@ static rsmi_status_t handleException() {
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
assert(dev != nullptr);
#define DEVICE_MUTEX \
amd::smi::pthread_wrap _pw(*get_mutex(dv_ind)); \
amd::smi::ScopedPthread _lock(_pw);
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
if (dv_ind >= smi.monitor_devices().size()) {
return nullptr;
}
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
assert(dev != nullptr);
return dev->mutex();
}
static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
switch (err) {
case 0: return RSMI_STATUS_SUCCESS;
case EACCES: return RSMI_STATUS_PERMISSION;
case EPERM: return RSMI_STATUS_NOT_SUPPORTED;
case ENOENT:
case EPERM:
case ENOENT: return RSMI_STATUS_NOT_SUPPORTED;
case EISDIR: return RSMI_STATUS_FILE_ERROR;
default: return RSMI_STATUS_UNKNOWN_ERROR;
}
@@ -440,6 +456,9 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind,
if (enabled_mask == nullptr) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
std::vector<std::string> val_vec;
ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec);
@@ -508,6 +527,8 @@ rsmi_status_t rsmi_dev_ecc_status_get(uint32_t dv_ind, rsmi_gpu_block_t block,
rsmi_status_t ret;
std::vector<std::string> val_vec;
DEVICE_MUTEX
ret = get_dev_value_vec(amd::smi::kDevErrCntFeatures, dv_ind, &val_vec);
if (ret == RSMI_STATUS_FILE_ERROR) {
@@ -569,6 +590,9 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
default:
return RSMI_STATUS_NOT_SUPPORTED;
}
DEVICE_MUTEX
ret = get_dev_value_vec(type, dv_ind, &val_vec);
if (ret == RSMI_STATUS_FILE_ERROR) {
@@ -605,6 +629,8 @@ rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid) {
}
GET_DEV_FROM_INDX
DEVICE_MUTEX
*bdfid = dev->get_bdfid();
return RSMI_STATUS_SUCCESS;
CATCH
@@ -614,6 +640,9 @@ static rsmi_status_t
get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) {
TRY
std::string val_str;
DEVICE_MUTEX
rsmi_status_t ret = get_dev_value_str(typ, dv_ind, &val_str);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -630,21 +659,25 @@ get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) {
rsmi_status_t
rsmi_dev_id_get(uint32_t dv_ind, uint16_t *id) {
DEVICE_MUTEX
return get_id(dv_ind, amd::smi::kDevDevID, id);
}
rsmi_status_t
rsmi_dev_subsystem_id_get(uint32_t dv_ind, uint16_t *id) {
DEVICE_MUTEX
return get_id(dv_ind, amd::smi::kDevSubSysDevID, id);
}
rsmi_status_t
rsmi_dev_vendor_id_get(uint32_t dv_ind, uint16_t *id) {
DEVICE_MUTEX
return get_id(dv_ind, amd::smi::kDevVendorID, id);
}
rsmi_status_t
rsmi_dev_subsystem_vendor_id_get(uint32_t dv_ind, uint16_t *id) {
DEVICE_MUTEX
return get_id(dv_ind, amd::smi::kDevSubSysVendorID, id);
}
@@ -652,6 +685,8 @@ rsmi_status_t
rsmi_dev_perf_level_get(uint32_t dv_ind, rsmi_dev_perf_level_t *perf) {
TRY
std::string val_str;
DEVICE_MUTEX
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevPerfLevel, dv_ind,
&val_str);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -668,6 +703,8 @@ rsmi_status_t
rsmi_dev_overdrive_level_get(uint32_t dv_ind, uint32_t *od) {
TRY
std::string val_str;
DEVICE_MUTEX
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevOverDriveLevel, dv_ind,
&val_str);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -688,7 +725,7 @@ rsmi_dev_overdrive_level_set(int32_t dv_ind, uint32_t od) {
if (od > kMaxOverdriveLevel) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
return set_dev_value(amd::smi::kDevOverDriveLevel, dv_ind, od);
CATCH
}
@@ -700,6 +737,7 @@ rsmi_dev_perf_level_set(int32_t dv_ind, rsmi_dev_perf_level_t perf_level) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
return set_dev_value(amd::smi::kDevPerfLevel, dv_ind, perf_level);
CATCH
}
@@ -1009,6 +1047,8 @@ rsmi_dev_gpu_clk_freq_get(uint32_t dv_ind, rsmi_clk_type_t clk_type,
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
return get_frequencies(dev_type, dv_ind, f);
CATCH
@@ -1035,6 +1075,9 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
rsmi_frequencies_t freqs;
TRY
DEVICE_MUTEX
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, clk_type, &freqs);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -1247,6 +1290,8 @@ rsmi_dev_name_get(uint32_t dv_ind, char *name, size_t len) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_DEVICE);
if (ret != RSMI_STATUS_SUCCESS) {
return ret;
@@ -1265,6 +1310,8 @@ rsmi_dev_subsystem_name_get(uint32_t dv_ind, char *name, size_t len) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_SUBSYS);
return ret;
CATCH
@@ -1279,6 +1326,7 @@ rsmi_dev_vendor_name_get(uint32_t dv_ind, char *name, size_t len) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
ret = get_dev_name_from_id(dv_ind, name, len, NAME_STR_VENDOR);
return ret;
CATCH
@@ -1294,6 +1342,8 @@ rsmi_dev_pci_bandwidth_get(uint32_t dv_ind, rsmi_pcie_bandwidth_t *b) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
return get_frequencies(amd::smi::kDevPCIEClk, dv_ind,
&b->transfer_rate, b->lanes);
@@ -1306,6 +1356,8 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) {
rsmi_pcie_bandwidth_t bws;
TRY
DEVICE_MUTEX
ret = rsmi_dev_pci_bandwidth_get(dv_ind, &bws);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -1346,6 +1398,9 @@ rsmi_dev_pci_throughput_get(uint32_t dv_ind, uint64_t *sent,
rsmi_status_t ret;
std::string val_str;
DEVICE_MUTEX
ret = get_dev_value_line(amd::smi::kDevPCIEThruPut, dv_ind, &val_str);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -1435,6 +1490,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_ind,
mon_type = amd::smi::kMonInvalid;
}
DEVICE_MUTEX
ret = get_dev_mon_value(mon_type, dv_ind, sensor_ind, temperature);
return ret;
@@ -1453,6 +1510,8 @@ rsmi_dev_fan_speed_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) {
++sensor_ind; // fan sysfs files have 1-based indices
DEVICE_MUTEX
ret = get_dev_mon_value(amd::smi::kMonFanSpeed, dv_ind, sensor_ind, speed);
return ret;
@@ -1470,6 +1529,8 @@ rsmi_dev_fan_rpms_get(uint32_t dv_ind, uint32_t sensor_ind, int64_t *speed) {
rsmi_status_t ret;
DEVICE_MUTEX
ret = get_dev_mon_value(amd::smi::kMonFanRPMs, dv_ind, sensor_ind, speed);
return ret;
@@ -1484,6 +1545,8 @@ rsmi_dev_fan_reset(uint32_t dv_ind, uint32_t sensor_ind) {
++sensor_ind; // fan sysfs files have 1-based indices
DEVICE_MUTEX
ret = set_dev_mon_value<uint64_t>(amd::smi::kMonFanCntrlEnable,
dv_ind, sensor_ind, 2);
@@ -1499,6 +1562,7 @@ rsmi_dev_fan_speed_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t speed) {
rsmi_status_t ret;
uint64_t max_speed;
DEVICE_MUTEX
ret = rsmi_dev_fan_speed_max_get(dv_ind, sensor_ind, &max_speed);
@@ -1541,6 +1605,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_status_t ret;
DEVICE_MUTEX
ret = get_dev_mon_value(amd::smi::kMonMaxFanSpeed, dv_ind, sensor_ind,
reinterpret_cast<int64_t *>(max_speed));
@@ -1551,6 +1617,8 @@ rsmi_dev_fan_speed_max_get(uint32_t dv_ind, uint32_t sensor_ind,
rsmi_status_t
rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) {
TRY
DEVICE_MUTEX
rsmi_status_t ret = get_od_clk_volt_info(dv_ind, odv);
return ret;
@@ -1564,6 +1632,8 @@ rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind,
if (buffer == nullptr || num_regions == nullptr || *num_regions == 0) {
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
rsmi_status_t ret = get_od_clk_volt_curve_regions(dv_ind, num_regions,
buffer);
return ret;
@@ -1582,6 +1652,8 @@ rsmi_dev_power_max_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
// ++sensor_ind; // power sysfs files have 1-based indices
rsmi_status_t ret;
DEVICE_MUTEX
ret = get_power_mon_value(amd::smi::kPowerMaxGPUPower, dv_ind, power);
return ret;
@@ -1598,6 +1670,8 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
++sensor_ind; // power sysfs files have 1-based indices
rsmi_status_t ret;
DEVICE_MUTEX
ret = get_dev_mon_value(amd::smi::kMonPowerAve, dv_ind, sensor_ind, power);
return ret;
@@ -1615,6 +1689,8 @@ rsmi_dev_power_cap_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *cap) {
++sensor_ind; // power sysfs files have 1-based indices
rsmi_status_t ret;
DEVICE_MUTEX
ret = get_dev_mon_value(amd::smi::kMonPowerCap, dv_ind, sensor_ind, cap);
return ret;
@@ -1633,6 +1709,8 @@ rsmi_dev_power_cap_range_get(uint32_t dv_ind, uint32_t sensor_ind,
++sensor_ind; // power sysfs files have 1-based indices
rsmi_status_t ret;
DEVICE_MUTEX
ret = get_dev_mon_value(amd::smi::kMonPowerCapMax, dv_ind, sensor_ind, max);
if (ret == RSMI_STATUS_SUCCESS) {
@@ -1650,6 +1728,7 @@ rsmi_dev_power_cap_set(uint32_t dv_ind, uint32_t sensor_ind, uint64_t cap) {
rsmi_status_t ret;
uint64_t min, max;
DEVICE_MUTEX
ret = rsmi_dev_power_cap_range_get(dv_ind, sensor_ind, &max, &min);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -1678,6 +1757,7 @@ rsmi_dev_power_profile_presets_get(uint32_t dv_ind, uint32_t sensor_ind,
++sensor_ind; // power sysfs files have 1-based indices
DEVICE_MUTEX
rsmi_status_t ret = get_power_profiles(dv_ind, status, nullptr);
return ret;
CATCH
@@ -1689,6 +1769,7 @@ rsmi_dev_power_profile_set(uint32_t dv_ind, uint32_t sensor_ind,
TRY
++sensor_ind; // power sysfs files have 1-based indices
DEVICE_MUTEX
rsmi_status_t ret = set_power_profile(dv_ind, profile);
return ret;
CATCH
@@ -1722,6 +1803,8 @@ rsmi_dev_memory_total_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
assert(!"Unexpected memory type");
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
ret = get_dev_value_int(mem_type_file, dv_ind, total);
return ret;
@@ -1755,6 +1838,8 @@ rsmi_dev_memory_usage_get(uint32_t dv_ind, rsmi_memory_type_t mem_type,
assert(!"Unexpected memory type");
return RSMI_STATUS_INVALID_ARGS;
}
DEVICE_MUTEX
ret = get_dev_value_int(mem_type_file, dv_ind, used);
return ret;
@@ -1847,6 +1932,8 @@ rsmi_status_t
rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent) {
TRY
std::string val_str;
DEVICE_MUTEX
rsmi_status_t ret = get_dev_value_str(amd::smi::kDevUsage, dv_ind,
&val_str);
if (ret != RSMI_STATUS_SUCCESS) {
@@ -1871,6 +1958,8 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) {
TRY
GET_DEV_FROM_INDX
std::string val_str;
DEVICE_MUTEX
int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str);
if (ret != 0) {
@@ -1955,3 +2044,16 @@ rsmi_version_str_get(rsmi_sw_component_t component, char *ver_str,
CATCH
}
rsmi_status_t
rsmi_dev_pci_replay_counter_get(uint32_t dv_ind, uint64_t *counter) {
TRY
DEVICE_MUTEX
rsmi_status_t ret;
ret = get_dev_value_int(amd::smi::kDevPCIEReplayCount, dv_ind, counter);
return ret;
CATCH
}
@@ -41,6 +41,10 @@
*
*/
#include <pthread.h>
#include <unistd.h>
#include <sys/types.h>
#include <assert.h>
#include <sys/stat.h>
#include <stdint.h>
@@ -55,6 +59,11 @@
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_exception.h"
extern "C" {
#include "shared_mutex.h" // NOLINT
};
namespace amd {
namespace smi {
@@ -87,6 +96,7 @@ static const char *kDevMemTotVRAMFName = "mem_info_vram_total";
static const char *kDevMemUsedGTTFName = "mem_info_gtt_used";
static const char *kDevMemUsedVisVRAMFName = "mem_info_vis_vram_used";
static const char *kDevMemUsedVRAMFName = "mem_info_vram_used";
static const char *kDevPCIEReplayCountFName = "pcie_replay_count";
// Strings that are found within sysfs files
static const char *kDevPerfLevelAutoStr = "auto";
@@ -127,6 +137,7 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
{kDevMemUsedGTT, kDevMemUsedGTTFName},
{kDevMemUsedVisVRAM, kDevMemUsedVisVRAMFName},
{kDevMemUsedVRAM, kDevMemUsedVRAMFName},
{kDevPCIEReplayCount, kDevPCIEReplayCountFName},
};
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
@@ -154,9 +165,26 @@ static bool isRegularFile(std::string fname) {
Device::Device(std::string p, RocmSMI_env_vars const *e) : path_(p), env_(e) {
monitor_ = nullptr;
// Get the device name
size_t i = path_.rfind('/', path_.length());
std::string dev = path_.substr(i + 1, path_.length() - i);
std::string m_name("/rocm_smi_");
m_name += dev;
m_name += '_';
m_name += std::to_string(geteuid());
mutex_ = shared_mutex_init(m_name.c_str(), 0777);
if (mutex_.ptr == nullptr) {
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
"Failed to create shared mem. mutex.");
}
}
Device:: ~Device() {
shared_mutex_close(mutex_);
}
template <typename T>
@@ -176,7 +204,7 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
DBG_FILE_ERROR(sysfs_path, str);
if (!isRegularFile(sysfs_path)) {
return EISDIR;
return ENOENT;
}
fs->open(sysfs_path);
@@ -341,6 +369,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
case kDevMemUsedGTT:
case kDevMemUsedVisVRAM:
case kDevMemUsedVRAM:
case kDevPCIEReplayCount:
ret = readDevInfoStr(type, &tempStr);
RET_IF_NONZERO(ret);
*val = std::stoul(tempStr, 0);
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2018 Oleg Yamnikov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,145 @@
#include "shared_mutex.h"
#include <errno.h> // errno, ENOENT
#include <fcntl.h> // O_RDWR, O_CREATE
#include <linux/limits.h> // NAME_MAX
#include <sys/mman.h> // shm_open, shm_unlink, mmap, munmap,
// PROT_READ, PROT_WRITE, MAP_SHARED, MAP_FAILED
#include <unistd.h> // ftruncate, close
#include <stdio.h> // perror
#include <stdlib.h> // malloc, free
#include <string.h> // strcpy
#include <time.h> // clock_gettime
shared_mutex_t shared_mutex_init(const char *name, mode_t mode) {
shared_mutex_t mutex = {NULL, 0, NULL, 0};
errno = 0;
// Open existing shared memory object, or create one.
// Two separate calls are needed here, to mark fact of creation
// for later initialization of pthread mutex.
mutex.shm_fd = shm_open(name, O_RDWR, mode);
if (errno == ENOENT) {
mutex.shm_fd = shm_open(name, O_RDWR|O_CREAT, mode);
mutex.created = 1;
// Change permissions of shared memory, so every body can access it. Avoiding the umask of shm_open
if (fchmod(mutex.shm_fd, mode) != 0) {
perror("fchmod");
}
}
if (mutex.shm_fd == -1) {
perror("shm_open");
return mutex;
}
// Truncate shared memory segment so it would contain
// pthread_mutex_t AND the ref. count
if (ftruncate(mutex.shm_fd, sizeof(pthread_mutex_t)) != 0) {
perror("ftruncate");
return mutex;
}
// Map pthread mutex into the shared memory.
void *addr = mmap(
NULL,
sizeof(pthread_mutex_t),
PROT_READ|PROT_WRITE,
MAP_SHARED,
mutex.shm_fd,
0
);
if (addr == MAP_FAILED) {
perror("mmap");
return mutex;
}
pthread_mutex_t *mutex_ptr = (pthread_mutex_t *)addr;
// Make sure the mutex wasn't left in a locked state. If we can't
// acquire it in 3 sec., re-do everything.
struct timespec expireTime;
clock_gettime(CLOCK_REALTIME, &expireTime);
expireTime.tv_sec += 3;
int ret = pthread_mutex_timedlock(mutex_ptr, &expireTime);
if (ret || (mutex.created == 0 && ((shared_mutex_t *)addr)->ptr == NULL)) {
// Something is out of sync. Unlink shm and start over.
if (shm_unlink(name)) {
mutex.shm_fd = 0;
perror("shm_unlink");
}
free(mutex.name);
return shared_mutex_init(name, mode);
} else {
if (pthread_mutex_unlock(mutex_ptr)) {
perror("pthread_mutex_unlock");
}
}
if (mutex.created) {
pthread_mutexattr_t attr;
if (pthread_mutexattr_init(&attr)) {
perror("pthread_mutexattr_init");
return mutex;
}
if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
perror("pthread_mutexattr_setpshared");
return mutex;
}
if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) {
perror("pthread_mutexattr_settype");
return mutex;
}
if (pthread_mutex_init(mutex_ptr, &attr)) {
perror("pthread_mutex_init");
return mutex;
}
}
mutex.ptr = mutex_ptr;
mutex.name = (char *)malloc(NAME_MAX+1);
strcpy(mutex.name, name);
return mutex;
}
int shared_mutex_close(shared_mutex_t mutex) {
if (munmap((void *)mutex.ptr, sizeof(pthread_mutex_t))) {
perror("munmap");
return -1;
}
mutex.ptr = NULL;
if (close(mutex.shm_fd)) {
perror("close");
return -1;
}
mutex.shm_fd = 0;
free(mutex.name);
return 0;
}
int shared_mutex_destroy(shared_mutex_t mutex) {
if ((errno = pthread_mutex_destroy(mutex.ptr))) {
perror("pthread_mutex_destroy");
return -1;
}
if (munmap((void *)mutex.ptr, sizeof(pthread_mutex_t))) {
perror("munmap");
return -1;
}
mutex.ptr = NULL;
if (close(mutex.shm_fd)) {
perror("close");
return -1;
}
mutex.shm_fd = 0;
if (shm_unlink(mutex.name)) {
perror("shm_unlink");
return -1;
}
free(mutex.name);
return 0;
}
@@ -0,0 +1,70 @@
// NOLINT(legal/copyright)
// See LICENSE file
#ifndef SRC_SHARED_MUTEX_SHARED_MUTEX_H_
#define SRC_SHARED_MUTEX_SHARED_MUTEX_H_
#include <sys/stat.h>
#include <pthread.h> // pthread_mutex_t, pthread_mutexattr_t,
// pthread_mutexattr_init, pthread_mutexattr_setpshared,
// pthread_mutex_init, pthread_mutex_destroy
// Structure of a shared mutex.
typedef struct shared_mutex_t {
pthread_mutex_t *ptr; // Pointer to the pthread mutex and
// shared memory segment.
int shm_fd; // Descriptor of shared memory object.
char* name; // Name of the mutex and associated
// shared memory object.
int created; // Equals 1 (true) if initialization
// of this structure caused creation
// of a new shared mutex.
// Equals 0 (false) if this mutex was
// just retrieved from shared memory.
} shared_mutex_t;
// Initialize a new shared mutex with given `name`. If a mutex
// with such name exists in the system, it will be loaded.
// Otherwise a new mutes will by created.
//
// In case of any error, it will be printed into the standard output
// and the returned structure will have `ptr` equal `NULL`.
// `errno` wil not be reset in such case, so you may used it.
//
// **NOTE:** In case when the mutex appears to be uncreated,
// this function becomes *non-thread-safe*. If multiple threads
// call it at one moment, there occur several race conditions,
// in which one call might recreate another's shared memory
// object or rewrite another's pthread mutex in the shared memory.
// There is no workaround currently, except to run first
// initialization only before multi-threaded or multi-process
// functionality.
shared_mutex_t shared_mutex_init(const char *name, mode_t mode);
// Close access to the shared mutex and free all the resources,
// used by the structure.
//
// Returns 0 in case of success. If any error occurs, it will be
// printed into the standard output and the function will return -1.
// `errno` wil not be reset in such case, so you may used it.
//
// **NOTE:** It will not destroy the mutex. The mutex would not
// only be available to other processes using it right now,
// but also to any process which might want to use it later on.
// For complete desctruction use `shared_mutex_destroy` instead.
//
// **NOTE:** It will not unlock locked mutex.
int shared_mutex_close(shared_mutex_t mutex);
// Close and destroy shared mutex.
// Any open pointers to it will be invalidated.
//
// Returns 0 in case of success. If any error occurs, it will be
// printed into the standard output and the function will return -1.
// `errno` wil not be reset in such case, so you may used it.
//
// **NOTE:** It will not unlock locked mutex.
int shared_mutex_destroy(shared_mutex_t mutex);
#endif // SRC_SHARED_MUTEX_SHARED_MUTEX_H_
@@ -96,7 +96,7 @@ void TestErrCntRead::Run(void) {
err = rsmi_dev_ecc_enabled_get(i, &enabled_mask);
if (err == RSMI_STATUS_NOT_SUPPORTED) {
std::cout <<
"\t**Error Count Enabled Mask for is not supported on this machine"
"\t**Error Count Enabled Mask get is not supported on this machine"
<< std::endl;
} else {
CHK_ERR_ASRT(err)
@@ -89,13 +89,26 @@ void TestPciReadWrite::Run(void) {
rsmi_status_t ret;
rsmi_pcie_bandwidth_t bw;
uint32_t freq_bitmask;
uint64_t sent, received, max_pkt_sz;
uint64_t sent, received, max_pkt_sz, u64int;
TestBase::Run();
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
PrintDeviceHeader(dv_ind);
ret = rsmi_dev_pci_replay_counter_get(dv_ind, &u64int);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout <<
"\t**rsmi_dev_pci_replay_counter_get() is not supported"
" on this machine" << std::endl;
} else {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\tPCIe Replay Counter: " << u64int << std::endl;
}
}
ret = rsmi_dev_pci_throughput_get(dv_ind, &sent, &received, &max_pkt_sz);
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
std::cout << "TEST FAILURE: Current PCIe throughput is not detected. "
@@ -106,7 +119,7 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "PCIe Throughput (1 sec.): " << std::endl;
std::cout << "\tPCIe Throughput (1 sec.): " << std::endl;
std::cout << "\t\tSent: " << sent << " bytes" << std::endl;
std::cout << "\t\tReceived: " << received << " bytes" << std::endl;
std::cout << "\t\tMax Packet Size: " << max_pkt_sz << " bytes" <<
@@ -125,7 +138,8 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "Initial PCIe is " << bw.transfer_rate.current << std::endl;
std::cout << "\tInitial PCIe is " << bw.transfer_rate.current <<
std::endl;
}
// First set the bitmask to all supported bandwidths
@@ -141,7 +155,7 @@ void TestPciReadWrite::Run(void) {
freq_bm_str.size()-1));
IF_VERB(STANDARD) {
std::cout << "Setting bandwidth mask to " << "0b" << freq_bm_str <<
std::cout << "\tSetting bandwidth mask to " << "0b" << freq_bm_str <<
" ..." << std::endl;
}
ret = rsmi_dev_pci_bandwidth_set(dv_ind, freq_bitmask);
@@ -151,9 +165,9 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "Bandwidth is now index " << bw.transfer_rate.current <<
std::cout << "\tBandwidth is now index " << bw.transfer_rate.current <<
std::endl;
std::cout << "Resetting mask to all bandwidths." << std::endl;
std::cout << "\tResetting mask to all bandwidths." << std::endl;
}
ret = rsmi_dev_pci_bandwidth_set(dv_ind, 0xFFFFFFFF);
CHK_ERR_ASRT(ret)
@@ -99,7 +99,7 @@ void TestTempRead::Run(void) {
err = rsmi_dev_temp_metric_get(i, 0, met, &val_i64);
if (err != RSMI_STATUS_SUCCESS) {
if (err == RSMI_STATUS_FILE_ERROR) {
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**" << label << ": " <<
"Not supported on this machine" << std::endl;
@@ -155,7 +155,7 @@ void TestVoltCurvRead::Run(void) {
err = rsmi_dev_od_volt_info_get(i, &odv);
if (err == RSMI_STATUS_FILE_ERROR ||
err == RSMI_STATUS_NOT_YET_IMPLEMENTED) {
err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout <<
"\t**rsmi_dev_od_volt_info_get: Not supported on this machine"