By default, only consider AMD GPUs in RSMI device list (#36)

With newly added initialization parameters that can be
passed to rsmi_init(), you can tell RSMI to consider other
devices.

Also:
-fixed incorrect header file name that would break in C
builds
-modified rsmi_init() and rsmi_shut_down() to reinitialize and
clear static structures


[ROCm/rocm_smi_lib commit: a0817d6b13]
此提交包含在:
Chris Freehill
2019-05-09 20:45:54 -05:00
提交者 GitHub
父節點 3901643427
當前提交 00eec6f558
共有 10 個檔案被更改,包括 137 行新增23 行删除
未顯示二進位檔案。
+30 -3
查看文件
@@ -47,7 +47,7 @@
extern "C" {
#include <cstdint>
#else
#include <stdinit.h>
#include <stdint.h>
#endif // __cplusplus
#include <stdint.h>
@@ -113,6 +113,29 @@ typedef enum {
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rsmi_status_t;
/* Flag values accepted by rsmi_init() through its init_flags argument. */
typedef enum {
RSMI_INIT_FLAG_ALL_GPUS = 0x1, //!< Attempt to add all GPUs found
//!< (including non-AMD) to the list
//!< of devices from which SMI
//!< information can be retrieved. By
//!< default, only AMD devices are
//!< enumerated by RSMI.
} rsmi_init_flags_t;
/**
* @brief Initialization flags
*
* Initialization flags may be OR'd together and passed to ::rsmi_init().
* Each enumerator occupies its own bit so that values can be combined.
*/
typedef enum {
RSMI_INIT_FLAG_ALL_GPUS = 0x1, //!< Attempt to add all GPUs found
//!< (including non-AMD) to the list
//!< of devices from which SMI
//!< information can be retrieved. By
//!< default, only AMD devices are
//!< enumerated by RSMI.
} rsmi_init_flags_t;
/**
* @brief PowerPlay performance levels
*/
@@ -156,6 +179,9 @@ typedef enum {
RSMI_SW_COMP_LAST = RSMI_SW_COMP_DRIVER
} rsmi_sw_component_t;
/**
* Clock types
*/
typedef enum {
RSMI_CLK_TYPE_SYS = 0x0, //!< System clock
RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS,
@@ -493,8 +519,9 @@ typedef struct {
* @details When called, this initializes internal data structures,
* including those corresponding to sources of information that SMI provides.
*
* @param[in] init_flags Bit flags that tell SMI how to initialize. Not
* currently used.
* @param[in] init_flags Bit flags that tell SMI how to initialize. Values of
* ::rsmi_init_flags_t may be OR'd together and passed through @p init_flags
* to modify how RSMI initializes.
*
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
*/
+9 -4
查看文件
@@ -62,12 +62,13 @@ namespace smi {
class RocmSMI {
public:
RocmSMI(void); // direct use of this constructor is deprecated; use
// getInstance()
RocmSMI(uint64_t flags);
~RocmSMI(void);
static RocmSMI& getInstance(void);
static RocmSMI& getInstance(uint64_t flags = 0);
void Initialize(uint64_t flags);
void Cleanup(void);
static std::vector<std::shared_ptr<amd::smi::Device>>&
monitor_devices() {return s_monitor_devices;}
uint32_t DiscoverDevices(void);
@@ -78,6 +79,9 @@ class RocmSMI {
uint32_t IterateSMIDevices(
std::function<uint32_t(std::shared_ptr<Device>&, void *)> func, void *);
// Overwrites the stored initialization option flags (init_options_).
void set_init_options(uint64_t options) {init_options_ = options;}
// Returns the currently stored initialization option flags (init_options_).
uint64_t init_options() const {return init_options_;}
private:
std::vector<std::shared_ptr<Device>> devices_;
std::vector<std::shared_ptr<Monitor>> monitors_;
@@ -90,6 +94,7 @@ class RocmSMI {
static std::vector<std::shared_ptr<amd::smi::Device>> s_monitor_devices;
RocmSMI_env_vars env_vars_;
uint64_t init_options_;
};
} // namespace smi
+16 -9
查看文件
@@ -94,7 +94,7 @@ static rsmi_status_t handleException() {
#define TRY try {
#define CATCH } catch (...) {return handleException();}
#define GET_DEV_FROM_INDX \
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance(); \
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
if (dv_ind >= smi.monitor_devices().size()) { \
return RSMI_STATUS_INVALID_ARGS; \
} \
@@ -106,7 +106,8 @@ static rsmi_status_t handleException() {
amd::smi::ScopedPthread _lock(_pw);
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
if (dv_ind >= smi.monitor_devices().size()) {
return nullptr;
}
@@ -382,7 +383,7 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
uint32_t dv_ind, uint64_t *val) {
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
if (dv_ind >= smi.monitor_devices().size() || val == nullptr) {
return RSMI_STATUS_INVALID_ARGS;
@@ -416,11 +417,12 @@ static bool is_power_of_2(uint64_t n) {
}
rsmi_status_t
rsmi_init(uint64_t init_flags) {
rsmi_init(uint64_t flags) {
TRY
(void)init_flags; // unused for now; for future use
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
smi.Initialize(flags);
return RSMI_STATUS_SUCCESS;
CATCH
}
@@ -430,6 +432,11 @@ rsmi_init(uint64_t init_flags) {
// Shuts RSMI down by asking the RocmSMI singleton to run Cleanup().
// Returns RSMI_STATUS_SUCCESS on the normal path. The TRY/CATCH macros wrap
// the body in a try block whose catch-all handler returns handleException().
rsmi_status_t
rsmi_shut_down(void) {
  TRY
  amd::smi::RocmSMI::getInstance().Cleanup();
  return RSMI_STATUS_SUCCESS;
  CATCH
}
@@ -441,7 +448,7 @@ rsmi_num_monitor_devices(uint32_t *num_devices) {
return RSMI_STATUS_INVALID_ARGS;
}
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
*num_devices = smi.monitor_devices().size();
return RSMI_STATUS_SUCCESS;
@@ -1086,7 +1093,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
assert(freqs.num_supported <= RSMI_MAX_NUM_FREQUENCIES);
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
// Above call to rsmi_dev_get_gpu_clk_freq should have emitted an error if
// assert below is not true
@@ -1366,7 +1373,7 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) {
assert(bws.transfer_rate.num_supported <= RSMI_MAX_NUM_FREQUENCIES);
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
// Above call to rsmi_dev_pci_bandwidth_get() should have emitted an error
// if assert below is not true
+50 -5
查看文件
@@ -234,10 +234,13 @@ static uint32_t GetMonitorDevices(const std::shared_ptr<amd::smi::Device> &d,
std::vector<std::shared_ptr<amd::smi::Device>> RocmSMI::s_monitor_devices;
RocmSMI::RocmSMI(void) {
void
RocmSMI::Initialize(uint64_t flags) {
auto i = 0;
uint32_t ret;
init_options_ = flags;
GetEnvVariables();
while (std::string(kAMDMonitorTypes[i]) != "") {
@@ -260,15 +263,23 @@ RocmSMI::RocmSMI(void) {
}
}
RocmSMI::~RocmSMI() {
void
RocmSMI::Cleanup() {
s_monitor_devices.clear();
devices_.clear();
monitors_.clear();
}
RocmSMI& RocmSMI::getInstance(void) {
// Constructs the object and records `flags` in init_options_. The body is
// empty, so nothing else happens at construction time.
RocmSMI::RocmSMI(uint64_t flags) : init_options_(flags) {
}
// Destructor with an empty body: no explicit teardown is performed here.
RocmSMI::~RocmSMI() {
}
RocmSMI& RocmSMI::getInstance(uint64_t flags) {
// Assume c++11 or greater. static objects will be created by only 1 thread
// and creation will be thread-safe.
static RocmSMI singleton;
static RocmSMI singleton(flags);
return singleton;
}
@@ -324,6 +335,33 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
return;
}
// Vendor ID that identifies an AMD device; compared against the value read
// from the device's "vendor" file.
static const uint32_t kAmdGpuId = 0x1002;

// Reports whether the device rooted at `dev_path` is an AMD GPU.
//
// Reads "<dev_path>/device/vendor", parses its contents as a hexadecimal
// number and compares the result with kAmdGpuId.
//
// @param dev_path  Directory of the device entry (no trailing slash needed).
// @return true only when the vendor file could be opened, parsed, and holds
//         kAmdGpuId; false in every other case.
//
// Every failure path returns false. The previous code returned `errno` from
// this bool function when the open failed, which converted any non-zero
// errno to `true` and made an unreadable device look like an AMD GPU.
static bool isAMDGPU(std::string dev_path) {
  const std::string vend_path = dev_path + "/device/vendor";

  // Opening the stream doubles as the existence check: a missing or
  // unreadable file leaves the stream closed.
  std::ifstream vendor_file(vend_path);
  if (!vendor_file.is_open()) {
    return false;
  }

  // Start from a known value and verify the extraction, so an empty or
  // malformed file can never compare equal to kAmdGpuId by accident.
  uint32_t vendor_id = 0;
  if (!(vendor_file >> std::hex >> vendor_id)) {
    return false;
  }

  return vendor_id == kAmdGpuId;
}
uint32_t RocmSMI::DiscoverDevices(void) {
auto ret = 0;
@@ -346,7 +384,14 @@ uint32_t RocmSMI::DiscoverDevices(void) {
while (dentry != nullptr) {
if (memcmp(dentry->d_name, kDeviceNamePrefix, strlen(kDeviceNamePrefix))
== 0) {
AddToDeviceList(dentry->d_name);
std::string vend_str_path = kPathDRMRoot;
vend_str_path += "/";
vend_str_path += dentry->d_name;
if (isAMDGPU(vend_str_path) ||
(init_options_ & RSMI_INIT_FLAG_ALL_GPUS)) {
AddToDeviceList(dentry->d_name);
}
}
dentry = readdir(drm_dir);
}
+1 -1
查看文件
@@ -138,7 +138,7 @@ void TestPciReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\tInitial PCIe is " << bw.transfer_rate.current <<
std::cout << "\tInitial PCIe BW index is " << bw.transfer_rate.current <<
std::endl;
}
+22
查看文件
@@ -46,6 +46,7 @@
#include <string>
#include <vector>
#include <memory>
#include <iostream>
#include "rocm_smi/rocm_smi.h"
#include "gtest/gtest.h"
@@ -79,6 +80,7 @@ static void SetFlags(TestBase *test) {
test->set_verbosity(sRSMIGlvalues->verbosity);
test->set_dont_fail(sRSMIGlvalues->dont_fail);
test->set_init_options(sRSMIGlvalues->init_options);
}
@@ -207,10 +209,30 @@ int main(int argc, char** argv) {
settings.monitor_verbosity = 1;
settings.num_iterations = 1;
settings.dont_fail = false;
settings.init_options = 0;
if (ProcessCmdline(&settings, argc, argv)) {
return 1;
}
int ret = 0;
sRSMIGlvalues = &settings;
ret = RUN_ALL_TESTS();
if (ret) {
return ret;
}
settings.init_options = RSMI_INIT_FLAG_ALL_GPUS;
std::cout << "****************************************" << std::endl;
std::cout << "****************************************" << std::endl;
std::cout << "****************************************" << std::endl;
std::cout << "Re-running tests with init options: " << std::hex <<
settings.init_options << std::endl;
std::cout << "****************************************" << std::endl;
std::cout << "****************************************" << std::endl;
std::cout << "****************************************" << std::endl;
settings.init_options = RSMI_INIT_FLAG_ALL_GPUS;
return RUN_ALL_TESTS();
}
+1 -1
查看文件
@@ -84,7 +84,7 @@ void TestBase::SetUp(void) {
MakeHeaderStr(kSetupLabel, &label);
printf("\n\t%s\n", label.c_str());
err = rsmi_init(0);
err = rsmi_init(init_options());
ASSERT_EQ(err, RSMI_STATUS_SUCCESS);
err = rsmi_num_monitor_devices(&num_monitor_devs_);
+7
查看文件
@@ -99,6 +99,12 @@ class TestBase {
// Returns the stored device count (num_monitor_devs_), which SetUp() fills
// in via rsmi_num_monitor_devices().
uint32_t num_monitor_devs(void) const {
return num_monitor_devs_;
}
// Stores the option flags that SetUp() later passes to rsmi_init().
void set_init_options(uint64_t x) {
init_options_ = x;
}
// Returns the stored rsmi initialization option flags (init_options_).
uint64_t init_options(void) const {
return init_options_;
}
protected:
void PrintDeviceHeader(uint32_t dv_ind);
@@ -109,6 +115,7 @@ class TestBase {
std::string title_; ///< Displayed title of test
uint32_t verbosity_; ///< How much additional output to produce
bool dont_fail_; ///< Don't quit test on individual failure if true
uint64_t init_options_; ///< rsmi initialization options
};
#define IF_VERB(VB) if (verbosity() && verbosity() >= (TestBase::VERBOSE_##VB))
+1
查看文件
@@ -55,6 +55,7 @@ struct RSMITstGlobals {
uint32_t verbosity;
uint32_t monitor_verbosity;
uint32_t num_iterations;
uint64_t init_options;
bool dont_fail;
};