By default, only consider AMD GPUs in RSMI device list (#36)
With newly added initialization parameters that can be
passed to rsmi_init(), you can tell RSMI to consider other
devices.
Also:
-fixed incorrect header file name that would break in C
builds
-modified rsmi_init() and rsmi_shut_down() to reinitialize and
clear static structures
[ROCm/rocm_smi_lib commit: a0817d6b13]
此提交包含在:
未顯示二進位檔案。
@@ -47,7 +47,7 @@
|
||||
extern "C" {
|
||||
#include <cstdint>
|
||||
#else
|
||||
#include <stdinit.h>
|
||||
#include <stdint.h>
|
||||
#endif // __cplusplus
|
||||
|
||||
#include <stdint.h>
|
||||
@@ -113,6 +113,29 @@ typedef enum {
|
||||
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
|
||||
} rsmi_status_t;
|
||||
|
||||
typedef enum {
|
||||
RSMI_INIT_FLAG_ALL_GPUS = 0x1, //!< Attempt to add all GPUs found
|
||||
//!< (including non-AMD) to the list
|
||||
//!< of devices from which SMI
|
||||
//!< information can be retrieved. By
|
||||
//!< default, only AMD devices are
|
||||
//!< enumerated by RSMI.
|
||||
} rsmi_init_flags_t;
|
||||
|
||||
/**
|
||||
* @brief Initialization flags
|
||||
*
|
||||
* Initialization flags may be OR'd together and passed to ::rsmi_init().
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_INIT_FLAG_ALL_GPUS = 0x1, //!< Attempt to add all GPUs found
|
||||
//!< (including non-AMD) to the list
|
||||
//!< of devices from which SMI
|
||||
//!< information can be retrieved. By
|
||||
//!< default, only AMD devices are
|
||||
//!< enumerated by RSMI.
|
||||
} rsmi_init_flags_t;
|
||||
|
||||
/**
|
||||
* @brief PowerPlay performance levels
|
||||
*/
|
||||
@@ -156,6 +179,9 @@ typedef enum {
|
||||
RSMI_SW_COMP_LAST = RSMI_SW_COMP_DRIVER
|
||||
} rsmi_sw_component_t;
|
||||
|
||||
/**
|
||||
* Clock types
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_CLK_TYPE_SYS = 0x0, //!< System clock
|
||||
RSMI_CLK_TYPE_FIRST = RSMI_CLK_TYPE_SYS,
|
||||
@@ -493,8 +519,9 @@ typedef struct {
|
||||
* @details When called, this initializes internal data structures,
|
||||
* including those corresponding to sources of information that SMI provides.
|
||||
*
|
||||
* @param[in] init_flags Bit flags that tell SMI how to initialize. Not
|
||||
* currently used.
|
||||
* @param[in] init_flags Bit flags that tell SMI how to initialize. Values of
|
||||
* ::rsmi_init_flags_t may be OR'd together and passed through @p init_flags
|
||||
* to modify how RSMI initializes.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call.
|
||||
*/
|
||||
|
||||
@@ -62,12 +62,13 @@ namespace smi {
|
||||
|
||||
class RocmSMI {
|
||||
public:
|
||||
RocmSMI(void); // direct use of this constructor is deprecated; use
|
||||
// getInstance()
|
||||
|
||||
RocmSMI(uint64_t flags);
|
||||
~RocmSMI(void);
|
||||
|
||||
static RocmSMI& getInstance(void);
|
||||
static RocmSMI& getInstance(uint64_t flags = 0);
|
||||
void Initialize(uint64_t flags);
|
||||
void Cleanup(void);
|
||||
|
||||
static std::vector<std::shared_ptr<amd::smi::Device>>&
|
||||
monitor_devices() {return s_monitor_devices;}
|
||||
uint32_t DiscoverDevices(void);
|
||||
@@ -78,6 +79,9 @@ class RocmSMI {
|
||||
uint32_t IterateSMIDevices(
|
||||
std::function<uint32_t(std::shared_ptr<Device>&, void *)> func, void *);
|
||||
|
||||
void set_init_options(uint64_t options) {init_options_ = options;}
|
||||
uint64_t init_options() const {return init_options_;}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<Device>> devices_;
|
||||
std::vector<std::shared_ptr<Monitor>> monitors_;
|
||||
@@ -90,6 +94,7 @@ class RocmSMI {
|
||||
|
||||
static std::vector<std::shared_ptr<amd::smi::Device>> s_monitor_devices;
|
||||
RocmSMI_env_vars env_vars_;
|
||||
uint64_t init_options_;
|
||||
};
|
||||
|
||||
} // namespace smi
|
||||
|
||||
@@ -94,7 +94,7 @@ static rsmi_status_t handleException() {
|
||||
#define TRY try {
|
||||
#define CATCH } catch (...) {return handleException();}
|
||||
#define GET_DEV_FROM_INDX \
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance(); \
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
|
||||
if (dv_ind >= smi.monitor_devices().size()) { \
|
||||
return RSMI_STATUS_INVALID_ARGS; \
|
||||
} \
|
||||
@@ -106,7 +106,8 @@ static rsmi_status_t handleException() {
|
||||
amd::smi::ScopedPthread _lock(_pw);
|
||||
|
||||
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
if (dv_ind >= smi.monitor_devices().size()) {
|
||||
return nullptr;
|
||||
}
|
||||
@@ -382,7 +383,7 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
|
||||
|
||||
static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
|
||||
uint32_t dv_ind, uint64_t *val) {
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
if (dv_ind >= smi.monitor_devices().size() || val == nullptr) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
@@ -416,11 +417,12 @@ static bool is_power_of_2(uint64_t n) {
|
||||
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_init(uint64_t init_flags) {
|
||||
rsmi_init(uint64_t flags) {
|
||||
TRY
|
||||
(void)init_flags; // unused for now; for future use
|
||||
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
smi.Initialize(flags);
|
||||
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
CATCH
|
||||
}
|
||||
@@ -430,6 +432,11 @@ rsmi_init(uint64_t init_flags) {
|
||||
rsmi_status_t
|
||||
rsmi_shut_down(void) {
|
||||
TRY
|
||||
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
smi.Cleanup();
|
||||
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
CATCH
|
||||
}
|
||||
@@ -441,7 +448,7 @@ rsmi_num_monitor_devices(uint32_t *num_devices) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
*num_devices = smi.monitor_devices().size();
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
@@ -1086,7 +1093,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
|
||||
|
||||
assert(freqs.num_supported <= RSMI_MAX_NUM_FREQUENCIES);
|
||||
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
// Above call to rsmi_dev_get_gpu_clk_freq should have emitted an error if
|
||||
// assert below is not true
|
||||
@@ -1366,7 +1373,7 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) {
|
||||
|
||||
assert(bws.transfer_rate.num_supported <= RSMI_MAX_NUM_FREQUENCIES);
|
||||
|
||||
amd::smi::RocmSMI smi = amd::smi::RocmSMI::getInstance();
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
// Above call to rsmi_dev_pci_bandwidth_get() should have emitted an error
|
||||
// if assert below is not true
|
||||
|
||||
@@ -234,10 +234,13 @@ static uint32_t GetMonitorDevices(const std::shared_ptr<amd::smi::Device> &d,
|
||||
|
||||
std::vector<std::shared_ptr<amd::smi::Device>> RocmSMI::s_monitor_devices;
|
||||
|
||||
RocmSMI::RocmSMI(void) {
|
||||
void
|
||||
RocmSMI::Initialize(uint64_t flags) {
|
||||
auto i = 0;
|
||||
uint32_t ret;
|
||||
|
||||
init_options_ = flags;
|
||||
|
||||
GetEnvVariables();
|
||||
|
||||
while (std::string(kAMDMonitorTypes[i]) != "") {
|
||||
@@ -260,15 +263,23 @@ RocmSMI::RocmSMI(void) {
|
||||
}
|
||||
}
|
||||
|
||||
RocmSMI::~RocmSMI() {
|
||||
void
|
||||
RocmSMI::Cleanup() {
|
||||
s_monitor_devices.clear();
|
||||
devices_.clear();
|
||||
monitors_.clear();
|
||||
}
|
||||
|
||||
RocmSMI& RocmSMI::getInstance(void) {
|
||||
RocmSMI::RocmSMI(uint64_t flags) : init_options_(flags) {
|
||||
}
|
||||
|
||||
RocmSMI::~RocmSMI() {
|
||||
}
|
||||
|
||||
RocmSMI& RocmSMI::getInstance(uint64_t flags) {
|
||||
// Assume c++11 or greater. static objects will be created by only 1 thread
|
||||
// and creation will be thread-safe.
|
||||
static RocmSMI singleton;
|
||||
static RocmSMI singleton(flags);
|
||||
return singleton;
|
||||
}
|
||||
|
||||
@@ -324,6 +335,33 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
|
||||
return;
|
||||
}
|
||||
|
||||
static const uint32_t kAmdGpuId=0x1002;
|
||||
|
||||
static bool isAMDGPU(std::string dev_path) {
|
||||
|
||||
std::string vend_path = dev_path + "/device/vendor";
|
||||
if (!FileExists(vend_path.c_str())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ifstream fs;
|
||||
fs.open(vend_path);
|
||||
|
||||
if (!fs.is_open()) {
|
||||
return errno;
|
||||
}
|
||||
|
||||
uint32_t vendor_id;
|
||||
|
||||
fs >> std::hex >> vendor_id;
|
||||
|
||||
fs.close();
|
||||
|
||||
if (vendor_id == kAmdGpuId) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t RocmSMI::DiscoverDevices(void) {
|
||||
auto ret = 0;
|
||||
@@ -346,7 +384,14 @@ uint32_t RocmSMI::DiscoverDevices(void) {
|
||||
while (dentry != nullptr) {
|
||||
if (memcmp(dentry->d_name, kDeviceNamePrefix, strlen(kDeviceNamePrefix))
|
||||
== 0) {
|
||||
AddToDeviceList(dentry->d_name);
|
||||
std::string vend_str_path = kPathDRMRoot;
|
||||
vend_str_path += "/";
|
||||
vend_str_path += dentry->d_name;
|
||||
|
||||
if (isAMDGPU(vend_str_path) ||
|
||||
(init_options_ & RSMI_INIT_FLAG_ALL_GPUS)) {
|
||||
AddToDeviceList(dentry->d_name);
|
||||
}
|
||||
}
|
||||
dentry = readdir(drm_dir);
|
||||
}
|
||||
|
||||
@@ -138,7 +138,7 @@ void TestPciReadWrite::Run(void) {
|
||||
CHK_ERR_ASRT(ret)
|
||||
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\tInitial PCIe is " << bw.transfer_rate.current <<
|
||||
std::cout << "\tInitial PCIe BW index is " << bw.transfer_rate.current <<
|
||||
std::endl;
|
||||
}
|
||||
|
||||
|
||||
@@ -46,6 +46,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "gtest/gtest.h"
|
||||
@@ -79,6 +80,7 @@ static void SetFlags(TestBase *test) {
|
||||
|
||||
test->set_verbosity(sRSMIGlvalues->verbosity);
|
||||
test->set_dont_fail(sRSMIGlvalues->dont_fail);
|
||||
test->set_init_options(sRSMIGlvalues->init_options);
|
||||
}
|
||||
|
||||
|
||||
@@ -207,10 +209,30 @@ int main(int argc, char** argv) {
|
||||
settings.monitor_verbosity = 1;
|
||||
settings.num_iterations = 1;
|
||||
settings.dont_fail = false;
|
||||
settings.init_options = 0;
|
||||
|
||||
if (ProcessCmdline(&settings, argc, argv)) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
sRSMIGlvalues = &settings;
|
||||
ret = RUN_ALL_TESTS();
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
settings.init_options = RSMI_INIT_FLAG_ALL_GPUS;
|
||||
|
||||
std::cout << "****************************************" << std::endl;
|
||||
std::cout << "****************************************" << std::endl;
|
||||
std::cout << "****************************************" << std::endl;
|
||||
std::cout << "Re-running tests with init options: " << std::hex <<
|
||||
settings.init_options << std::endl;
|
||||
std::cout << "****************************************" << std::endl;
|
||||
std::cout << "****************************************" << std::endl;
|
||||
std::cout << "****************************************" << std::endl;
|
||||
settings.init_options = RSMI_INIT_FLAG_ALL_GPUS;
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@@ -84,7 +84,7 @@ void TestBase::SetUp(void) {
|
||||
MakeHeaderStr(kSetupLabel, &label);
|
||||
printf("\n\t%s\n", label.c_str());
|
||||
|
||||
err = rsmi_init(0);
|
||||
err = rsmi_init(init_options());
|
||||
ASSERT_EQ(err, RSMI_STATUS_SUCCESS);
|
||||
|
||||
err = rsmi_num_monitor_devices(&num_monitor_devs_);
|
||||
|
||||
@@ -99,6 +99,12 @@ class TestBase {
|
||||
uint32_t num_monitor_devs(void) const {
|
||||
return num_monitor_devs_;
|
||||
}
|
||||
void set_init_options(uint64_t x) {
|
||||
init_options_ = x;
|
||||
}
|
||||
uint64_t init_options(void) const {
|
||||
return init_options_;
|
||||
}
|
||||
|
||||
protected:
|
||||
void PrintDeviceHeader(uint32_t dv_ind);
|
||||
@@ -109,6 +115,7 @@ class TestBase {
|
||||
std::string title_; ///< Displayed title of test
|
||||
uint32_t verbosity_; ///< How much additional output to produce
|
||||
bool dont_fail_; ///< Don't quit test on individual failure if true
|
||||
uint64_t init_options_; ///< rsmi initialization options
|
||||
};
|
||||
|
||||
#define IF_VERB(VB) if (verbosity() && verbosity() >= (TestBase::VERBOSE_##VB))
|
||||
|
||||
@@ -55,6 +55,7 @@ struct RSMITstGlobals {
|
||||
uint32_t verbosity;
|
||||
uint32_t monitor_verbosity;
|
||||
uint32_t num_iterations;
|
||||
uint64_t init_options;
|
||||
bool dont_fail;
|
||||
};
|
||||
|
||||
|
||||
新增問題並參考
封鎖使用者