Partition EBUSY with RSMI_STATUS_BUSY & invalid GPU Metrics check

* Updates:
   - [API/CLI] rsmi_dev_*_partition_set &
     rsmi_dev_*_partition_reset - exposed RSMI_STATUS_BUSY for
     EBUSY writes + cleaned up accidental map insertions
     (map[key] inserts a default value when the key is not in the map;
     map.at(key) throws instead, which fixes this potential issue)
   - [API] rsmi_dev_gpu_metrics_info_get() - returns
     RSMI_STATUS_NOT_SUPPORTED for unsupported metric tables
     outside of 1v1/1v2/1v3
   - [API] writeDevInfoStr() - exposes RSMI_STATUS_BUSY for
     EBUSY write errors; kept backward compatibility
     for other writes which do not care about these states
   - [API] rsmi_dev_od_volt_info_get()
      & rsmi_dev_od_volt_curve_regions_get() have better logging
     + Expose more details on why they are erroring
   - [Utils/logs/example] Expose AMD GPU gfx target version to aid in
     system troubleshooting
   - [Utils] Added test methods that look at od volt
     freq & regions into here - for easier access across
     several tests
   - [Utils] Updated getRSMIStatusString() (new argument - fullStatus;
     defaults to true for backwards compatibility)
     -> false shows shortened RSMI STATUS response
   - [Utils] Added splitString to cut out noisy return responses
     (used in getRSMIStatusString(), when fullStatus = false)
   - [Utils] Added getFileCreationDate() to expose build date
     of the library - helpful for local builds or experimental builds
   - [Utils] Macro cleanup
   - [Example] Added a few gpu_metric checks - helpful for upcoming
     updates
   - [Device] SYSFS/DebugFS - now have better r/w displayed in logs
   - [LOGS] Expose library build date - see above for details
   - [Tests] Add more warnings/errors to test builds
   - [Tests] Moved up Partition tests for ordered test runs - helped
     identify issues with GPU BUSY writes
   - [Tests] compute_partition_read_write - handles RSMI_STATUS_BUSY
     with waits for busy status found & cleaned up how we checked
     for partition changes - with RSMI responses exposed more clearly
   - [Tests] perf_determinism - multi gpu now properly runs through
     with full resets as needed
   - [Tests] volt_freq_curv_read - better error handling with more
     verbose output

Change-Id: Ie94c6abb6a9aab95c345996d3ad3843cf6734977
Signed-off-by: Charis Poag <Charis.Poag@amd.com>


[ROCm/amdsmi commit: 57b6135e54]
Этот коммит содержится в:
Charis Poag
2023-10-23 21:37:31 -05:00
родитель 791ad2407c
Коммит 41f5a26408
18 изменённых файлов: 701 добавлений и 215 удалений
+1 -1
Просмотреть файл
@@ -72,7 +72,7 @@ endif()
## Compiler flags
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti")
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -std=c++17")
if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
+18 -10
Просмотреть файл
@@ -363,16 +363,16 @@ typedef rsmi_clk_type_t rsmi_clk_type;
*/
typedef enum {
RSMI_COMPUTE_PARTITION_INVALID = 0,
RSMI_COMPUTE_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
//!< shared memory
RSMI_COMPUTE_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
//!< together with shared memory
RSMI_COMPUTE_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
//!< together with shared memory
RSMI_COMPUTE_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
//!< work together with shared memory
RSMI_COMPUTE_PARTITION_QPX //!< Quad GPU mode (QPX)- Quarter XCCs
//!< work together with shared memory
RSMI_COMPUTE_PARTITION_CPX = 1, //!< Core mode (CPX)- Per-chip XCC with
//!< shared memory
RSMI_COMPUTE_PARTITION_SPX = 2, //!< Single GPU mode (SPX)- All XCCs work
//!< together with shared memory
RSMI_COMPUTE_PARTITION_DPX = 3, //!< Dual GPU mode (DPX)- Half XCCs work
//!< together with shared memory
RSMI_COMPUTE_PARTITION_TPX = 4, //!< Triple GPU mode (TPX)- One-third XCCs
//!< work together with shared memory
RSMI_COMPUTE_PARTITION_QPX = 5, //!< Quad GPU mode (QPX)- Quarter XCCs
//!< work together with shared memory
} rsmi_compute_partition_type_t;
/// \cond Ignore in docs.
typedef rsmi_compute_partition_type_t rsmi_compute_partition_type;
@@ -3783,6 +3783,8 @@ rsmi_dev_compute_partition_get(uint32_t dv_ind, char *compute_partition,
* unavailable for current device
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
* support this function
* @retval ::RSMI_STATUS_BUSY A resource or mutex could not be acquired
* because it is already being used - device is busy
*
*/
rsmi_status_t
@@ -3802,6 +3804,8 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
* @retval ::RSMI_STATUS_PERMISSION function requires root access
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
* support this function
* @retval ::RSMI_STATUS_BUSY A resource or mutex could not be acquired
* because it is already being used - device is busy
*
*/
rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind);
@@ -3866,6 +3870,8 @@ rsmi_dev_memory_partition_get(uint32_t dv_ind, char *memory_partition,
* support this function
* @retval ::RSMI_STATUS_AMDGPU_RESTART_ERR could not successfully restart
* the amdgpu driver
* @retval ::RSMI_STATUS_BUSY A resource or mutex could not be acquired
* because it is already being used - device is busy
*
*/
rsmi_status_t
@@ -3887,6 +3893,8 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
* support this function
* @retval ::RSMI_STATUS_AMDGPU_RESTART_ERR could not successfully restart
* the amdgpu driver
* @retval ::RSMI_STATUS_BUSY A resource or mutex could not be acquired
* because it is already being used - device is busy
*
*/
rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind);
+2 -1
Просмотреть файл
@@ -260,7 +260,8 @@ class Device {
std::vector<std::string> *retVec);
int readDevInfoBinary(DevInfoTypes type, std::size_t b_size,
void *p_binary_data);
int writeDevInfoStr(DevInfoTypes type, std::string valStr);
int writeDevInfoStr(DevInfoTypes type, std::string valStr,
bool returnWriteErr = false);
rsmi_status_t run_amdgpu_property_reinforcement_query(const AMDGpuPropertyQuery_t& amdgpu_property_query);
+3
Просмотреть файл
@@ -84,6 +84,9 @@ class KFDNode {
int get_total_memory(uint64_t* total);
int get_used_memory(uint64_t* used);
// Get gfx target version from kfd
int get_gfx_target_version(uint64_t* gfx_target_version);
private:
uint32_t node_indx_;
uint32_t amdgpu_dev_index_;
+16 -5
Просмотреть файл
@@ -51,6 +51,8 @@
#include <sstream>
#include <iomanip>
#include <type_traits>
#include <tuple>
#include <queue>
#include "rocm_smi/rocm_smi_device.h"
@@ -97,10 +99,10 @@ rsmi_status_t
GetDevBinaryBlob(amd::smi::DevInfoTypes type,
uint32_t dv_ind, std::size_t b_size, void* p_binary_data);
rsmi_status_t ErrnoToRsmiStatus(int err);
std::string getRSMIStatusString(rsmi_status_t ret);
std::string getRSMIStatusString(rsmi_status_t ret, bool fullStatus = true);
std::tuple<bool, std::string, std::string, std::string, std::string,
std::string, std::string, std::string, std::string,
std::string, std::string, std::string>
std::string, std::string, std::string, std::string, std::string>
getSystemDetails(void);
void logSystemDetails(void);
rsmi_status_t getBDFString(uint64_t bdf_id, std::string& bfd_str);
@@ -109,11 +111,20 @@ void logHexDump(const char *desc, const void *addr, const size_t len,
bool isSystemBigEndian();
std::string getBuildType();
std::string getMyLibPath();
std::string getFileCreationDate(std::string path);
int subDirectoryCountInPath(const std::string path);
std::queue<std::string> getAllDeviceGfxVers();
std::string monitor_type_string(amd::smi::MonitorTypes type);
std::string power_type_string(RSMI_POWER_TYPE type);
std::string splitString(std::string str, char delim);
std::string print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv);
std::string print_rsmi_od_volt_freq_regions(uint32_t num_regions,
rsmi_freq_volt_region_t *regions);
bool is_sudo_user();
rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind,
std::string *gfx_version);
template <typename T>
std::string print_int_as_hex(T i, bool showHexNotation=true) {
std::string print_int_as_hex(T i, bool showHexNotation = true) {
std::stringstream ss;
if (showHexNotation) {
ss << "0x" << std::setfill('0') << std::setw(sizeof(T) * 2) << std::hex;
@@ -132,7 +143,7 @@ std::string print_int_as_hex(T i, bool showHexNotation=true) {
}
ss << std::dec;
return ss.str();
};
}
template <typename T>
std::string print_unsigned_int(T i) {
@@ -263,7 +274,7 @@ class ScopedAcquire {
LockType* lock_;
bool doRelease;
/// @brief: Disable copiable and assignable ability.
DISALLOW_COPY_AND_ASSIGN(ScopedAcquire);
DISALLOW_COPY_AND_ASSIGN(ScopedAcquire)
};
} // namespace smi
+12
Просмотреть файл
@@ -966,6 +966,9 @@ def resetComputePartition(deviceList):
printLog(device, 'Permission denied', None)
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None)
elif ret == rsmi_status_t.RSMI_STATUS_BUSY:
printLog(device, 'Device is currently busy, try again later',
None)
else:
rsmi_ret_ok(ret, device, 'reset_compute_partition')
printErrLog(device, 'Failed to reset the compute partition to boot state')
@@ -1002,6 +1005,9 @@ def resetMemoryPartition(deviceList):
printLog(device, 'Permission denied', None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_BUSY:
printLog(device, 'Device is currently busy, try again later',
None)
else:
rsmi_ret_ok(ret, device, 'reset_memory_partition')
printErrLog(device, 'Failed to reset memory partition to boot state')
@@ -1603,6 +1609,9 @@ def setComputePartition(deviceList, computePartitionType):
%computePartitionType, None)
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None)
elif ret == rsmi_status_t.RSMI_STATUS_BUSY:
printLog(device, 'Device is currently busy, try again later',
None)
else:
rsmi_ret_ok(ret, device, 'set_compute_partition')
printErrLog(device, 'Failed to retrieve compute partition, even though device supports it.')
@@ -1673,6 +1682,9 @@ def setMemoryPartition(deviceList, memoryPartition):
printLog(device, 'Permission denied', None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_NOT_SUPPORTED:
printLog(device, 'Not supported on the given system', None, addExtraLine)
elif ret == rsmi_status_t.RSMI_STATUS_BUSY:
printLog(device, 'Device is currently busy, try again later',
None, addExtraLine)
else:
rsmi_ret_ok(ret, device, 'set_memory_partition')
printErrLog(device, 'Failed to retrieve memory partition, even though device supports it.')
+95 -33
Просмотреть файл
@@ -156,6 +156,15 @@
} \
}
// Prints a single status line of the form "\t** <header>: <status>\n".
//   myReturn - RSMI status to print; converted with
//              amd::smi::getRSMIStatusString(myReturn, false).
//   header   - optional label; the "<header>: " part is omitted when empty.
void print_function_header_with_rsmi_ret(
    rsmi_status_t myReturn, std::string header = "") {
  const std::string label = header.empty() ? "" : header + ": ";
  std::cout << "\t** " << label
            << amd::smi::getRSMIStatusString(myReturn, false) << "\n";
}
static void print_test_header(const char *str, uint32_t dv_ind) {
std::cout << "********************************" << "\n";
std::cout << "*** " << str << "\n";
@@ -254,14 +263,24 @@ perf_level_string(rsmi_dev_perf_level_t perf_lvl) {
}
}
static bool isUserRunningAsSudo() {
bool isRunningWithSudo = false;
auto myUID = getuid();
auto myPrivledges = geteuid();
if ((myUID == myPrivledges) && (myPrivledges == 0)) {
isRunningWithSudo = true;
static const std::string
clock_type_string(rsmi_clk_type_t clk) {
switch (clk) {
case RSMI_CLK_TYPE_SYS:
return "RSMI_CLK_TYPE_SYS";
case RSMI_CLK_TYPE_DF:
return "RSMI_CLK_TYPE_DF";
case RSMI_CLK_TYPE_DCEF:
return "RSMI_CLK_TYPE_DCEF";
case RSMI_CLK_TYPE_SOC:
return "RSMI_CLK_TYPE_SOC";
case RSMI_CLK_TYPE_MEM:
return "RSMI_CLK_TYPE_MEM";
case RSMI_CLK_TYPE_PCIE:
return "RSMI_CLK_TYPE_PCIE";
default:
return "RSMI_CLK_INVALID";
}
return isRunningWithSudo;
}
static bool isFileWritable(rsmi_status_t response) {
@@ -271,7 +290,7 @@ static bool isFileWritable(rsmi_status_t response) {
// isFileWritable(ret) - intends to capture this
// response situation.
bool fileWritable = true;
if (isUserRunningAsSudo() && (response == RSMI_STATUS_PERMISSION)) {
if (amd::smi::is_sudo_user() && (response == RSMI_STATUS_PERMISSION)) {
std::cout << "[WARN] User is running with sudo "
<< "permissions, file is not writable." << "\n";
fileWritable = false;
@@ -574,9 +593,19 @@ static rsmi_status_t test_set_freq(uint32_t dv_ind) {
}
static void print_frequencies(rsmi_frequencies_t *f) {
assert(f != nullptr);
bool hasDeepSleep = false;
if (f == nullptr) {
std::cout << "Freq was nullptr\n";
return;
}
for (uint32_t j = 0; j < f->num_supported; ++j) {
std::cout << "\t** " << j << ": " << std::to_string(f->frequency[j]);
if (f->has_deep_sleep && j == 0) {
std::cout << "\t** S: " << std::to_string(f->frequency[j]);
hasDeepSleep = true;
} else {
std::cout << "\t** " << (hasDeepSleep ? j-1 : j)
<< ": " << std::to_string(f->frequency[j]);
}
if (j == f->current) {
std::cout << " *";
}
@@ -714,6 +743,7 @@ int main() {
rsmi_frequencies_t f;
uint32_t num_monitor_devs = 0;
rsmi_gpu_metrics_t p;
std::string val_str;
RSMI_POWER_TYPE power_type = RSMI_INVALID_POWER;
rsmi_num_monitor_devices(&num_monitor_devs);
@@ -725,6 +755,8 @@ int main() {
ret = rsmi_dev_revision_get(i, &val_ui16);
CHK_RSMI_RET_I(ret)
std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << val_ui16 << "\n";
ret = amd::smi::rsmi_get_gfx_target_version(i , &val_str);
std::cout << "\t**Target Graphics Version: " << val_str << "\n";
char current_compute_partition[256];
current_compute_partition[0] = '\0';
@@ -736,7 +768,7 @@ int main() {
? "UNKNOWN" : current_compute_partition);
if (ret != RSMI_STATUS_SUCCESS) {
std::cout << ", RSMI_STATUS = ";
} else {
} else {
std::cout << "\n";
}
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
@@ -773,8 +805,38 @@ int main() {
}
ret = rsmi_dev_gpu_metrics_info_get(i, &p);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**GPU METRICS" << "\n";
print_test_header("GPU METRICS", i);
print_function_header_with_rsmi_ret(ret,
"rsmi_dev_gpu_metrics_info_get(" + std::to_string(i) + ", &p)");
std::cout << "\t**p.average_gfxclk_frequency: " << std::dec
<< p.average_gfxclk_frequency << "\n";
std::cout << "\t**p.average_socclk_frequency: " << std::dec
<< p.average_socclk_frequency << "\n";
std::cout << "\t**p.average_uclk_frequency: " << std::dec
<< p.average_uclk_frequency << "\n";
std::cout << "\t**p.average_vclk0_frequency: " << std::dec
<< p.average_vclk0_frequency << "\n";
std::cout << "\t**p.average_dclk0_frequency: " << std::dec
<< p.average_dclk0_frequency << "\n";
std::cout << "\t**p.average_vclk1_frequency: " << std::dec
<< p.average_vclk1_frequency << "\n";
std::cout << "\t**p.average_dclk1_frequency: " << std::dec
<< p.average_dclk1_frequency << "\n";
std::cout << "\t**p.current_gfxclk: " << std::dec
<< p.current_gfxclk << "\n";
std::cout << "\t**p.current_socclk: " << std::dec
<< p.current_socclk << "\n";
std::cout << "\t**p.current_uclk: " << std::dec
<< p.current_uclk << "\n";
std::cout << "\t**p.current_vclk0: " << std::dec
<< p.current_vclk0 << "\n";
std::cout << "\t**p.current_dclk0: " << std::dec
<< p.current_dclk0 << "\n";
std::cout << "\t**p.current_vclk1: " << std::dec
<< p.current_vclk1 << "\n";
std::cout << "\t**p.current_dclk1: " << std::dec
<< p.current_dclk1 << "\n";
ret = rsmi_dev_perf_level_get(i, &pfl);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
@@ -784,25 +846,25 @@ int main() {
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**OverDrive Level:" << val_ui32 << "\n";
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_MEM, &f);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**Supported GPU Memory clock frequencies: ";
std::cout << f.num_supported << "\n";
print_frequencies(&f);
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SYS, &f);
CHK_AND_PRINT_RSMI_ERR_RET(ret)
std::cout << "\t**Supported GPU clock frequencies: ";
std::cout << f.num_supported << "\n";
print_frequencies(&f);
ret = rsmi_dev_gpu_clk_freq_get(i, RSMI_CLK_TYPE_SOC, &f);
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
std::cout << "\t**Supported GPU clock frequencies (SOC clk): ";
std::cout << f.num_supported << "\n";
std::cout << "\t**Current value (SOC clk): ";
std::cout << f.current << "\n";
print_frequencies(&f);
print_test_header("GPU Clocks", i);
for (int clkType = static_cast<int>(RSMI_CLK_TYPE_SYS);
clkType <= static_cast<int>(RSMI_CLK_TYPE_PCIE);
clkType++) {
rsmi_clk_type_t type = static_cast<rsmi_clk_type_t>(clkType);
ret = rsmi_dev_gpu_clk_freq_get(i, type, &f);
print_function_header_with_rsmi_ret(ret,
"rsmi_dev_gpu_clk_freq_get(" + std::to_string(i) +
", " + clock_type_string(type) + ", &f)");
if (ret != RSMI_STATUS_SUCCESS) {
continue;
}
std::cout << "\t** " << clock_type_string(type)
<< " - Supported # of freqs: ";
std::cout << f.num_supported << "\n";
std::cout << "\t** " << clock_type_string(type) << " f.current: "
<< f.current << "\n";
print_frequencies(&f);
}
std::cout << "\t**Monitor name: ";
char name[128];
@@ -892,7 +954,7 @@ int main() {
}
std::cout << "***** Testing write api's" << "\n";
if (isUserRunningAsSudo() == false) {
if (amd::smi::is_sudo_user() == false) {
std::cout << "Write APIs require users to execute with sudo. "
<< "Cannot proceed." << "\n";
return 0;
+59 -9
Просмотреть файл
@@ -929,6 +929,9 @@ rsmi_status_t
rsmi_perf_determinism_mode_set(uint32_t dv_ind, uint64_t clkvalue) {
TRY
DEVICE_MUTEX
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << " | ======= start =======";
LOG_TRACE(ss);
// Set perf. level to performance determinism so that we can then set the power profile
rsmi_status_t ret = rsmi_dev_perf_level_set_v1(dv_ind,
@@ -1510,6 +1513,9 @@ rsmi_status_t rsmi_dev_od_volt_info_set(uint32_t dv_ind, uint32_t vpoint,
static void get_vc_region(uint32_t start_ind,
std::vector<std::string> *val_vec, rsmi_freq_volt_region_t *p) {
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << " | ======= start =======";
LOG_TRACE(ss);
assert(p != nullptr);
assert(val_vec != nullptr);
THROW_IF_NULLPTR_DEREF(p)
@@ -1520,6 +1526,9 @@ static void get_vc_region(uint32_t start_ind,
assert((*val_vec)[kOD_OD_RANGE_label_array_index] == "OD_RANGE:");
if ((val_vec->size() < kOD_OD_RANGE_label_array_index + 2) ||
((*val_vec)[kOD_OD_RANGE_label_array_index] != "OD_RANGE:") ) {
ss << __PRETTY_FUNCTION__ << " | ======= end ======= | returning "
<< getRSMIStatusString(RSMI_STATUS_UNEXPECTED_DATA);
LOG_TRACE(ss);
throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_DATA, __FUNCTION__);
}
od_value_pair_str_to_range((*val_vec)[start_ind], &p->freq_range);
@@ -1539,6 +1548,7 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind,
TRY
std::vector<std::string> val_vec;
rsmi_status_t ret;
std::ostringstream ss;
assert(num_regions != nullptr);
assert(p != nullptr);
@@ -1547,12 +1557,20 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind,
ret = GetDevValueVec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
if (ret != RSMI_STATUS_SUCCESS) {
ss << __PRETTY_FUNCTION__
<< " | Issue: could not retreive kDevPowerODVoltage" << "; returning "
<< getRSMIStatusString(ret);
LOG_ERROR(ss);
return ret;
}
// This is a work-around to handle systems where kDevPowerODVoltage is not
// fully supported yet.
if (val_vec.size() < 2) {
ss << __PRETTY_FUNCTION__
<< " | Issue: val_vec.size() < 2" << "; returning "
<< getRSMIStatusString(RSMI_STATUS_NOT_YET_IMPLEMENTED);
LOG_ERROR(ss);
return RSMI_STATUS_NOT_YET_IMPLEMENTED;
}
@@ -1560,8 +1578,17 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind,
assert((val_vec_size - kOD_VDDC_CURVE_start_index) > 0);
assert((val_vec_size - kOD_VDDC_CURVE_start_index)%2 == 0);
ss << __PRETTY_FUNCTION__
<< " | val_vec_size = " << std::dec
<< val_vec_size
<< " | kOD_VDDC_CURVE_start_index = " << kOD_VDDC_CURVE_start_index;
LOG_DEBUG(ss);
if (((val_vec_size - kOD_VDDC_CURVE_start_index) <= 0) ||
(((val_vec_size - kOD_VDDC_CURVE_start_index)%2 != 0))) {
ss << __PRETTY_FUNCTION__ << " | Issue: od vdd curve returned unexpected "
<< "data" << "; returning "
<< getRSMIStatusString(RSMI_STATUS_UNEXPECTED_SIZE);
LOG_ERROR(ss);
throw amd::smi::rsmi_exception(RSMI_STATUS_UNEXPECTED_SIZE, __FUNCTION__);
}
@@ -2749,6 +2776,9 @@ rsmi_dev_od_volt_info_get(uint32_t dv_ind, rsmi_od_volt_freq_data_t *odv) {
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ss);
DEVICE_MUTEX
if (odv == nullptr) {
return RSMI_STATUS_INVALID_ARGS;
}
CHK_SUPPORT_NAME_ONLY(odv)
rsmi_status_t ret = get_od_clk_volt_info(dv_ind, odv);
@@ -2779,7 +2809,7 @@ rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind,
uint32_t *num_regions, rsmi_freq_volt_region_t *buffer) {
TRY
std::ostringstream ss;
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
ss << __PRETTY_FUNCTION__ << " | ======= start =======";
LOG_TRACE(ss);
CHK_SUPPORT_NAME_ONLY((num_regions == nullptr || buffer == nullptr) ?
@@ -2791,6 +2821,12 @@ rsmi_status_t rsmi_dev_od_volt_curve_regions_get(uint32_t dv_ind,
DEVICE_MUTEX
rsmi_status_t ret = get_od_clk_volt_curve_regions(dv_ind, num_regions,
buffer);
if (*num_regions == 0) {
ret = RSMI_STATUS_NOT_SUPPORTED;
}
ss << __PRETTY_FUNCTION__ << " | ======= end ======= | returning "
<< getRSMIStatusString(ret);
LOG_TRACE(ss);
return ret;
CATCH
}
@@ -4468,7 +4504,7 @@ get_compute_partition(uint32_t dv_ind, std::string &compute_partition) {
return ret;
}
switch (mapStringToRSMIComputePartitionTypes[compute_partition_str]) {
switch (mapStringToRSMIComputePartitionTypes.at(compute_partition_str)) {
case RSMI_COMPUTE_PARTITION_CPX:
case RSMI_COMPUTE_PARTITION_SPX:
case RSMI_COMPUTE_PARTITION_DPX:
@@ -4585,9 +4621,12 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
ss << __PRETTY_FUNCTION__ << "| ======= start =======";
LOG_TRACE(ss);
REQUIRE_ROOT_ACCESS
if (!amd::smi::is_sudo_user()) {
return RSMI_STATUS_PERMISSION;
}
DEVICE_MUTEX
std::string newComputePartitionStr
= mapRSMIToStringComputePartitionTypes[compute_partition];
= mapRSMIToStringComputePartitionTypes.at(compute_partition);
std::string currentComputePartition;
switch (compute_partition) {
@@ -4605,6 +4644,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
<< " | Device #: " << dv_ind
<< " | Type: "
<< devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Data: " << newComputePartitionStr
<< " | Cause: requested setting was invalid"
<< " | Returning = "
<< getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
@@ -4623,6 +4663,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
<< " | Device #: " << dv_ind
<< " | Type: "
<< devInfoTypesStrings.at(amd::smi::kDevComputePartition)
<< " | Data: " << newComputePartitionStr
<< " | Cause: not an available compute partition setting"
<< " | Returning = "
<< getRSMIStatusString(available_ret) << " |";
@@ -4650,7 +4691,7 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
return ret_get;
}
rsmi_compute_partition_type_t currRSMIComputePartition
= mapStringToRSMIComputePartitionTypes[currentComputePartition];
= mapStringToRSMIComputePartitionTypes.at(currentComputePartition);
if (currRSMIComputePartition == compute_partition) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
@@ -4665,6 +4706,15 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
return RSMI_STATUS_SUCCESS;
}
ss << __PRETTY_FUNCTION__ << " | about to try writing |"
<< newComputePartitionStr
<< "| size of string = " << newComputePartitionStr.size()
<< "| size of c-string = "<< std::dec
<< sizeof(newComputePartitionStr.c_str())/sizeof(newComputePartitionStr[0])
<< "| sizeof string = " << std::dec
<< sizeof(newComputePartitionStr);
LOG_DEBUG(ss);
GET_DEV_FROM_INDX
int ret = dev->writeDevInfo(amd::smi::kDevComputePartition,
newComputePartitionStr);
@@ -4699,7 +4749,7 @@ static rsmi_status_t get_memory_partition(uint32_t dv_ind,
return ret;
}
switch (mapStringToMemoryPartitionTypes[val_str]) {
switch (mapStringToMemoryPartitionTypes.at(val_str)) {
case RSMI_MEMORY_PARTITION_NPS1:
case RSMI_MEMORY_PARTITION_NPS2:
case RSMI_MEMORY_PARTITION_NPS4:
@@ -4755,7 +4805,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
}
std::string newMemoryPartition
= mapRSMIToStringMemoryPartitionTypes[memory_partition];
= mapRSMIToStringMemoryPartitionTypes.at(memory_partition);
std::string currentMemoryPartition;
switch (memory_partition) {
@@ -4798,7 +4848,7 @@ rsmi_dev_memory_partition_set(uint32_t dv_ind,
return ret_get;
}
rsmi_memory_partition_type_t currRSMIMemoryPartition
= mapStringToMemoryPartitionTypes[currentMemoryPartition];
= mapStringToMemoryPartitionTypes.at(currentMemoryPartition);
if (currRSMIMemoryPartition == memory_partition) {
ss << __PRETTY_FUNCTION__
<< " | ======= end ======= "
@@ -4942,7 +4992,7 @@ rsmi_status_t rsmi_dev_compute_partition_reset(uint32_t dv_ind) {
// Likely due to device not supporting it
if (bootState != "UNKNOWN") {
rsmi_compute_partition_type_t compute_partition =
mapStringToRSMIComputePartitionTypes[bootState];
mapStringToRSMIComputePartitionTypes.at(bootState);
ret = rsmi_dev_compute_partition_set(dv_ind, compute_partition);
}
ss << __PRETTY_FUNCTION__
@@ -4981,7 +5031,7 @@ rsmi_status_t rsmi_dev_memory_partition_reset(uint32_t dv_ind) {
// Likely due to device not supporting it
if (bootState != "UNKNOWN") {
rsmi_memory_partition_type_t memory_partition =
mapStringToMemoryPartitionTypes[bootState];
mapStringToMemoryPartitionTypes.at(bootState);
ret = rsmi_dev_memory_partition_set(dv_ind, memory_partition);
}
ss << __PRETTY_FUNCTION__
+68 -19
Просмотреть файл
@@ -598,14 +598,17 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
int ret = isRegularFile(sysfs_path, &reg_file);
if (ret != 0) {
ss << "File did not exist - SYSFS file (" << sysfs_path
ss << __PRETTY_FUNCTION__ << " | Issue: File did not exist - SYSFS file ("
<< sysfs_path
<< ") for DevInfoInfoType (" << RocmSMI::devInfoTypesStrings.at(type)
<< "), returning " << std::to_string(ret);
LOG_ERROR(ss);
return ret;
}
if (!reg_file) {
ss << "File is not a regular file - SYSFS file (" << sysfs_path << ") for "
ss << __PRETTY_FUNCTION__
<< " | Issue: File is not a regular file - SYSFS file ("
<< sysfs_path << ") for "
<< "DevInfoInfoType (" << RocmSMI::devInfoTypesStrings.at(type) << "),"
<< " returning ENOENT (" << std::strerror(ENOENT) << ")";
LOG_ERROR(ss);
@@ -615,7 +618,8 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
fs->open(sysfs_path);
if (!fs->is_open()) {
ss << "Could not open - SYSFS file (" << sysfs_path << ") for "
ss << __PRETTY_FUNCTION__
<< " | Issue: Could not open - SYSFS file (" << sysfs_path << ") for "
<< "DevInfoInfoType (" << RocmSMI::devInfoTypesStrings.at(type) << "), "
<< ", returning " << std::to_string(errno) << " ("
<< std::strerror(errno) << ")";
@@ -623,7 +627,8 @@ int Device::openSysfsFileStream(DevInfoTypes type, T *fs, const char *str) {
return errno;
}
ss << "Successfully opened SYSFS file (" << sysfs_path
ss << __PRETTY_FUNCTION__ << " | Successfully opened SYSFS file ("
<< sysfs_path
<< ") for DevInfoInfoType (" << RocmSMI::devInfoTypesStrings.at(type)
<< ")";
LOG_INFO(ss);
@@ -671,32 +676,51 @@ int Device::readDevInfoStr(DevInfoTypes type, std::string *retStr) {
ret = openSysfsFileStream(type, &fs);
if (ret != 0) {
ss << "Could not read device info string for DevInfoType ("
<< RocmSMI::devInfoTypesStrings.at(type)<< "), returning "
<< RocmSMI::devInfoTypesStrings.at(type) << "), returning "
<< std::to_string(ret);
LOG_ERROR(ss);
return ret;
}
fs >> *retStr;
std::string info = "Successfully read device info string for DevInfoType (" +
RocmSMI::devInfoTypesStrings.at(type) + "): " +
*retStr;
LOG_INFO(info);
fs.close();
ss << __PRETTY_FUNCTION__
<< "Successfully read device info string for DevInfoType (" +
RocmSMI::devInfoTypesStrings.at(type) + "): " + *retStr
<< " | "
<< (fs.is_open() ? " File stream is opened" : " File stream is closed")
<< " | " << (fs.bad() ? "[ERROR] Bad read operation" :
"[GOOD] No bad bit read, successful read operation")
<< " | " << (fs.fail() ? "[ERROR] Failed read - format error" :
"[GOOD] No fail - Successful read operation")
<< " | " << (fs.eof() ? "[ERROR] Failed read - EOF error" :
"[GOOD] No eof error - Successful read operation")
<< " | " << (fs.good() ? "[GOOD] read good - Successful read operation" :
"[ERROR] Failed read - good error");
LOG_INFO(ss);
return 0;
}
int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) {
auto tempPath = path_;
int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr,
bool returnWriteErr) {
// returnWriteErr = false, backwards compatibility (old calls)
// returnWriteErr = true, improvement - allows us to detect errors
// when writing to file
// (such as EBUSY)
auto sysfs_path = path_;
sysfs_path += "/device/";
sysfs_path += kDevAttribNameMap.at(type);
std::ofstream fs;
int ret;
std::ostringstream ss;
fs.rdbuf()->pubsetbuf(nullptr,0);
fs.flush();
fs.rdbuf()->pubsetbuf(0, 0);
ret = openSysfsFileStream(type, &fs, valStr.c_str());
if (ret != 0) {
ss << "Could not write device info string (" << valStr
fs.close();
ss << __PRETTY_FUNCTION__ << " | Issue: Could not open fileStream; "
<< "Could not write device info string (" << valStr
<< ") for DevInfoType (" << RocmSMI::devInfoTypesStrings.at(type)
<< "), returning " << std::to_string(ret);
LOG_ERROR(ss);
@@ -705,19 +729,39 @@ int Device::writeDevInfoStr(DevInfoTypes type, std::string valStr) {
// We'll catch any exceptions in rocm_smi.cc code.
if (fs << valStr) {
fs.flush();
fs.close();
ss << "Successfully wrote device info string (" << valStr
<< ") for DevInfoType (" << RocmSMI::devInfoTypesStrings.at(type)
<< "), returning RSMI_STATUS_SUCCESS";
LOG_INFO(ss);
ret = RSMI_STATUS_SUCCESS;
} else {
ss << "Could not write device info string (" << valStr
if (returnWriteErr) {
ret = errno;
} else {
ret = RSMI_STATUS_NOT_SUPPORTED;
}
fs.flush();
fs.close();
ss << __PRETTY_FUNCTION__ << " | Issue: Could not write to file; "
<< "Could not write device info string (" << valStr
<< ") for DevInfoType (" << RocmSMI::devInfoTypesStrings.at(type)
<< "), returning RSMI_STATUS_NOT_SUPPORTED";
<< "), returning " << getRSMIStatusString(ErrnoToRsmiStatus(ret));
ss << " | "
<< (fs.is_open() ? "[ERROR] File stream open" :
"[GOOD] File stream closed")
<< " | " << (fs.bad() ? "[ERROR] Bad write operation" :
"[GOOD] No bad bit write, successful write operation")
<< " | " << (fs.fail() ? "[ERROR] Failed write - format error" :
"[GOOD] No fail - Successful write operation")
<< " | " << (fs.eof() ? "[ERROR] Failed write - EOF error" :
"[GOOD] No eof error - Successful write operation")
<< " | " << (fs.good() ?
"[GOOD] Write good - Successful write operation" :
"[ERROR] Failed write - good error");
LOG_ERROR(ss);
ret = RSMI_STATUS_NOT_SUPPORTED;
}
fs.close();
return ret;
}
@@ -756,6 +800,9 @@ int Device::writeDevInfo(DevInfoTypes type, uint64_t val) {
}
int Device::writeDevInfo(DevInfoTypes type, std::string val) {
auto sysfs_path = path_;
sysfs_path += "/device/";
sysfs_path += kDevAttribNameMap.at(type);
switch (type) {
case kDevGPUMClk:
case kDevDCEFClk:
@@ -764,9 +811,10 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
case kDevPCIEClk:
case kDevPowerODVoltage:
case kDevSOCClk:
return writeDevInfoStr(type, val);
case kDevComputePartition:
case kDevMemoryPartition:
return writeDevInfoStr(type, val);
return writeDevInfoStr(type, val, true);
default:
return EINVAL;
@@ -899,6 +947,7 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
std::string tempStr;
int ret;
int tmp_val;
std::ostringstream ss;
switch (type) {
case kDevDevID:
+6
Просмотреть файл
@@ -496,6 +496,12 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
// a specific version.
*smu = {};
uint8_t dev_content_revision = dev->gpu_metrics_ver().content_revision;
// Reject metric tables whose content revision is none of the supported
// ones (1, 2 or 3). The comparisons have to be AND-ed: a single value is
// always different from at least two of the three constants, so OR-ing
// them would be true for every device and always return NOT_SUPPORTED.
if (dev_content_revision != RSMI_GPU_METRICS_API_CONTENT_VER_1 &&
    dev_content_revision != RSMI_GPU_METRICS_API_CONTENT_VER_2 &&
    dev_content_revision != RSMI_GPU_METRICS_API_CONTENT_VER_3) {
  return RSMI_STATUS_NOT_SUPPORTED;
}
if (dev->gpu_metrics_ver().content_revision ==
RSMI_GPU_METRICS_API_CONTENT_VER_1) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
+21
Просмотреть файл
@@ -971,5 +971,26 @@ int get_gpu_id(uint32_t node, uint64_t *gpu_id) {
return retVal;
}
// Reads the "gfx_target_version" property of this KFD node.
// Equivalent shell lookup:
//   /sys/class/kfd/kfd/topology/nodes/*/properties | grep gfx_target_version
//
// gfx_target_version - out-parameter; receives the value produced by
//                      read_node_properties() (pre-initialised to 0).
// Returns:
//   EINVAL if gfx_target_version is nullptr, otherwise the return code of
//   read_node_properties() (0 is treated as success).
int KFDNode::get_gfx_target_version(uint64_t *gfx_target_version) {
  if (gfx_target_version == nullptr) {
    return EINVAL;
  }
  std::ostringstream ss;
  const std::string properties_path = "/sys/class/kfd/kfd/topology/nodes/"
      + std::to_string(this->node_indx_) + "/properties";
  uint64_t gfx_version = 0;
  const int ret = read_node_properties(this->node_indx_, "gfx_target_version",
                                       &gfx_version);
  *gfx_target_version = gfx_version;
  // Report the real outcome; the message used to claim success unconditionally.
  ss << __PRETTY_FUNCTION__
     << " | File: " << properties_path
     << (ret == 0 ? " | Successfully read node #" : " | Failed to read node #")
     << std::to_string(this->node_indx_)
     << " for gfx_target_version"
     << " | Data (gfx_target_version) *gfx_target_version = "
     << std::to_string(*gfx_target_version)
     << " | return = " << std::to_string(ret)
     << " | ";
  LOG_DEBUG(ss);
  return ret;
}
} // namespace smi
} // namespace amd
+6
Просмотреть файл
@@ -445,6 +445,12 @@ RocmSMI::Initialize(uint64_t flags) {
// store each device boot partition state, if file doesn't exist
dev->storeDevicePartitions(dv_ind);
}
// Assists displaying GPU information after device enumeration
// Otherwise GPU related info will not be discoverable
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
logSystemDetails();
}
// Leaving below to help debug temp file issues
// displayAppTmpFilesContent();
std::string amdGPUDeviceList = displayAllDevicePaths(devices_);
+201 -7
Просмотреть файл
@@ -599,9 +599,19 @@ std::tuple<bool, std::string> readTmpFile(uint32_t dv_ind,
}
// wrapper to return string expression of a rsmi_status_t return
std::string getRSMIStatusString(rsmi_status_t ret) {
// rsmi_status_t ret - return value of RSMI API function
// bool fullStatus - defaults to true, set to false to chop off description
// Returns:
// string - if fullStatus == true, returns full description of return value
// ex. 'RSMI_STATUS_SUCCESS: The function has been executed successfully.'
// string - if fullStatus == false, returns a minimalized return value
// ex. 'RSMI_STATUS_SUCCESS'
std::string getRSMIStatusString(rsmi_status_t ret, bool fullStatus) {
const char *err_str;
rsmi_status_string(ret, &err_str);
if (!fullStatus) {
return splitString(std::string(err_str), ':');
}
return std::string(err_str);
}
@@ -620,9 +630,13 @@ std::string getRSMIStatusString(rsmi_status_t ret) {
// Expressed as big endian or little endian.
// Big Endian (BE), multi-bit symbols encoded as big endian (MSB first)
// Little Endian (LE), multi-bit symbols encoded as little endian (LSB first)
// string rocm_lib_path = Path to library
// string rocm_build_type = Release or debug
// string rocm_build_date = Creation date of library
// string dev_gfx_versions = GPU target graphics version
std::tuple<bool, std::string, std::string, std::string, std::string,
std::string, std::string, std::string, std::string,
std::string, std::string, std::string>
std::string, std::string, std::string, std::string, std::string>
getSystemDetails(void) {
struct utsname buf;
bool errorDetected = false;
@@ -637,7 +651,9 @@ std::tuple<bool, std::string, std::string, std::string, std::string,
std::string endianness = "<undefined>";
std::string rocm_lib_path = "<undefined>";
std::string rocm_build_type = "<undefined>";
std::string rocm_build_date = "<undefined>";
std::string rocm_env_variables = "<undefined>";
std::string dev_gfx_versions = "<undefined>";
if (uname(&buf) < 0) {
errorDetected = true;
@@ -674,11 +690,20 @@ std::tuple<bool, std::string, std::string, std::string, std::string,
}
rocm_build_type = getBuildType();
rocm_lib_path = getMyLibPath();
rocm_build_date = getFileCreationDate(rocm_lib_path);
rocm_env_variables = RocmSMI::getInstance().getRSMIEnvVarInfo();
std::queue<std::string> devGraphicsVersions = getAllDeviceGfxVers();
if (devGraphicsVersions.empty() == false) {
dev_gfx_versions = "";
while (devGraphicsVersions.empty() == false) {
dev_gfx_versions += "\n\t" + devGraphicsVersions.front();
devGraphicsVersions.pop();
}
}
return std::make_tuple(errorDetected, sysname, nodename, release,
version, machine, domainName, os_distribution,
endianness, rocm_build_type, rocm_lib_path,
rocm_env_variables);
rocm_build_date, rocm_env_variables, dev_gfx_versions);
}
// If logging is enabled through RSMI_LOGGING environment variable.
@@ -687,10 +712,11 @@ void logSystemDetails(void) {
std::ostringstream ss;
bool errorDetected;
std::string sysname, node, release, version, machine, domain, distName,
endianness, rocm_build_type, lib_path, rocm_env_vars;
endianness, rocm_build_type, lib_path, build_date, rocm_env_vars,
dev_gfx_versions;
std::tie(errorDetected, sysname, node, release, version, machine, domain,
distName, endianness, rocm_build_type, lib_path,
rocm_env_vars) = getSystemDetails();
distName, endianness, rocm_build_type, lib_path, build_date,
rocm_env_vars, dev_gfx_versions) = getSystemDetails();
if (errorDetected == false) {
ss << "====== Gathered system details ============\n"
<< "SYSTEM NAME: " << sysname << "\n"
@@ -703,7 +729,9 @@ void logSystemDetails(void) {
<< "ENDIANNESS: " << endianness << "\n"
<< "ROCM BUILD TYPE: " << rocm_build_type << "\n"
<< "ROCM-SMI-LIB PATH: " << lib_path << "\n"
<< "ROCM ENV VARIABLES: " << rocm_env_vars << "\n";
<< "ROCM-SMI-LIB BUILD DATE: " << build_date << "\n"
<< "ROCM ENV VARIABLES: " << rocm_env_vars
<< "AMD GFX VERSIONS: " << dev_gfx_versions << "\n";
LOG_INFO(ss);
} else {
ss << "====== Gathered system details ============\n"
@@ -831,6 +859,13 @@ std::string getMyLibPath(void) {
return path;
}
// Returns a textual timestamp for the file at `path`, built from the
// st_ctime field reported by stat() and formatted in asctime layout, then
// passed through removeNewLines().
// NOTE(review): POSIX defines st_ctime as the last status-change time, which
// is only an approximation of a creation/build date - confirm this is the
// intended field.
// Returns "<undefined>" when the file cannot be stat'ed or the time cannot be
// converted (previously an uninitialised struct stat / null tm was used).
std::string getFileCreationDate(std::string path) {
  struct stat file_stat;
  if (stat(path.c_str(), &file_stat) != 0) {
    return "<undefined>";
  }
  // Reentrant variants avoid the shared static buffers of localtime/asctime.
  struct tm time_info;
  if (localtime_r(&file_stat.st_ctime, &time_info) == nullptr) {
    return "<undefined>";
  }
  char time_text[32];  // asctime_r requires a buffer of at least 26 bytes
  if (asctime_r(&time_info, time_text) == nullptr) {
    return "<undefined>";
  }
  return removeNewLines(std::string(time_text));
}
rsmi_status_t getBDFString(uint64_t bdf_id, std::string& bfd_str)
{
auto result = rsmi_status_t::RSMI_STATUS_SUCCESS;
@@ -974,5 +1009,164 @@ std::string power_type_string(RSMI_POWER_TYPE type) {
return powerTypesToString.at(type);
}
// Returns the part of `str` that precedes the first occurrence of `delim`.
//   - `delim` absent  -> the whole string is returned
//   - empty `str`     -> an empty string is returned
//   - leading `delim` -> an empty string is returned
// Every path returns a value (the previous loop-based version could fall off
// the end of the function) and no intermediate token list is built.
std::string splitString(std::string str, char delim) {
  const std::string::size_type delim_pos = str.find(delim);
  if (delim_pos == std::string::npos) {
    return str;
  }
  return str.substr(0, delim_pos);
}
// Formats a frequency range as "<title><lower> to <upper> MHz\n", where both
// bounds are divided by 1000000 before printing.
// A nullptr range yields a fixed diagnostic line instead.
static std::string pt_rng_Mhz(std::string title, rsmi_range *r) {
  if (r == nullptr) {
    return "pt_rng_Mhz | rsmi_range r = nullptr\n";
  }
  std::ostringstream out;
  out << title
      << r->lower_bound/1000000 << " to "
      << r->upper_bound/1000000 << " MHz" << "\n";
  return out.str();
}
// Formats a voltage range as "<title><lower> to <upper> mV\n"; the bounds are
// printed as stored. A nullptr range yields a fixed diagnostic line instead.
static std::string pt_rng_mV(std::string title, rsmi_range *r) {
  if (r == nullptr) {
    return "pt_rng_mV | rsmi_range r = nullptr\n";
  }
  std::ostringstream out;
  out << title
      << r->lower_bound << " to " << r->upper_bound
      << " mV" << "\n";
  return out.str();
}
// Formats one voltage-curve point as two lines: its frequency (divided by
// 1000000, labelled MHz) and its voltage (labelled mV).
// The pointer is dereferenced unconditionally; callers must pass a valid point.
static std::string print_pnt(rsmi_od_vddc_point_t *pt) {
  std::ostringstream out;
  out << "\t\t** Frequency: " << pt->frequency/1000000 << " MHz\n"
      << "\t\t** Voltage: " << pt->voltage << " mV\n";
  return out.str();
}
// Concatenates the print_pnt() text of the first
// RSMI_NUM_VOLTAGE_CURVE_POINTS entries of c->vc_points.
// A nullptr curve yields a fixed diagnostic line instead.
static std::string pt_vddc_curve(rsmi_od_volt_curve *c) {
  if (c == nullptr) {
    return "pt_vddc_curve | rsmi_od_volt_curve c = nullptr\n";
  }
  std::ostringstream out;
  uint32_t point_idx = 0;
  while (point_idx < RSMI_NUM_VOLTAGE_CURVE_POINTS) {
    out << print_pnt(&c->vc_points[point_idx]);
    ++point_idx;
  }
  return out.str();
}
// Builds a multi-line report of an rsmi_od_volt_freq_data_t: current SCLK and
// MCLK ranges, min/max SCLK and MCLK limits, the frequency/voltage curve and
// the number of regions. A nullptr argument yields a fixed diagnostic line.
std::string print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv) {
  if (odv == nullptr) {
    return "rsmi_od_volt_freq_data_t odv = nullptr\n";
  }
  std::ostringstream report;
  report << pt_rng_Mhz("\t**Current SCLK frequency range: ",
                       &odv->curr_sclk_range)
         << pt_rng_Mhz("\t**Current MCLK frequency range: ",
                       &odv->curr_mclk_range)
         << pt_rng_Mhz("\t**Min/Max Possible SCLK frequency range: ",
                       &odv->sclk_freq_limits)
         << pt_rng_Mhz("\t**Min/Max Possible MCLK frequency range: ",
                       &odv->mclk_freq_limits)
         << "\t**Current Freq/Volt. curve: " << "\n"
         << pt_vddc_curve(&odv->curve)
         << "\t**Number of Freq./Volt. regions: " << odv->num_regions
         << "\n\n";
  return report.str();
}
// Formats one frequency/voltage region as two lines: its frequency range
// (via pt_rng_Mhz) followed by its voltage range (via pt_rng_mV).
// A nullptr region yields a diagnostic line instead of being dereferenced,
// matching the other exported print_* helpers.
std::string print_odv_region(rsmi_freq_volt_region_t *region) {
  std::ostringstream ss;
  if (region == nullptr) {
    ss << "rsmi_freq_volt_region_t region = nullptr\n";
    return ss.str();
  }
  ss << pt_rng_Mhz("\t\tFrequency range: ", &region->freq_range);
  ss << pt_rng_mV("\t\tVoltage range: ", &region->volt_range);
  return ss.str();
}
// Builds a report for the first `num_regions` entries of `regions`: each
// entry gets a "\tRegion <index>: " header followed by print_odv_region().
// A nullptr array yields a fixed diagnostic line; num_regions == 0 yields an
// empty string.
std::string print_rsmi_od_volt_freq_regions(uint32_t num_regions,
                                            rsmi_freq_volt_region_t *regions) {
  if (regions == nullptr) {
    return "rsmi_freq_volt_region_t regions = nullptr\n";
  }
  std::ostringstream report;
  for (uint32_t region_idx = 0; region_idx != num_regions; ++region_idx) {
    report << "\tRegion " << region_idx << ": " << "\n"
           << print_odv_region(&regions[region_idx]);
  }
  return report.str();
}
// Reports whether the process runs with root privileges: true only when both
// the real user id (getuid) and the effective user id (geteuid) are 0.
// The outcome is also written to the debug log.
bool is_sudo_user() {
  const auto real_uid = getuid();
  const auto effective_uid = geteuid();
  const bool isRunningWithSudo = (real_uid == 0) && (effective_uid == 0);

  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__;
  if (isRunningWithSudo) {
    ss << " | running as sudoer";
  } else {
    ss << " | NOT running as sudoer";
  }
  LOG_DEBUG(ss);
  return isRunningWithSudo;
}
// Retrieves the gfx target version of device `dv_ind` as text.
//
// dv_ind      - device index, resolved by GET_DEV_AND_KFDNODE_FROM_INDX
// gfx_version - out-parameter; set to "gfx<value>" on success, where <value>
//               is the number reported by the KFD node, or to "Unknown" when
//               the node read fails
// Returns:
//   RSMI_STATUS_INVALID_ARGS  - gfx_version is nullptr
//   RSMI_STATUS_SUCCESS       - KFD node read returned 0
//   RSMI_STATUS_NOT_SUPPORTED - KFD node read returned non-zero
rsmi_status_t rsmi_get_gfx_target_version(uint32_t dv_ind,
                                          std::string *gfx_version) {
  // Guard the out-pointer before anything is written through it.
  if (gfx_version == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }
  std::ostringstream ss;
  uint64_t kfd_gfx_version = 0;
  GET_DEV_AND_KFDNODE_FROM_INDX
  int ret = kfd_node->get_gfx_target_version(&kfd_gfx_version);
  if (ret != 0) {
    *gfx_version = "Unknown";
    return RSMI_STATUS_NOT_SUPPORTED;
  }
  ss << "gfx" << kfd_gfx_version;
  *gfx_version = ss.str();
  return RSMI_STATUS_SUCCESS;
}
std::queue<std::string> getAllDeviceGfxVers() {
uint32_t num_monitor_devs = 0;
rsmi_status_t ret;
std::queue<std::string> deviceGfxVersions;
std::string response = "";
std::string dev_gfx_ver = "";
ret = rsmi_num_monitor_devices(&num_monitor_devs);
if (ret != RSMI_STATUS_SUCCESS || num_monitor_devs == 0) {
response = "N/A - No AMD devices detected";
deviceGfxVersions.push(response);
return deviceGfxVersions;
}
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
ret = amd::smi::rsmi_get_gfx_target_version(i , &dev_gfx_ver);
response = "Device[" + std::to_string(i) + "]: ";
if (ret != RSMI_STATUS_SUCCESS) {
deviceGfxVersions.push(response + getRSMIStatusString(ret, false));
} else {
deviceGfxVersions.push(response + std::string(dev_gfx_ver));
}
}
return deviceGfxVersions;
}
} // namespace smi
} // namespace amd
+8
Просмотреть файл
@@ -21,6 +21,14 @@ message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("")
## Compiler flags
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -fno-rtti -std=c++17")
if (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -m64 -msse -msse2")
endif()
set(SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(RSMITST "rsmitst")
+75 -38
Просмотреть файл
@@ -54,6 +54,7 @@
#include "gtest/gtest.h"
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_utils.h"
#include "rocm_smi_test/functional/computepartition_read_write.h"
#include "rocm_smi_test/test_common.h"
@@ -118,6 +119,24 @@ computePartitionString(rsmi_compute_partition_type computeParitionType) {
}
}
// Sleeps for `seconds` seconds and reports, on stdout, both the requested
// wait and the whole seconds actually elapsed.
// Used as a delay because changing partitions depends on GPUs not being in an
// active state, so tests pause before (re)trying partition changes.
static void system_wait(int seconds) {
  using Clock = std::chrono::high_resolution_clock;

  const auto begin = Clock::now();
  std::cout << "** Waiting for "
            << std::dec << seconds
            << " seconds, for any GPU"
            << " activity to clear up. **" << std::endl;
  sleep(seconds);
  const auto elapsed = Clock::now() - begin;

  // Whole seconds elapsed, truncated toward zero.
  const auto elapsed_seconds =
      std::chrono::duration_cast<std::chrono::seconds>(elapsed).count();
  std::cout << "** Waiting took " << elapsed_seconds
            << " seconds **" << std::endl;
}
static const std::map<std::string, rsmi_compute_partition_type_t>
mapStringToRSMIComputePartitionTypes {
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
@@ -141,21 +160,7 @@ void TestComputePartitionReadWrite::Run(void) {
// Confirm system supports compute partition, before executing wait
ret = rsmi_dev_compute_partition_get(0, orig_char_computePartition, 255);
if (ret == RSMI_STATUS_SUCCESS) {
// Adding a delay - since changing partitions depends on gpus not
// being in an active state, we'll wait a few seconds before starting
// full testing
auto start = std::chrono::high_resolution_clock::now();
int waitTime = 20;
std::cout << "** Waiting for "
<< std::dec << waitTime
<< " seconds, for any GPU"
<< " activity to clear up. **" << std::endl;
sleep(waitTime);
auto stop = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
std::cout << "** Waiting took " << duration.count() / 1000000
<< " seconds **" << std::endl;
system_wait(25);
}
for (uint32_t dv_ind = 0; dv_ind < num_monitor_devs(); ++dv_ind) {
@@ -165,6 +170,7 @@ void TestComputePartitionReadWrite::Run(void) {
}
}
PrintDeviceHeader(dv_ind);
bool devicePartitionUpdated = false;
// Standard checks to see if API is supported, before running full tests
ret = rsmi_dev_compute_partition_get(dv_ind, orig_char_computePartition,
@@ -231,9 +237,8 @@ void TestComputePartitionReadWrite::Run(void) {
}
// Verify api support checking functionality is working
rsmi_compute_partition_type_t newPartition
= rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_INVALID;
err = rsmi_dev_compute_partition_set(dv_ind, newPartition);
err = rsmi_dev_compute_partition_set(dv_ind,
RSMI_COMPUTE_PARTITION_INVALID);
ASSERT_TRUE((err == RSMI_STATUS_INVALID_ARGS) ||
(err == RSMI_STATUS_NOT_SUPPORTED) ||
(err == RSMI_STATUS_PERMISSION));
@@ -270,27 +275,40 @@ void TestComputePartitionReadWrite::Run(void) {
* //!< work together with shared memory
*/
for (int partition =
rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_CPX;
partition <= rsmi_compute_partition_type_t::RSMI_COMPUTE_PARTITION_QPX;
for (int partition = static_cast<int>(RSMI_COMPUTE_PARTITION_CPX);
partition <= static_cast<int>(RSMI_COMPUTE_PARTITION_QPX);
partition++) {
newPartition = static_cast<rsmi_compute_partition_type_t>(partition);
rsmi_compute_partition_type_t updatePartition
= static_cast<rsmi_compute_partition_type_t>(partition);
IF_VERB(STANDARD) {
std::cout << std::endl;
std::cout << "\t**"
<< "======== TEST RSMI_COMPUTE_PARTITION_"
<< computePartitionString(newPartition)
<< computePartitionString(updatePartition)
<< " ===============" << std::endl;
}
ret = rsmi_dev_compute_partition_set(dv_ind, updatePartition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Attempting to set compute partition to: "
<< computePartitionString(newPartition) << std::endl;
<< "rsmi_dev_compute_partition_set(dv_ind, updatePartition): "
<< amd::smi::getRSMIStatusString(ret, false) << "\n"
<< "\t**New Partition (set): "
<< computePartitionString(updatePartition) << "\n";
}
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
ASSERT_TRUE((ret == RSMI_STATUS_SETTING_UNAVAILABLE)
|| (ret== RSMI_STATUS_PERMISSION)
|| (ret == RSMI_STATUS_SUCCESS)
|| ret == RSMI_STATUS_BUSY);
if (ret == RSMI_STATUS_BUSY) {
IF_VERB(STANDARD) {
std::cout << "\t**Device is currently busy.. continue\n";
}
system_wait(5);
continue;
}
bool isSettingUnavailable = false;
ASSERT_TRUE((ret == RSMI_STATUS_SUCCESS) ||
(ret == RSMI_STATUS_SETTING_UNAVAILABLE));
if (ret == RSMI_STATUS_SETTING_UNAVAILABLE) {
isSettingUnavailable = true;
}
@@ -306,7 +324,7 @@ void TestComputePartitionReadWrite::Run(void) {
}
if (isSettingUnavailable) {
ASSERT_EQ(RSMI_STATUS_SETTING_UNAVAILABLE, ret);
ASSERT_STRNE(computePartitionString(newPartition).c_str(),
ASSERT_STRNE(computePartitionString(updatePartition).c_str(),
current_char_computePartition);
IF_VERB(STANDARD) {
std::cout << "\t**"
@@ -314,23 +332,30 @@ void TestComputePartitionReadWrite::Run(void) {
<< "RSMI_STATUS_SETTING_UNAVAILABLE,\n\t current compute "
<< "partition (" << current_char_computePartition
<< ") did not update to ("
<< computePartitionString(newPartition) << ")"
<< computePartitionString(updatePartition) << ")"
<< std::endl;
}
} else {
if (strcmp(orig_char_computePartition, current_char_computePartition) !=
0) {
devicePartitionUpdated = true;
} else {
devicePartitionUpdated = false;
}
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(computePartitionString(newPartition).c_str(),
ASSERT_STREQ(computePartitionString(updatePartition).c_str(),
current_char_computePartition);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "Confirmed current compute partition ("
<< current_char_computePartition << ") matches"
<< "\n\t requested compute partition ("
<< computePartitionString(newPartition) << ")"
<< computePartitionString(updatePartition) << ")"
<< std::endl;
}
}
}
} // END looping through partition changes
/* TEST RETURN TO BOOT COMPUTE PARTITION SETTING */
IF_VERB(STANDARD) {
@@ -342,8 +367,14 @@ void TestComputePartitionReadWrite::Run(void) {
std::string oldPartition = current_char_computePartition;
bool wasResetSuccess = false;
ret = rsmi_dev_compute_partition_reset(dv_ind);
IF_VERB(STANDARD) {
std::cout << "\t**"
<< "rsmi_dev_compute_partition_reset(dv_ind): "
<< amd::smi::getRSMIStatusString(ret, false) << "\n";
}
ASSERT_TRUE((ret == RSMI_STATUS_SUCCESS) ||
(ret == RSMI_STATUS_NOT_SUPPORTED));
(ret == RSMI_STATUS_NOT_SUPPORTED) ||
(ret == RSMI_STATUS_BUSY));
if (ret == RSMI_STATUS_SUCCESS) {
wasResetSuccess = true;
}
@@ -352,9 +383,15 @@ void TestComputePartitionReadWrite::Run(void) {
CHK_ERR_ASRT(ret)
IF_VERB(STANDARD) {
std::cout << "\t**" << "Current compute partition: "
<< current_char_computePartition << std::endl;
<< current_char_computePartition << "\n"
<< "\t**" << "Original compute partition: "
<< orig_char_computePartition << "\n"
<< "\t**" << "Reset Successful: "
<< (wasResetSuccess ? "TRUE" : "FALSE") << "\n"
<< "\t**" << "Partitions Updated: "
<< (devicePartitionUpdated ? "TRUE" : "FALSE") << "\n";
}
if (wasResetSuccess) {
if (wasResetSuccess && devicePartitionUpdated) {
ASSERT_STRNE(oldPartition.c_str(), current_char_computePartition);
IF_VERB(STANDARD) {
std::cout << "\t**"
@@ -379,7 +416,7 @@ void TestComputePartitionReadWrite::Run(void) {
<< "=========== TEST RETURN TO ORIGINAL COMPUTE PARTITION "
<< "SETTING ========" << std::endl;
}
newPartition
rsmi_compute_partition_type_t newPartition
= mapStringToRSMIComputePartitionTypes.at(
std::string(orig_char_computePartition));
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
@@ -401,5 +438,5 @@ void TestComputePartitionReadWrite::Run(void) {
ASSERT_EQ(RSMI_STATUS_SUCCESS, ret);
ASSERT_STREQ(computePartitionString(newPartition).c_str(),
current_char_computePartition);
}
} // END looping through devices
}
+35 -9
Просмотреть файл
@@ -5,7 +5,7 @@
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2020, Advanced Micro Devices, Inc.
* Copyright (c) 2020-2023, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
@@ -56,6 +56,7 @@
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi_test/functional/perf_determinism.h"
#include "rocm_smi_test/test_common.h"
#include "rocm_smi/rocm_smi_utils.h"
TestPerfDeterminism::TestPerfDeterminism() : TestBase() {
@@ -103,23 +104,49 @@ void TestPerfDeterminism::Run(void) {
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(i);
std::cout << "\t**Resetting performance determinism\n";
err = rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO): "
<< amd::smi::getRSMIStatusString(err, false)
<< "\n";
}
CHK_ERR_ASRT(err)
ret = rsmi_dev_perf_level_get(i, &pfl);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_perf_level_get(i, &pfl): "
<< amd::smi::getRSMIStatusString(ret, false) << "\n";
}
CHK_ERR_ASRT(ret)
err = rsmi_dev_od_volt_info_get(i, &odv);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_od_volt_info_get(i, &odv): "
<< amd::smi::getRSMIStatusString(err, false)
<< "\n"
<< amd::smi::print_rsmi_od_volt_freq_data_t(&odv)
<< "\n";
}
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t** Not supported on this machine" << std::endl;
std::cout << "\t** Not supported on this machine\n";
}
return;
}
else{
} else if (err == RSMI_STATUS_SUCCESS) {
clkvalue = (odv.curr_sclk_range.lower_bound/1000000) + 50;
} else {
IF_VERB(STANDARD) {
std::cout << "\t** Unable to retrieve lower bound sclk, continue.. \n";
}
continue;
}
std::cout << "About to rsmi_perf_determinism_mode_set() -->\n";
err = rsmi_perf_determinism_mode_set(i, clkvalue);
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**Not supported on this machine" << std::endl;
}
return;
continue;
} else {
ret = rsmi_dev_perf_level_get(i, &pfl);
CHK_ERR_ASRT(ret)
@@ -130,7 +157,7 @@ void TestPerfDeterminism::Run(void) {
}
std::cout << "\t**Resetting performance determinism" << std::endl;
err = rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO);;
err = rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO);
CHK_ERR_ASRT(err)
ret = rsmi_dev_perf_level_get(i, &pfl);
CHK_ERR_ASRT(ret)
@@ -138,7 +165,6 @@ void TestPerfDeterminism::Run(void) {
std::cout << "\t**New Perf Level:" << GetPerfLevelStr(pfl) <<
std::endl;
}
return;
}
}
} // END - SET SUPPORTED
} // END - DEVICE LOOP
}
+67 -75
Просмотреть файл
@@ -5,7 +5,7 @@
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2019, Advanced Micro Devices, Inc.
* Copyright (c) 2019-2023, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
@@ -53,6 +53,7 @@
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi_test/functional/volt_freq_curv_read.h"
#include "rocm_smi_test/test_common.h"
#include "rocm_smi/rocm_smi_utils.h"
TestVoltCurvRead::TestVoltCurvRead() : TestBase() {
set_title("RSMI Voltage-Frequency Curve Read Test");
@@ -84,69 +85,10 @@ void TestVoltCurvRead::Close() {
TestBase::Close();
}
static void pt_rng_Mhz(std::string title, rsmi_range *r) {
assert(r != nullptr);
std::cout << title << std::endl;
std::cout << "\t\t** " << r->lower_bound/1000000 << " to " <<
r->upper_bound/1000000 << " MHz" << std::endl;
}
static void pt_rng_mV(std::string title, rsmi_range *r) {
assert(r != nullptr);
std::cout << title << std::endl;
std::cout << "\t\t** " << r->lower_bound << " to " << r->upper_bound <<
" mV" << std::endl;
}
static void print_pnt(rsmi_od_vddc_point_t *pt) {
std::cout << "\t\t** Frequency: " << pt->frequency/1000000 << "MHz" <<
std::endl;
std::cout << "\t\t** Voltage: " << pt->voltage << "mV" << std::endl;
}
static void pt_vddc_curve(rsmi_od_volt_curve *c) {
assert(c != nullptr);
for (uint32_t i = 0; i < RSMI_NUM_VOLTAGE_CURVE_POINTS; ++i) {
print_pnt(&c->vc_points[i]);
}
}
static void print_rsmi_od_volt_freq_data_t(rsmi_od_volt_freq_data_t *odv) {
assert(odv != nullptr);
std::cout.setf(std::ios::dec, std::ios::basefield);
pt_rng_Mhz("\t\tCurrent SCLK frequency range:", &odv->curr_sclk_range);
pt_rng_Mhz("\t\tCurrent MCLK frequency range:", &odv->curr_mclk_range);
pt_rng_Mhz("\t\tMin/Max Possible SCLK frequency range:",
&odv->sclk_freq_limits);
pt_rng_Mhz("\t\tMin/Max Possible MCLK frequency range:",
&odv->mclk_freq_limits);
std::cout << "\t\tCurrent Freq/Volt. curve:" << std::endl;
pt_vddc_curve(&odv->curve);
std::cout << "\tNumber of Freq./Volt. regions: " <<
odv->num_regions << std::endl;
}
static void print_odv_region(rsmi_freq_volt_region_t *region) {
pt_rng_Mhz("\t\tFrequency range:", &region->freq_range);
pt_rng_mV("\t\tVoltage range:", &region->volt_range);
}
static void print_rsmi_od_volt_freq_regions(uint32_t num_regions,
rsmi_freq_volt_region_t *regions) {
for (uint32_t i = 0; i < num_regions; ++i) {
std::cout << "\tRegion " << i << ":" << std::endl;
print_odv_region(&regions[i]);
}
}
void TestVoltCurvRead::Run(void) {
rsmi_status_t err;
rsmi_status_t err, ret;
rsmi_od_volt_freq_data_t odv;
rsmi_dev_perf_level_t pfl;
TestBase::Run();
if (setup_failed_) {
@@ -157,26 +99,57 @@ void TestVoltCurvRead::Run(void) {
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
PrintDeviceHeader(i);
std::cout << "\n\t**Resetting performance determinism to auto\n";
err = rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_perf_level_set(i, RSMI_DEV_PERF_LEVEL_AUTO): "
<< amd::smi::getRSMIStatusString(err, false)
<< "\n";
}
CHK_ERR_ASRT(err)
ret = rsmi_dev_perf_level_get(i, &pfl);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_perf_level_get(i, &pfl): "
<< amd::smi::getRSMIStatusString(ret, false) << "\n";
}
CHK_ERR_ASRT(ret)
err = rsmi_dev_od_volt_info_get(i, &odv);
if (err == RSMI_STATUS_NOT_SUPPORTED) {
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_od_volt_info_get(i, &odv): "
<< amd::smi::getRSMIStatusString(err, false)
<< "\n"
<< amd::smi::print_rsmi_od_volt_freq_data_t(&odv)
<< "\n";
}
if (err != RSMI_STATUS_SUCCESS) {
IF_VERB(STANDARD) {
std::cout <<
"\t**rsmi_dev_od_volt_info_get: Not supported on this machine"
<< std::endl;
}
// Verify api support checking functionality is working
err = rsmi_dev_od_volt_info_get(i, nullptr);
ASSERT_EQ(err, RSMI_STATUS_NOT_SUPPORTED);
} else {
CHK_ERR_ASRT(err)
// Verify api support checking functionality is working
err = rsmi_dev_od_volt_info_get(i, nullptr);
ASSERT_EQ(err, RSMI_STATUS_INVALID_ARGS);
continue;
}
// Verify api support checking functionality is working
err = rsmi_dev_od_volt_info_get(i, nullptr);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_od_volt_info_get(i, nullptr): "
<< amd::smi::getRSMIStatusString(err, false) << "\n";
// << "\n"
// << amd::smi::print_rsmi_od_volt_freq_data_t(&odv)
// << "\n";
}
ASSERT_TRUE(err == RSMI_STATUS_INVALID_ARGS);
err = rsmi_dev_od_volt_info_get(i, &odv);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_od_volt_info_get(i, &odv): "
<< amd::smi::getRSMIStatusString(err, false) << "\n"
<< amd::smi::print_rsmi_od_volt_freq_data_t(&odv)
<< "\t**odv.num_regions = " << std::dec
<< odv.num_regions << "\n";
}
if (err == RSMI_STATUS_SUCCESS) {
std::cout << "\t**Frequency-voltage curve data:" << std::endl;
print_rsmi_od_volt_freq_data_t(&odv);
std::cout << "\t**Frequency-voltage curve data:" << "\n";
std::cout << amd::smi::print_rsmi_od_volt_freq_data_t(&odv);
rsmi_freq_volt_region_t *regions;
uint32_t num_regions;
@@ -185,11 +158,30 @@ void TestVoltCurvRead::Run(void) {
num_regions = odv.num_regions;
err = rsmi_dev_od_volt_curve_regions_get(i, &num_regions, regions);
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_od_volt_curve_regions_get("
<< "i, &num_regions, regions): "
<< amd::smi::getRSMIStatusString(err, false) << "\n"
<< "\t**Number of regions: " << std::dec << num_regions
<< "\n";
}
ASSERT_TRUE(err == RSMI_STATUS_SUCCESS
|| err == RSMI_STATUS_NOT_SUPPORTED
|| err == RSMI_STATUS_UNEXPECTED_DATA
|| err == RSMI_STATUS_UNEXPECTED_SIZE);
if (err != RSMI_STATUS_SUCCESS) {
IF_VERB(STANDARD) {
std::cout << "\t**rsmi_dev_od_volt_curve_regions_get: "
"Not supported on this machine" << std::endl;
}
continue;
}
CHK_ERR_ASRT(err)
ASSERT_TRUE(num_regions == odv.num_regions);
std::cout << "\t**Frequency-voltage curve regions:" << std::endl;
print_rsmi_od_volt_freq_regions(num_regions, regions);
std::cout << amd::smi::print_rsmi_od_volt_freq_regions(num_regions,
regions);
delete []regions;
}
+8 -8
Просмотреть файл
@@ -163,6 +163,14 @@ TEST(rsmitstReadOnly, TestPerfLevelRead) {
TestPerfLevelRead tst;
RunGenericTest(&tst);
}
TEST(rsmitstReadWrite, TestComputePartitionReadWrite) {
TestComputePartitionReadWrite tst;
RunGenericTest(&tst);
}
TEST(rsmitstReadWrite, TestMemoryPartitionReadWrite) {
TestMemoryPartitionReadWrite tst;
RunGenericTest(&tst);
}
TEST(rsmitstReadWrite, TestPerfLevelReadWrite) {
TestPerfLevelReadWrite tst;
RunGenericTest(&tst);
@@ -267,14 +275,6 @@ TEST(rsmitstReadOnly, TestMutualExclusion) {
tst.Run();
RunCustomTestEpilog(&tst);
}
TEST(rsmitstReadWrite, TestComputePartitionReadWrite) {
TestComputePartitionReadWrite tst;
RunGenericTest(&tst);
}
TEST(rsmitstReadWrite, TestMemoryPartitionReadWrite) {
TestMemoryPartitionReadWrite tst;
RunGenericTest(&tst);
}
TEST(rsmitstReadWrite, TestEvtNotifReadWrite) {
TestEvtNotifReadWrite tst;
RunGenericTest(&tst);