c5ba765be0
Change-Id: Ie706473ff92a91b19e95d2d58f64904cad73a89a
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/amdsmi commit: 6132074089]
1301 rader
50 KiB
C++
Executable File
1301 rader
50 KiB
C++
Executable File
/*
|
|
* =============================================================================
|
|
* ROC Runtime Conformance Release License
|
|
* =============================================================================
|
|
* The University of Illinois/NCSA
|
|
* Open Source License (NCSA)
|
|
*
|
|
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Developed by:
|
|
*
|
|
* AMD Research and AMD ROC Software Development
|
|
*
|
|
* Advanced Micro Devices, Inc.
|
|
*
|
|
* www.amd.com
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal with the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimers.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimers in
|
|
* the documentation and/or other materials provided with the distribution.
|
|
* - Neither the names of <Name of Development Group, Name of Institution>,
|
|
* nor the names of its contributors may be used to endorse or promote
|
|
* products derived from this Software without specific prior written
|
|
* permission.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS WITH THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdint.h>
|
|
#include <unistd.h>
|
|
|
|
#include <algorithm>
|
|
#include <bitset>
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <vector>
|
|
#include <type_traits>
|
|
|
|
#include "rocm_smi/rocm_smi.h"
|
|
#include "rocm_smi/rocm_smi_utils.h"
|
|
|
|
#define PRINT_RSMI_ERR(RET) { \
|
|
if (RET != RSMI_STATUS_SUCCESS) { \
|
|
std::cout << "[ERROR] RSMI call returned " << (RET) \
|
|
<< " at line " << __LINE__ << "\n"; \
|
|
std::cout << amd::smi::getRSMIStatusString(RET) << "\n"; \
|
|
} \
|
|
}
|
|
|
|
#define CHK_RSMI_RET(RET) { \
|
|
PRINT_RSMI_ERR(RET) \
|
|
if (RET != RSMI_STATUS_SUCCESS) { \
|
|
return (RET); \
|
|
} \
|
|
}
|
|
|
|
#define CHK_AND_PRINT_RSMI_ERR_RET(RET) { \
|
|
PRINT_RSMI_ERR(RET) \
|
|
CHK_RSMI_RET(RET) \
|
|
}
|
|
|
|
#define CHK_RSMI_RET_I(RET) { \
|
|
PRINT_RSMI_ERR(RET) \
|
|
if (RET != RSMI_STATUS_SUCCESS) { \
|
|
return static_cast<int>(RET); \
|
|
} \
|
|
}
|
|
|
|
#define CHK_FILE_PERMISSIONS(RET) { \
|
|
if ((RET) == RSMI_STATUS_PERMISSION) { \
|
|
if (isFileWritable(RET)) { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
} else { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
}
|
|
|
|
#define CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(RET) { \
|
|
if ((RET) == RSMI_STATUS_PERMISSION) { \
|
|
if (isFileWritable(RET)) { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
} else if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
|
std::cout << "Not Supported." \
|
|
<< "\n"; \
|
|
} else if ((RET) == RSMI_STATUS_NOT_YET_IMPLEMENTED) { \
|
|
std::cout << "Not Yet Implemented." \
|
|
<< "\n"; \
|
|
} else { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
}
|
|
|
|
#define CHK_RSMI_NOT_SUPPORTED_RET(RET) { \
|
|
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
|
std::cout << "Not Supported." \
|
|
<< "\n"; \
|
|
} else { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
}
|
|
|
|
#define CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(RET) { \
|
|
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
|
std::cout << "Not Supported." \
|
|
<< "\n"; \
|
|
} else if ((RET) == RSMI_STATUS_UNEXPECTED_DATA) { \
|
|
std::cout << "[ERROR] RSMI_STATUS_UNEXPECTED_DATA retrieved." \
|
|
<< "\n"; \
|
|
} else { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
}
|
|
|
|
#define CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(RET) {\
|
|
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
|
std::cout << "Not Supported."\
|
|
<< "\n"; \
|
|
} else if ((RET) == RSMI_STATUS_SETTING_UNAVAILABLE) { \
|
|
std::cout << "[WARN] RSMI_STATUS_SETTING_UNAVAILABLE retrieved." \
|
|
<< "\n"; \
|
|
} else { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
}
|
|
|
|
#define CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(RET) { \
|
|
if ((RET) == RSMI_STATUS_NOT_SUPPORTED) { \
|
|
std::cout << "Not Supported." \
|
|
<< "\n"; \
|
|
} else if ((RET) == RSMI_STATUS_UNEXPECTED_DATA) { \
|
|
std::cout << "[WARN] RSMI_STATUS_UNEXPECTED_DATA retrieved." \
|
|
<< "\n"; \
|
|
} else if ((RET) == RSMI_STATUS_INSUFFICIENT_SIZE) { \
|
|
std::cout << "[WARN] RSMI_STATUS_INSUFFICIENT_SIZE retrieved." \
|
|
<< "\n"; \
|
|
} else { \
|
|
CHK_RSMI_RET(RET) \
|
|
} \
|
|
}
|
|
|
|
void print_function_header_with_rsmi_ret(
|
|
rsmi_status_t myReturn, std::string header = "") {
|
|
std::cout << "\t** ";
|
|
if (!header.empty()) {
|
|
std::cout << header << ": ";
|
|
}
|
|
std::cout << amd::smi::getRSMIStatusString(myReturn, false) << "\n";
|
|
}
|
|
|
|
static void print_test_header(const char *str, uint32_t dv_ind) {
|
|
std::cout << "******************************************" << "\n";
|
|
std::cout << "*** " << str << "\n";
|
|
std::cout << "******************************************" << "\n";
|
|
std::cout << "Device index: " << dv_ind << "\n";
|
|
}
|
|
|
|
static void print_mini_header(const char *str) {
|
|
std::cout << "\n>> " << str << " <<" << "\n";
|
|
}
|
|
|
|
static const char *
|
|
power_profile_string(rsmi_power_profile_preset_masks_t profile) {
|
|
switch (profile) {
|
|
case RSMI_PWR_PROF_PRST_CUSTOM_MASK:
|
|
return "CUSTOM";
|
|
case RSMI_PWR_PROF_PRST_VIDEO_MASK:
|
|
return "VIDEO";
|
|
case RSMI_PWR_PROF_PRST_POWER_SAVING_MASK:
|
|
return "POWER SAVING";
|
|
case RSMI_PWR_PROF_PRST_COMPUTE_MASK:
|
|
return "COMPUTE";
|
|
case RSMI_PWR_PROF_PRST_VR_MASK:
|
|
return "VR";
|
|
case RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK:
|
|
return "3D FULL SCREEN";
|
|
default:
|
|
return "UNKNOWN";
|
|
}
|
|
}
|
|
|
|
static const std::string
|
|
compute_partition_string(rsmi_compute_partition_type_t partition) {
|
|
switch (partition) {
|
|
case RSMI_COMPUTE_PARTITION_CPX:
|
|
return "CPX";
|
|
case RSMI_COMPUTE_PARTITION_SPX:
|
|
return "SPX";
|
|
case RSMI_COMPUTE_PARTITION_DPX:
|
|
return "DPX";
|
|
case RSMI_COMPUTE_PARTITION_TPX:
|
|
return "TPX";
|
|
case RSMI_COMPUTE_PARTITION_QPX:
|
|
return "QPX";
|
|
default:
|
|
return "UNKNOWN";
|
|
}
|
|
}
|
|
|
|
static std::map<std::string, rsmi_compute_partition_type_t>
|
|
mapStringToRSMIComputePartitionTypes {
|
|
{"CPX", RSMI_COMPUTE_PARTITION_CPX},
|
|
{"SPX", RSMI_COMPUTE_PARTITION_SPX},
|
|
{"DPX", RSMI_COMPUTE_PARTITION_DPX},
|
|
{"TPX", RSMI_COMPUTE_PARTITION_TPX},
|
|
{"QPX", RSMI_COMPUTE_PARTITION_QPX}
|
|
};
|
|
|
|
static const std::string
|
|
memory_partition_string(rsmi_memory_partition_type_t partition) {
|
|
switch (partition) {
|
|
case RSMI_MEMORY_PARTITION_NPS1:
|
|
return "NPS1";
|
|
case RSMI_MEMORY_PARTITION_NPS2:
|
|
return "NPS2";
|
|
case RSMI_MEMORY_PARTITION_NPS4:
|
|
return "NPS4";
|
|
case RSMI_MEMORY_PARTITION_NPS8:
|
|
return "NPS8";
|
|
default:
|
|
return "UNKNOWN";
|
|
}
|
|
}
|
|
|
|
static std::map<std::string, rsmi_memory_partition_type_t>
|
|
mapStringToRSMIMemoryPartitionTypes {
|
|
{"NPS1", RSMI_MEMORY_PARTITION_NPS1},
|
|
{"NPS2", RSMI_MEMORY_PARTITION_NPS2},
|
|
{"NPS4", RSMI_MEMORY_PARTITION_NPS4},
|
|
{"NPS8", RSMI_MEMORY_PARTITION_NPS8}
|
|
};
|
|
|
|
static const char *
|
|
perf_level_string(rsmi_dev_perf_level_t perf_lvl) {
|
|
switch (perf_lvl) {
|
|
case RSMI_DEV_PERF_LEVEL_AUTO:
|
|
return "AUTO";
|
|
case RSMI_DEV_PERF_LEVEL_LOW:
|
|
return "LOW";
|
|
case RSMI_DEV_PERF_LEVEL_HIGH:
|
|
return "HIGH";
|
|
case RSMI_DEV_PERF_LEVEL_MANUAL:
|
|
return "MANUAL";
|
|
default:
|
|
return "UNKNOWN";
|
|
}
|
|
}
|
|
|
|
static const std::string
|
|
clock_type_string(rsmi_clk_type_t clk) {
|
|
switch (clk) {
|
|
case RSMI_CLK_TYPE_SYS:
|
|
return "RSMI_CLK_TYPE_SYS";
|
|
case RSMI_CLK_TYPE_DF:
|
|
return "RSMI_CLK_TYPE_DF";
|
|
case RSMI_CLK_TYPE_DCEF:
|
|
return "RSMI_CLK_TYPE_DCEF";
|
|
case RSMI_CLK_TYPE_SOC:
|
|
return "RSMI_CLK_TYPE_SOC";
|
|
case RSMI_CLK_TYPE_MEM:
|
|
return "RSMI_CLK_TYPE_MEM";
|
|
case RSMI_CLK_TYPE_PCIE:
|
|
return "RSMI_CLK_TYPE_PCIE";
|
|
default:
|
|
return "RSMI_CLK_INVALID";
|
|
}
|
|
}
|
|
|
|
static bool isFileWritable(rsmi_status_t response) {
|
|
// Clock files may not be writable, causing sets to
|
|
// return RSMI_STATUS_PERMISSION. If running as sudo,
|
|
// this means file is not writable.
|
|
// isFileWritable(ret) - intends to capture this
|
|
// response situation.
|
|
bool fileWritable = true;
|
|
if (amd::smi::is_sudo_user() && (response == RSMI_STATUS_PERMISSION)) {
|
|
std::cout << "[WARN] User is running with sudo "
|
|
<< "permissions, file is not writable." << "\n";
|
|
fileWritable = false;
|
|
} else {
|
|
CHK_AND_PRINT_RSMI_ERR_RET(response)
|
|
}
|
|
return fileWritable;
|
|
}
|
|
|
|
static rsmi_status_t test_power_profile(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
rsmi_power_profile_status_t status;
|
|
|
|
print_test_header("Power Profile", dv_ind);
|
|
|
|
std::cout << "The available power profiles are: ";
|
|
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
if (ret != RSMI_STATUS_SUCCESS) {
|
|
std::cout << "***Skipping Power Profile test." << "\n";
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
CHK_RSMI_RET(ret)
|
|
|
|
std::cout << "The available power profiles are:" << "\n";
|
|
uint64_t tmp = 1;
|
|
|
|
while (tmp <= RSMI_PWR_PROF_PRST_LAST) {
|
|
if ((tmp & status.available_profiles) == tmp) {
|
|
std::cout << "\t" <<
|
|
power_profile_string((rsmi_power_profile_preset_masks_t)tmp) << "\n";
|
|
}
|
|
tmp = tmp << 1;
|
|
}
|
|
std::cout << "The current power profile is: " <<
|
|
power_profile_string(status.current) << "\n";
|
|
|
|
// Try setting the profile to a different power profile
|
|
rsmi_bit_field_t diff_profiles;
|
|
rsmi_power_profile_preset_masks_t new_prof;
|
|
diff_profiles = status.available_profiles & (~status.current);
|
|
|
|
if (diff_profiles & RSMI_PWR_PROF_PRST_COMPUTE_MASK) {
|
|
new_prof = RSMI_PWR_PROF_PRST_COMPUTE_MASK;
|
|
} else if (diff_profiles & RSMI_PWR_PROF_PRST_VIDEO_MASK) {
|
|
new_prof = RSMI_PWR_PROF_PRST_VIDEO_MASK;
|
|
} else if (diff_profiles & RSMI_PWR_PROF_PRST_VR_MASK) {
|
|
new_prof = RSMI_PWR_PROF_PRST_VR_MASK;
|
|
} else if (diff_profiles & RSMI_PWR_PROF_PRST_POWER_SAVING_MASK) {
|
|
new_prof = RSMI_PWR_PROF_PRST_POWER_SAVING_MASK;
|
|
} else if (diff_profiles & RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK) {
|
|
new_prof = RSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK;
|
|
} else {
|
|
std::cout << "No other non-custom power profiles to set to" << "\n";
|
|
return ret;
|
|
}
|
|
|
|
std::cout << "Setting power profile to " << power_profile_string(new_prof)
|
|
<< "..." << "\n";
|
|
ret = rsmi_dev_power_profile_set(dv_ind, 0, new_prof);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "Done." << "\n";
|
|
rsmi_dev_perf_level_t pfl;
|
|
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "Performance Level is now " <<
|
|
perf_level_string(pfl) << "\n";
|
|
|
|
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "The current power profile is: " <<
|
|
power_profile_string(status.current) << "\n";
|
|
std::cout << "Resetting perf level to auto..." << "\n";
|
|
|
|
ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "Done." << "\n";
|
|
|
|
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "Performance Level is now " <<
|
|
perf_level_string(pfl) << "\n";
|
|
|
|
ret = rsmi_dev_power_profile_presets_get(dv_ind, 0, &status);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "The current power profile is: " <<
|
|
power_profile_string(status.current) << "\n";
|
|
|
|
return ret;
|
|
}
|
|
|
|
static rsmi_status_t test_power_cap(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
uint64_t orig, min, max, new_cap;
|
|
|
|
print_test_header("Power Control", dv_ind);
|
|
|
|
ret = rsmi_dev_power_cap_range_get(dv_ind, 0, &max, &min);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
ret = rsmi_dev_power_cap_get(dv_ind, 0, &orig);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
std::cout << "Original Power Cap: " << orig << " uW" << "\n";
|
|
std::cout << "Power Cap Range: " << max << " uW to " << min <<
|
|
" uW" << "\n";
|
|
new_cap = (max + min)/2;
|
|
|
|
std::cout << "Setting new cap to " << new_cap << "..." << "\n";
|
|
|
|
ret = rsmi_dev_power_cap_set(dv_ind, 0, new_cap);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
ret = rsmi_dev_power_cap_get(dv_ind, 0, &new_cap);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
std::cout << "New Power Cap: " << new_cap << " uW" << "\n";
|
|
std::cout << "Resetting cap to " << orig << "..." << "\n";
|
|
|
|
ret = rsmi_dev_power_cap_set(dv_ind, 0, orig);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
ret = rsmi_dev_power_cap_get(dv_ind, 0, &new_cap);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "Current Power Cap: " << new_cap << " uW" << "\n";
|
|
|
|
return ret;
|
|
}
|
|
|
|
static rsmi_status_t test_set_overdrive(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
uint32_t val;
|
|
|
|
print_test_header("Overdrive Control", dv_ind);
|
|
std::cout << "Set Overdrive level to 0%..." << "\n";
|
|
ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 0);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "Set Overdrive level to 10%..." << "\n";
|
|
ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 10);
|
|
CHK_RSMI_RET(ret)
|
|
ret = rsmi_dev_overdrive_level_get(dv_ind, &val);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "\t**New OverDrive Level:" << std::dec << val << "\n";
|
|
std::cout << "Reset Overdrive level to 0%..." << "\n";
|
|
ret = rsmi_dev_overdrive_level_set_v1(dv_ind, 0);
|
|
CHK_RSMI_RET(ret)
|
|
ret = rsmi_dev_overdrive_level_get(dv_ind, &val);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "\t**New OverDrive Level:" << std::dec << val << "\n";
|
|
|
|
return ret;
|
|
}
|
|
|
|
static rsmi_status_t test_set_fan_speed(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
int64_t orig_speed;
|
|
double new_speed;
|
|
int64_t cur_spd;
|
|
|
|
print_test_header("Fan Speed Control", dv_ind);
|
|
|
|
std::cout << "Original fan speed: ";
|
|
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &orig_speed);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << orig_speed << "\n";
|
|
} else {
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
std::cout << "***Skipping Fan Speed Control test." << "\n";
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
if (orig_speed == 0) {
|
|
std::cout << "***System fan speed value is 0. Skip fan test." << "\n";
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
new_speed = 1.1 * static_cast<double>(orig_speed);
|
|
|
|
std::cout << "Setting fan speed to " << new_speed << "\n";
|
|
|
|
ret = rsmi_dev_fan_speed_set(dv_ind, 0, static_cast<uint64_t>(new_speed));
|
|
CHK_RSMI_RET(ret)
|
|
|
|
sleep(4);
|
|
|
|
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &cur_spd);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
std::cout << "New fan speed: " << cur_spd << "\n";
|
|
|
|
assert(
|
|
(cur_spd > static_cast<int64_t>(0.95 * static_cast<double>(new_speed)) &&
|
|
cur_spd < static_cast<int64_t>(1.1 * static_cast<double>(new_speed))) ||
|
|
(cur_spd >
|
|
static_cast<int64_t>(0.95 * static_cast<double>(RSMI_MAX_FAN_SPEED))));
|
|
|
|
std::cout << "Resetting fan control to auto..." << "\n";
|
|
|
|
ret = rsmi_dev_fan_reset(dv_ind, 0);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
sleep(3);
|
|
|
|
ret = rsmi_dev_fan_speed_get(dv_ind, 0, &cur_spd);
|
|
CHK_RSMI_RET(ret)
|
|
|
|
std::cout << "End fan speed: " << cur_spd << "\n";
|
|
|
|
return ret;
|
|
}
|
|
|
|
static rsmi_status_t test_set_perf_level(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
|
|
rsmi_dev_perf_level_t pfl, orig_pfl;
|
|
|
|
print_test_header("Performance Level Control", dv_ind);
|
|
|
|
ret = rsmi_dev_perf_level_get(dv_ind, &orig_pfl);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "\t**Original Perf Level:" << perf_level_string(orig_pfl) <<
|
|
"\n";
|
|
|
|
pfl =
|
|
(rsmi_dev_perf_level_t)((orig_pfl + 1) % (RSMI_DEV_PERF_LEVEL_LAST + 1));
|
|
|
|
std::cout << "Set Performance Level to " << (uint32_t)pfl << " ..." <<
|
|
"\n";
|
|
ret = rsmi_dev_perf_level_set_v1(dv_ind, pfl);
|
|
if (ret != RSMI_STATUS_SUCCESS) {
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
std::cout << "***Skipping Performance Level Control test." << "\n";
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
CHK_RSMI_RET(ret)
|
|
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << "\n";
|
|
std::cout << "Reset Perf level to " << orig_pfl << " ..." << "\n";
|
|
ret = rsmi_dev_perf_level_set_v1(dv_ind, orig_pfl);
|
|
CHK_RSMI_RET(ret)
|
|
ret = rsmi_dev_perf_level_get(dv_ind, &pfl);
|
|
CHK_RSMI_RET(ret)
|
|
std::cout << "\t**New Perf Level:" << perf_level_string(pfl) << "\n";
|
|
return ret;
|
|
}
|
|
|
|
static rsmi_status_t test_set_freq(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
rsmi_frequencies_t f;
|
|
uint32_t freq_bitmask;
|
|
rsmi_clk_type rsmi_clk;
|
|
|
|
// Clock files may not be writable, causing sets to
|
|
// return RSMI_STATUS_PERMISSION even if running with
|
|
// sudo. See isFileWritable() for more info.
|
|
|
|
print_test_header("Clock Frequency Control", dv_ind);
|
|
for (uint32_t clk = (uint32_t)RSMI_CLK_TYPE_FIRST;
|
|
clk <= RSMI_CLK_TYPE_LAST; ++clk) {
|
|
std::string miniHeader = "Testing clock" + std::to_string(clk);
|
|
print_mini_header(miniHeader.c_str());
|
|
rsmi_clk = (rsmi_clk_type)clk;
|
|
|
|
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
|
|
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
|
|
|
|
std::cout << "Initial frequency for clock" << rsmi_clk << " is " <<
|
|
f.current << "\n";
|
|
|
|
// Set clocks to something other than the usual default of the lowest
|
|
// frequency.
|
|
freq_bitmask = 0b01100; // Try the 3rd and 4th clocks
|
|
|
|
std::string freq_bm_str =
|
|
std::bitset<RSMI_MAX_NUM_FREQUENCIES>(freq_bitmask).to_string();
|
|
|
|
freq_bm_str.erase(0, std::min(freq_bm_str.find_first_not_of('0'),
|
|
freq_bm_str.size()-1));
|
|
|
|
std::cout << "Setting frequency mask for clock " << rsmi_clk <<
|
|
" to 0b" << freq_bm_str << " ..." << "\n";
|
|
|
|
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, freq_bitmask);
|
|
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
|
|
|
|
ret = rsmi_dev_gpu_clk_freq_get(dv_ind, rsmi_clk, &f);
|
|
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
|
|
|
|
std::cout << "Frequency is now index " << f.current << "\n";
|
|
std::cout << "Resetting mask to all frequencies." << "\n";
|
|
ret = rsmi_dev_gpu_clk_freq_set(dv_ind, rsmi_clk, 0xFFFFFFFF);
|
|
CHK_FILE_PERMISSIONS_AND_NOT_SUPPORTED_OR_UNIMPLEMENTED(ret)
|
|
|
|
ret = rsmi_dev_perf_level_set_v1(dv_ind, RSMI_DEV_PERF_LEVEL_AUTO);
|
|
CHK_FILE_PERMISSIONS(ret)
|
|
}
|
|
std::cout << "\n";
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
static void print_frequencies(rsmi_frequencies_t *f) {
|
|
bool hasDeepSleep = false;
|
|
if (f == nullptr) {
|
|
std::cout << "Freq was nullptr\n";
|
|
return;
|
|
}
|
|
for (uint32_t j = 0; j < f->num_supported; ++j) {
|
|
if (f->has_deep_sleep && j == 0) {
|
|
std::cout << "\t** S: " << std::to_string(f->frequency[j]);
|
|
hasDeepSleep = true;
|
|
} else {
|
|
std::cout << "\t** " << (hasDeepSleep ? j-1 : j)
|
|
<< ": " << std::to_string(f->frequency[j]);
|
|
}
|
|
if (j == f->current) {
|
|
std::cout << " *";
|
|
}
|
|
std::cout << "\n";
|
|
}
|
|
}
|
|
|
|
static rsmi_status_t test_set_compute_partitioning(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
const uint32_t kLength = 10;
|
|
char originalComputePartition[kLength];
|
|
originalComputePartition[0] = '\0';
|
|
print_test_header("Compute Partitioning Control", dv_ind);
|
|
|
|
ret = rsmi_dev_compute_partition_get(dv_ind, originalComputePartition,
|
|
kLength);
|
|
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
|
|
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
std::cout << "Original Compute Partition: "
|
|
<< (((originalComputePartition == nullptr)
|
|
|| ((originalComputePartition != nullptr)
|
|
&& (originalComputePartition[0] == '\0')))
|
|
? "UNKNOWN" : originalComputePartition)
|
|
<< "\n" << "\n";
|
|
|
|
for (int newComputePartition = RSMI_COMPUTE_PARTITION_CPX;
|
|
newComputePartition <= RSMI_COMPUTE_PARTITION_QPX;
|
|
newComputePartition++) {
|
|
rsmi_compute_partition_type_t newPartition
|
|
= static_cast<rsmi_compute_partition_type_t>(newComputePartition);
|
|
std::cout << "Attempting to set compute partition to "
|
|
<< compute_partition_string(newPartition) << "..."
|
|
<< "\n";
|
|
ret = rsmi_dev_compute_partition_set(dv_ind, newPartition);
|
|
CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(ret)
|
|
std::cout << "Done setting compute partition to "
|
|
<< compute_partition_string(newPartition) << "." << "\n";
|
|
std::cout << "\n" << "\n";
|
|
}
|
|
|
|
std::cout << "About to initate compute partition reset..." << "\n";
|
|
ret = rsmi_dev_compute_partition_reset(dv_ind);
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
std::cout << "Done resetting compute partition." << "\n";
|
|
|
|
std::string myComputePartition = originalComputePartition;
|
|
if (myComputePartition.empty() == false) {
|
|
std::cout << "Resetting back to original compute partition to "
|
|
<< originalComputePartition << "... " << "\n";
|
|
rsmi_compute_partition_type origComputePartitionType
|
|
= mapStringToRSMIComputePartitionTypes[originalComputePartition];
|
|
ret = rsmi_dev_compute_partition_set(dv_ind, origComputePartitionType);
|
|
CHK_RSMI_NOT_SUPPORTED_OR_SETTING_UNAVAILABLE_RET(ret)
|
|
std::cout << "Done" << "\n";
|
|
}
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
static rsmi_status_t test_set_memory_partition(uint32_t dv_ind) {
|
|
rsmi_status_t ret;
|
|
const uint32_t kLength = 10;
|
|
char originalMemoryPartition[kLength];
|
|
originalMemoryPartition[0] = '\0';
|
|
print_test_header("Memory Partition Control", dv_ind);
|
|
|
|
ret = rsmi_dev_memory_partition_get(dv_ind, originalMemoryPartition, kLength);
|
|
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
|
|
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
std::cout << "Original Memory Partition: "
|
|
<< (((originalMemoryPartition == nullptr)
|
|
|| ((originalMemoryPartition != nullptr)
|
|
&& (originalMemoryPartition[0] == '\0')))
|
|
? "UNKNOWN" : originalMemoryPartition)
|
|
<< "\n\n";
|
|
|
|
for (int newMemPartition = RSMI_MEMORY_PARTITION_NPS1;
|
|
newMemPartition <= RSMI_MEMORY_PARTITION_NPS8;
|
|
newMemPartition++) {
|
|
rsmi_memory_partition_type_t newMemoryPartition
|
|
= static_cast<rsmi_memory_partition_type_t>(newMemPartition);
|
|
std::cout << "Attempting to set memory partition to "
|
|
<< memory_partition_string(newMemoryPartition) << "..."
|
|
<< "\n";
|
|
ret = rsmi_dev_memory_partition_set(dv_ind, newMemoryPartition);
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
if (ret == RSMI_STATUS_NOT_SUPPORTED) {
|
|
// do not continue attempting to set, device does not support setting
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
std::cout << "Done setting memory partition to "
|
|
<< memory_partition_string(newMemoryPartition)
|
|
<< "." << "\n\n\n";
|
|
}
|
|
|
|
std::cout << "About to initate memory partition reset...\n";
|
|
ret = rsmi_dev_memory_partition_reset(dv_ind);
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
std::cout << "Done resetting memory partition.\n";
|
|
|
|
std::string myMemPart = originalMemoryPartition;
|
|
if (myMemPart.empty() == false) {
|
|
std::cout << "Resetting memory partition to " << originalMemoryPartition
|
|
<< "...\n";
|
|
rsmi_memory_partition_type_t origMemoryPartitionType
|
|
= mapStringToRSMIMemoryPartitionTypes[originalMemoryPartition];
|
|
ret = rsmi_dev_memory_partition_set(dv_ind, origMemoryPartitionType);
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
std::cout << "Done\n";
|
|
}
|
|
return RSMI_STATUS_SUCCESS;
|
|
}
|
|
|
|
template<typename T> constexpr float convert_mw_to_w(T mw) {
|
|
return static_cast<float>(mw / 1000.0);
|
|
}
|
|
|
|
template <typename T>
|
|
auto print_error_or_value(rsmi_status_t status_code, const T& metric) {
|
|
if (status_code == rsmi_status_t::RSMI_STATUS_SUCCESS) {
|
|
if constexpr (std::is_array_v<T>) {
|
|
auto idx = uint16_t(0);
|
|
auto str_values = std::string();
|
|
const auto num_elems = static_cast<uint16_t>(std::end(metric) - std::begin(metric));
|
|
str_values = ("\n\t\t num of values: " + std::to_string(num_elems) + "\n");
|
|
for (const auto& el : metric) {
|
|
str_values += "\t\t [" + std::to_string(idx) + "]: " + std::to_string(el) + "\n";
|
|
++idx;
|
|
}
|
|
return str_values;
|
|
}
|
|
else if constexpr ((std::is_same_v<T, std::uint16_t>) ||
|
|
(std::is_same_v<T, std::uint32_t>) ||
|
|
(std::is_same_v<T, std::uint64_t>)) {
|
|
return std::to_string(metric);
|
|
}
|
|
}
|
|
else {
|
|
return ("\n\t\tStatus: [" + std::to_string(status_code) + "] " + "-> " + amd::smi::getRSMIStatusString(status_code));
|
|
}
|
|
};
|
|
|
|
template <typename T>
|
|
std::string print_unsigned_int(T value) {
|
|
std::stringstream ss;
|
|
ss << static_cast<uint64_t>(value | 0);
|
|
|
|
return ss.str();
|
|
}
|
|
|
|
|
|
int main() {
|
|
rsmi_status_t ret;
|
|
|
|
ret = rsmi_init(0);
|
|
CHK_RSMI_RET_I(ret)
|
|
|
|
std::vector<std::string> val_vec;
|
|
uint64_t val_ui64, val2_ui64;
|
|
int64_t val_i64;
|
|
uint32_t val_ui32;
|
|
uint16_t val_ui16;
|
|
rsmi_dev_perf_level_t pfl;
|
|
rsmi_frequencies_t f;
|
|
uint32_t num_monitor_devs = 0;
|
|
rsmi_gpu_metrics_t gpu_metrics;
|
|
std::string val_str;
|
|
RSMI_POWER_TYPE power_type = RSMI_INVALID_POWER;
|
|
|
|
rsmi_num_monitor_devices(&num_monitor_devs);
|
|
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
|
|
std::cout << "\t**Device #: " << std::dec << i << "\n";
|
|
ret = rsmi_dev_id_get(i, &val_ui16);
|
|
CHK_RSMI_RET_I(ret)
|
|
std::cout << "\t**Device ID: 0x" << std::hex << val_ui16 << "\n";
|
|
ret = rsmi_dev_revision_get(i, &val_ui16);
|
|
CHK_RSMI_RET_I(ret)
|
|
std::cout << "\t**Dev.Rev.ID: 0x" << std::hex << val_ui16 << "\n";
|
|
ret = amd::smi::rsmi_get_gfx_target_version(i , &val_str);
|
|
std::cout << "\t**Target Graphics Version: " << val_str << "\n";
|
|
|
|
char pcie_vendor_name[256];
|
|
ret = rsmi_dev_pcie_vendor_name_get(i, pcie_vendor_name, 256);
|
|
CHK_RSMI_RET_I(ret)
|
|
std::cout << "\t**PCIe vendor name: " << pcie_vendor_name << std::endl;
|
|
|
|
char current_compute_partition[256];
|
|
current_compute_partition[0] = '\0';
|
|
ret = rsmi_dev_compute_partition_get(i, current_compute_partition, 256);
|
|
std::cout << "\t**Current Compute Partition: "
|
|
<< (((current_compute_partition == nullptr)
|
|
|| ((current_compute_partition != nullptr)
|
|
&& (current_compute_partition[0] == '\0')))
|
|
? "UNKNOWN" : current_compute_partition);
|
|
if (ret != RSMI_STATUS_SUCCESS) {
|
|
std::cout << ", RSMI_STATUS = ";
|
|
} else {
|
|
std::cout << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
|
|
|
|
const uint32_t kLength = 5;
|
|
char memory_partition[kLength];
|
|
memory_partition[0] = '\0';
|
|
ret = rsmi_dev_memory_partition_get(i, memory_partition, kLength);
|
|
std::cout << "\t**Current Memory Partition: "
|
|
<< (((memory_partition == nullptr)
|
|
|| ((memory_partition != nullptr)
|
|
&& (memory_partition[0] == '\0')))
|
|
? "UNKNOWN" : memory_partition);
|
|
if (ret != RSMI_STATUS_SUCCESS) {
|
|
std::cout << ", RSMI_STATUS = ";
|
|
} else {
|
|
std::cout << "\n";
|
|
}
|
|
CHK_NOT_SUPPORTED_OR_UNEXPECTED_DATA_OR_INSUFFICIENT_SIZE_RET(ret)
|
|
|
|
std::cout << "\t**rsmi_minmax_bandwidth_get(0, " << i << ", ...): ";
|
|
ret = rsmi_dev_pci_id_get(0, &val_ui64);
|
|
ret = rsmi_dev_pci_id_get(i, &val2_ui64);
|
|
if (i > 0 && val_ui64 != val2_ui64) {
|
|
uint64_t min_bandwidth = 0;
|
|
uint64_t max_bandwidth = 0;
|
|
ret = rsmi_minmax_bandwidth_get(0, i, &min_bandwidth, &max_bandwidth);
|
|
CHK_RSMI_NOT_SUPPORTED_OR_UNEXPECTED_DATA_RET(ret)
|
|
std::cout << "\n\t**\tMinimum Bandwidth: " << std::dec << min_bandwidth
|
|
<< "\n\t**\tMaximum Bandwidth: " << std::dec
|
|
<< max_bandwidth << "\n";
|
|
} else {
|
|
std::cout << "Not Supported\n";
|
|
}
|
|
|
|
//
|
|
std::cout << "\n";
|
|
print_test_header("GPU METRICS: Using static struct (Backwards Compatibility) ", i);
|
|
print_function_header_with_rsmi_ret(ret, "rsmi_dev_gpu_metrics_info_get(" + std::to_string(i) + ", &gpu_metrics)");
|
|
rsmi_dev_gpu_metrics_info_get(i, &gpu_metrics);
|
|
|
|
std::cout << "\t**.common_header.format_revision : "
|
|
<< print_unsigned_int(gpu_metrics.common_header.format_revision) << "\n";
|
|
std::cout << "\t**.common_header.content_revision : "
|
|
<< print_unsigned_int(gpu_metrics.common_header.content_revision) << "\n";
|
|
|
|
std::cout << "\t**.temperature_edge : " << std::dec
|
|
<< gpu_metrics.temperature_edge << "\n";
|
|
std::cout << "\t**.temperature_hotspot : " << std::dec
|
|
<< gpu_metrics.temperature_hotspot << "\n";
|
|
std::cout << "\t**.temperature_mem : " << std::dec
|
|
<< gpu_metrics.temperature_mem << "\n";
|
|
std::cout << "\t**.temperature_vrgfx : " << std::dec
|
|
<< gpu_metrics.temperature_vrgfx << "\n";
|
|
std::cout << "\t**.temperature_vrsoc : " << std::dec
|
|
<< gpu_metrics.temperature_vrsoc << "\n";
|
|
std::cout << "\t**.temperature_vrmem : " << std::dec
|
|
<< gpu_metrics.temperature_vrmem << "\n";
|
|
std::cout << "\t**.average_gfx_activity : " << std::dec
|
|
<< gpu_metrics.average_gfx_activity << "\n";
|
|
std::cout << "\t**.average_umc_activity : " << std::dec
|
|
<< gpu_metrics.average_umc_activity << "\n";
|
|
std::cout << "\t**.average_mm_activity : " << std::dec
|
|
<< gpu_metrics.average_mm_activity << "\n";
|
|
std::cout << "\t**.average_socket_power : " << std::dec
|
|
<< gpu_metrics.average_socket_power << "\n";
|
|
std::cout << "\t**.energy_accumulator : " << std::dec
|
|
<< gpu_metrics.energy_accumulator << "\n";
|
|
std::cout << "\t**.system_clock_counter : " << std::dec
|
|
<< gpu_metrics.system_clock_counter << "\n";
|
|
std::cout << "\t**.average_gfxclk_frequency : " << std::dec
|
|
<< gpu_metrics.average_gfxclk_frequency << "\n";
|
|
std::cout << "\t**.average_socclk_frequency : " << std::dec
|
|
<< gpu_metrics.average_socclk_frequency << "\n";
|
|
std::cout << "\t**.average_uclk_frequency : " << std::dec
|
|
<< gpu_metrics.average_uclk_frequency << "\n";
|
|
std::cout << "\t**.average_vclk0_frequency : " << std::dec
|
|
<< gpu_metrics.average_vclk0_frequency<< "\n";
|
|
std::cout << "\t**.average_dclk0_frequency : " << std::dec
|
|
<< gpu_metrics.average_dclk0_frequency << "\n";
|
|
std::cout << "\t**.average_vclk1_frequency : " << std::dec
|
|
<< gpu_metrics.average_vclk1_frequency << "\n";
|
|
std::cout << "\t**.average_dclk1_frequency : " << std::dec
|
|
<< gpu_metrics.average_dclk1_frequency << "\n";
|
|
std::cout << "\t**.current_gfxclk : " << std::dec
|
|
<< gpu_metrics.current_gfxclk << "\n";
|
|
std::cout << "\t**.current_socclk : " << std::dec
|
|
<< gpu_metrics.current_socclk << "\n";
|
|
std::cout << "\t**.current_uclk : " << std::dec
|
|
<< gpu_metrics.current_uclk << "\n";
|
|
std::cout << "\t**.current_vclk0 : " << std::dec
|
|
<< gpu_metrics.current_vclk0 << "\n";
|
|
std::cout << "\t**.current_dclk0 : " << std::dec
|
|
<< gpu_metrics.current_dclk0 << "\n";
|
|
std::cout << "\t**.current_vclk1 : " << std::dec
|
|
<< gpu_metrics.current_vclk1 << "\n";
|
|
std::cout << "\t**.current_dclk1 : " << std::dec
|
|
<< gpu_metrics.current_dclk1 << "\n";
|
|
std::cout << "\t**.throttle_status : " << std::dec
|
|
<< gpu_metrics.throttle_status << "\n";
|
|
std::cout << "\t**.current_fan_speed : " << std::dec
|
|
<< gpu_metrics.current_fan_speed << "\n";
|
|
std::cout << "\t**.pcie_link_width : " << std::dec
|
|
<< gpu_metrics.pcie_link_width << "\n";
|
|
std::cout << "\t**.pcie_link_speed : " << std::dec
|
|
<< gpu_metrics.pcie_link_speed << "\n";
|
|
std::cout << "\t**.gfx_activity_acc : " << std::dec
|
|
<< gpu_metrics.gfx_activity_acc << "\n";
|
|
std::cout << "\t**.mem_activity_acc : " << std::dec
|
|
<< gpu_metrics.mem_activity_acc << "\n";
|
|
std::cout << "\t**.firmware_timestamp : " << std::dec
|
|
<< gpu_metrics.firmware_timestamp << "\n";
|
|
std::cout << "\t**.voltage_soc : " << std::dec
|
|
<< gpu_metrics.voltage_soc << "\n";
|
|
std::cout << "\t**.voltage_gfx : " << std::dec
|
|
<< gpu_metrics.voltage_gfx << "\n";
|
|
std::cout << "\t**.voltage_mem : " << std::dec
|
|
<< gpu_metrics.voltage_mem << "\n";
|
|
std::cout << "\t**.indep_throttle_status : " << std::dec
|
|
<< gpu_metrics.indep_throttle_status << "\n";
|
|
std::cout << "\t**.current_socket_power : " << std::dec
|
|
<< gpu_metrics.current_socket_power << "\n";
|
|
std::cout << "\t**.gfxclk_lock_status : " << std::dec
|
|
<< gpu_metrics.gfxclk_lock_status << "\n";
|
|
std::cout << "\t**.xgmi_link_width : " << std::dec
|
|
<< gpu_metrics.xgmi_link_width << "\n";
|
|
std::cout << "\t**.xgmi_link_speed : " << std::dec
|
|
<< gpu_metrics.xgmi_link_speed << "\n";
|
|
std::cout << "\t**.pcie_bandwidth_acc : " << std::dec
|
|
<< gpu_metrics.pcie_bandwidth_acc << "\n";
|
|
std::cout << "\t**.pcie_bandwidth_inst : " << std::dec
|
|
<< gpu_metrics.pcie_bandwidth_inst << "\n";
|
|
std::cout << "\t**.pcie_l0_to_recov_count_acc : " << std::dec
|
|
<< gpu_metrics.pcie_l0_to_recov_count_acc << "\n";
|
|
std::cout << "\t**.pcie_replay_count_acc : " << std::dec
|
|
<< gpu_metrics.pcie_replay_count_acc << "\n";
|
|
std::cout << "\t**.pcie_replay_rover_count_acc : " << std::dec
|
|
<< gpu_metrics.pcie_replay_rover_count_acc << "\n";
|
|
|
|
std::cout << "\t**.temperature_hbm[] : " << std::dec << "\n";
|
|
for (const auto& temp : gpu_metrics.temperature_hbm) {
|
|
std::cout << "\t -> " << std::dec << temp << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.vcn_activity[] : " << std::dec << "\n";
|
|
for (const auto& vcn : gpu_metrics.vcn_activity) {
|
|
std::cout << "\t -> " << std::dec << vcn << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.xgmi_read_data_acc[] : " << std::dec << "\n";
|
|
for (const auto& read_data : gpu_metrics.xgmi_read_data_acc) {
|
|
std::cout << "\t -> " << std::dec << read_data << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.xgmi_write_data_acc[] : " << std::dec << "\n";
|
|
for (const auto& write_data : gpu_metrics.xgmi_write_data_acc) {
|
|
std::cout << "\t -> " << std::dec << write_data << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.current_gfxclks[] : " << std::dec << "\n";
|
|
for (const auto& gfxclk : gpu_metrics.current_gfxclks) {
|
|
std::cout << "\t -> " << std::dec << gfxclk << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.current_socclks[] : " << std::dec << "\n";
|
|
for (const auto& socclk : gpu_metrics.current_socclks) {
|
|
std::cout << "\t -> " << std::dec << socclk << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.current_vclk0s[] : " << std::dec << "\n";
|
|
for (const auto& vclk : gpu_metrics.current_vclk0s) {
|
|
std::cout << "\t -> " << std::dec << vclk << "\n";
|
|
}
|
|
|
|
std::cout << "\t**.current_dclk0s[] : " << std::dec << "\n";
|
|
for (const auto& dclk : gpu_metrics.current_dclk0s) {
|
|
std::cout << "\t -> " << std::dec << dclk << "\n";
|
|
}
|
|
std::cout << " ** Note: Values MAX'ed out (UINTX MAX are unsupported for the version in question) ** " << "\n";
|
|
|
|
std::cout << "\n\n";
|
|
print_test_header("GPU METRICS: Using direct APIs (newer)", i);
|
|
metrics_table_header_t header_values;
|
|
GPUMetricTempHbm_t hbm_values;
|
|
GPUMetricVcnActivity_t vcn_values;
|
|
GPUMetricXgmiReadDataAcc_t xgmi_read_values;
|
|
GPUMetricXgmiWriteDataAcc_t xgmi_write_values;
|
|
GPUMetricCurrGfxClk_t curr_gfxclk_values;
|
|
GPUMetricCurrSocClk_t curr_socclk_values;
|
|
GPUMetricCurrVClk0_t curr_vclk0_values;
|
|
GPUMetricCurrDClk0_t curr_dclk0_values;
|
|
|
|
ret = rsmi_dev_metrics_header_info_get(i, &header_values);
|
|
std::cout << "\t[Metrics Header]" << "\n";
|
|
std::cout << "\t -> format_revision : " << print_unsigned_int(header_values.format_revision) << "\n";
|
|
std::cout << "\t -> content_revision : " << print_unsigned_int(header_values.content_revision) << "\n";
|
|
std::cout << "\t--------------------" << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Temperature]" << "\n";
|
|
ret = rsmi_dev_metrics_temp_edge_get(i, &val_ui16);
|
|
std::cout << "\t -> temp_edge(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_temp_hotspot_get(i, &val_ui16);
|
|
std::cout << "\t -> temp_hotspot(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_temp_mem_get(i, &val_ui16);
|
|
std::cout << "\t -> temp_mem(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_temp_vrgfx_get(i, &val_ui16);
|
|
std::cout << "\t -> temp_vrgfx(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_temp_vrsoc_get(i, &val_ui16);
|
|
std::cout << "\t -> temp_vrsoc(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_temp_vrmem_get(i, &val_ui16);
|
|
std::cout << "\t -> temp_vrmem(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_temp_hbm_get(i, &hbm_values);
|
|
std::cout << "\t -> temp_hbm(): " << print_error_or_value(ret, hbm_values) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Power/Energy]" << "\n";
|
|
ret = rsmi_dev_metrics_curr_socket_power_get(i, &val_ui16);
|
|
std::cout << "\t -> current_socket_power(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_energy_acc_get(i, &val_ui64);
|
|
std::cout << "\t -> energy_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_avg_socket_power_get(i, &val_ui16);
|
|
std::cout << "\t -> average_socket_power(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Utilization]" << "\n";
|
|
ret = rsmi_dev_metrics_avg_gfx_activity_get(i, &val_ui16);
|
|
std::cout << "\t -> average_gfx_activity(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_umc_activity_get(i, &val_ui16);
|
|
std::cout << "\t -> average_umc_activity(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_mm_activity_get(i, &val_ui16);
|
|
std::cout << "\t -> average_mm_activity(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_vcn_activity_get(i, &vcn_values);
|
|
std::cout << "\t -> vcn_activity(): " << print_error_or_value(ret, vcn_values) << "\n";
|
|
ret = rsmi_dev_metrics_mem_activity_acc_get(i, &val_ui32);
|
|
std::cout << "\t -> mem_activity_accum(): " << print_error_or_value(ret, val_ui32) << "\n";
|
|
ret = rsmi_dev_metrics_gfx_activity_acc_get(i, &val_ui32);
|
|
std::cout << "\t -> gfx_activity_accum(): " << print_error_or_value(ret, val_ui32) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Average Clock]" << "\n";
|
|
ret = rsmi_dev_metrics_avg_gfx_clock_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_gfx_clock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_soc_clock_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_soc_clock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_uclock_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_uclock_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_vclock0_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_vclock0_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_dclock0_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_dclock0_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_vclock1_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_vclock1_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_avg_dclock1_frequency_get(i, &val_ui16);
|
|
std::cout << "\t -> average_dclock1_frequency(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Current Clock]" << "\n";
|
|
ret = rsmi_dev_metrics_curr_vclk1_get(i, &val_ui16);
|
|
std::cout << "\t -> current_vclock1(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_curr_dclk1_get(i, &val_ui16);
|
|
std::cout << "\t -> current_dclock1(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_curr_uclk_get(i, &val_ui16);
|
|
std::cout << "\t -> current_uclock(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_curr_dclk0_get(i, &curr_dclk0_values);
|
|
std::cout << "\t -> current_dclk0(): " << print_error_or_value(ret, curr_dclk0_values) << "\n";
|
|
ret = rsmi_dev_metrics_curr_gfxclk_get(i, &curr_gfxclk_values);
|
|
std::cout << "\t -> current_gfxclk(): " << print_error_or_value(ret, curr_gfxclk_values) << "\n";
|
|
ret = rsmi_dev_metrics_curr_socclk_get(i, &curr_socclk_values);
|
|
std::cout << "\t -> current_soc_clock(): " << print_error_or_value(ret, curr_socclk_values) << "\n";
|
|
ret = rsmi_dev_metrics_curr_vclk0_get(i, &curr_vclk0_values);
|
|
std::cout << "\t -> current_vclk0(): " << print_error_or_value(ret, curr_vclk0_values) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Throttle]" << "\n";
|
|
ret = rsmi_dev_metrics_indep_throttle_status_get(i, &val_ui64);
|
|
std::cout << "\t -> indep_throttle_status(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_throttle_status_get(i, &val_ui32);
|
|
std::cout << "\t -> throttle_status(): " << print_error_or_value(ret, val_ui32) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Gfx Clock Lock]" << "\n";
|
|
ret = rsmi_dev_metrics_gfxclk_lock_status_get(i, &val_ui32);
|
|
std::cout << "\t -> gfxclk_lock_status(): " << print_error_or_value(ret, val_ui32) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Current Fan Speed]" << "\n";
|
|
ret = rsmi_dev_metrics_curr_fan_speed_get(i, &val_ui16);
|
|
std::cout << "\t -> current_fan_speed(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Link/Bandwidth/Speed]" << "\n";
|
|
ret = rsmi_dev_metrics_pcie_link_width_get(i, &val_ui16);
|
|
std::cout << "\t -> pcie_link_width(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_pcie_link_speed_get(i, &val_ui16);
|
|
std::cout << "\t -> pcie_link_speed(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_pcie_bandwidth_acc_get(i, &val_ui64);
|
|
std::cout << "\t -> pcie_bandwidth_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_pcie_bandwidth_inst_get(i, &val_ui64);
|
|
std::cout << "\t -> pcie_bandwidth_inst(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_pcie_l0_recov_count_acc_get(i, &val_ui64);
|
|
std::cout << "\t -> pcie_l0_recov_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_pcie_replay_count_acc_get(i, &val_ui64);
|
|
std::cout << "\t -> pcie_replay_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_pcie_replay_rover_count_acc_get(i, &val_ui64);
|
|
std::cout << "\t -> pcie_replay_rollover_count_accum(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_xgmi_link_width_get(i, &val_ui16);
|
|
std::cout << "\t -> xgmi_link_width(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_xgmi_link_speed_get(i, &val_ui16);
|
|
std::cout << "\t -> xgmi_link_speed(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_xgmi_read_data_get(i, &xgmi_read_values);
|
|
std::cout << "\t -> xgmi_read_data(): " << print_error_or_value(ret, xgmi_read_values) << "\n";
|
|
ret = rsmi_dev_metrics_xgmi_write_data_get(i, &xgmi_write_values);
|
|
std::cout << "\t -> xgmi_write_data(): " << print_error_or_value(ret, xgmi_write_values) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Voltage]" << "\n";
|
|
ret = rsmi_dev_metrics_volt_soc_get(i, &val_ui16);
|
|
std::cout << "\t -> voltage_soc(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_volt_gfx_get(i, &val_ui16);
|
|
std::cout << "\t -> voltage_gfx(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
ret = rsmi_dev_metrics_volt_mem_get(i, &val_ui16);
|
|
std::cout << "\t -> voltage_mem(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[Timestamp]" << "\n";
|
|
ret = rsmi_dev_metrics_system_clock_counter_get(i, &val_ui64);
|
|
std::cout << "\t -> system_clock_counter(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
ret = rsmi_dev_metrics_firmware_timestamp_get(i, &val_ui64);
|
|
std::cout << "\t -> firmware_timestamp(): " << print_error_or_value(ret, val_ui64) << "\n";
|
|
|
|
std::cout << "\n";
|
|
std::cout << "\t[XCD CounterVoltage]" << "\n";
|
|
ret = rsmi_dev_metrics_xcd_counter_get(i, &val_ui16);
|
|
std::cout << "\t -> xcd_counter(): " << print_error_or_value(ret, val_ui16) << "\n";
|
|
std::cout << "\n\n";
|
|
|
|
|
|
ret = rsmi_dev_perf_level_get(i, &pfl);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
std::cout << "\t**Performance Level:" <<
|
|
perf_level_string(pfl) << "\n";
|
|
ret = rsmi_dev_overdrive_level_get(i, &val_ui32);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
std::cout << "\t**OverDrive Level:" << val_ui32 << "\n";
|
|
|
|
print_test_header("GPU Clocks", i);
|
|
for (int clkType = static_cast<int>(RSMI_CLK_TYPE_SYS);
|
|
clkType <= static_cast<int>(RSMI_CLK_TYPE_PCIE);
|
|
clkType++) {
|
|
rsmi_clk_type_t type = static_cast<rsmi_clk_type_t>(clkType);
|
|
ret = rsmi_dev_gpu_clk_freq_get(i, type, &f);
|
|
print_function_header_with_rsmi_ret(ret,
|
|
"rsmi_dev_gpu_clk_freq_get(" + std::to_string(i) +
|
|
", " + clock_type_string(type) + ", &f)");
|
|
if (ret != RSMI_STATUS_SUCCESS) {
|
|
continue;
|
|
}
|
|
std::cout << "\t** " << clock_type_string(type)
|
|
<< " - Supported # of freqs: ";
|
|
std::cout << f.num_supported << "\n";
|
|
std::cout << "\t** " << clock_type_string(type) << " f.current: "
|
|
<< f.current << "\n";
|
|
print_frequencies(&f);
|
|
}
|
|
|
|
std::cout << "\t**Monitor name: ";
|
|
char name[128];
|
|
ret = rsmi_dev_name_get(i, name, 128);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
std::cout << name << "\n";
|
|
|
|
std::cout << "\t**Temperature (edge): ";
|
|
ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_EDGE,
|
|
rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << std::dec << val_i64/1000 << " C" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Temperature (junction): ";
|
|
ret = rsmi_dev_temp_metric_get(i, RSMI_TEMP_TYPE_JUNCTION,
|
|
rsmi_temperature_metric_t::RSMI_TEMP_CURRENT, &val_i64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << std::dec << (val_i64 / 1000) << " C" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Voltage: ";
|
|
ret = rsmi_dev_volt_metric_get(i, RSMI_VOLT_TYPE_VDDGFX,
|
|
RSMI_VOLT_CURRENT, &val_i64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << val_i64 << "mV" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Current Fan Speed: ";
|
|
ret = rsmi_dev_fan_speed_get(i, 0, &val_i64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
ret = rsmi_dev_fan_speed_max_get(i, 0, &val_ui64);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
std::cout << (static_cast<float>(val_i64)/val_ui64) * 100;
|
|
std::cout << "% (" << std::dec << val_i64 << "/"
|
|
<< std::dec << val_ui64 << ")" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Current fan RPMs: ";
|
|
ret = rsmi_dev_fan_rpms_get(i, 0, &val_i64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << std::dec << val_i64 << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Current Power Cap: ";
|
|
ret = rsmi_dev_power_cap_get(i, 0, &val_ui64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << std::dec << val_ui64 << "uW" <<"\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Power Cap Range: ";
|
|
ret = rsmi_dev_power_cap_range_get(i, 0, &val_ui64, &val2_ui64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << std::dec << val2_ui64 << " to "
|
|
<< std::dec << val_ui64 << " uW" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Average Power Usage: ";
|
|
ret = rsmi_dev_power_ave_get(i, 0, &val_ui64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << convert_mw_to_w(val_ui64) << " W" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Current Socket Power Usage: ";
|
|
ret = rsmi_dev_current_socket_power_get(i, &val_ui64);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << convert_mw_to_w(val_ui64) << " W" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
|
|
std::cout << "\t**Generic Power Usage: ";
|
|
ret = rsmi_dev_power_get(i, &val_ui64, &power_type);
|
|
if (ret == RSMI_STATUS_SUCCESS) {
|
|
std::cout << "[" << amd::smi::power_type_string(power_type) << "] "
|
|
<< convert_mw_to_w(val_ui64) << " W" << "\n";
|
|
}
|
|
CHK_RSMI_NOT_SUPPORTED_RET(ret)
|
|
std::cout << "\t=======" << "\n";
|
|
}
|
|
|
|
std::cout << "***** Testing write api's" << "\n";
|
|
if (amd::smi::is_sudo_user() == false) {
|
|
std::cout << "Write APIs require users to execute with sudo. "
|
|
<< "Cannot proceed." << "\n";
|
|
return 0;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < num_monitor_devs; ++i) {
|
|
ret = test_set_overdrive(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_set_perf_level(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_set_fan_speed(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_power_cap(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_power_profile(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_set_compute_partitioning(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_set_freq(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
|
|
ret = test_set_memory_partition(i);
|
|
CHK_AND_PRINT_RSMI_ERR_RET(ret)
|
|
}
|
|
|
|
return 0;
|
|
}
|