Merge remote-tracking branch 'rocmsmi/amd-staging' into HEAD

Change-Id: I0661926c10eef2bc32b83d9a63a3a6eb6991e781
This commit is contained in:
Galantsev, Dmitrii
2023-09-25 04:22:09 -05:00
18 changed files with 528 additions and 284 deletions
+1
View File
@@ -290,6 +290,7 @@ typedef enum {
FW_ID_SDMA_TH0,
FW_ID_SDMA_TH1,
FW_ID_CP_MES,
FW_ID_MES_KIQ,
FW_ID_MES_STACK,
FW_ID_MES_THREAD1,
FW_ID_MES_THREAD1_STACK,
+74 -72
View File
@@ -444,38 +444,39 @@ amdsmi_fw_block_t__enumvalues = {
44: 'FW_ID_SDMA_TH0',
45: 'FW_ID_SDMA_TH1',
46: 'FW_ID_CP_MES',
47: 'FW_ID_MES_STACK',
48: 'FW_ID_MES_THREAD1',
49: 'FW_ID_MES_THREAD1_STACK',
50: 'FW_ID_RLX6',
51: 'FW_ID_RLX6_DRAM_BOOT',
52: 'FW_ID_RS64_ME',
53: 'FW_ID_RS64_ME_P0_DATA',
54: 'FW_ID_RS64_ME_P1_DATA',
55: 'FW_ID_RS64_PFP',
56: 'FW_ID_RS64_PFP_P0_DATA',
57: 'FW_ID_RS64_PFP_P1_DATA',
58: 'FW_ID_RS64_MEC',
59: 'FW_ID_RS64_MEC_P0_DATA',
60: 'FW_ID_RS64_MEC_P1_DATA',
61: 'FW_ID_RS64_MEC_P2_DATA',
62: 'FW_ID_RS64_MEC_P3_DATA',
63: 'FW_ID_PPTABLE',
64: 'FW_ID_PSP_SOC',
65: 'FW_ID_PSP_DBG',
66: 'FW_ID_PSP_INTF',
67: 'FW_ID_RLX6_CORE1',
68: 'FW_ID_RLX6_DRAM_BOOT_CORE1',
69: 'FW_ID_RLCV_LX7',
70: 'FW_ID_RLC_SAVE_RESTORE_LIST',
71: 'FW_ID_ASD',
72: 'FW_ID_TA_RAS',
73: 'FW_ID_XGMI',
74: 'FW_ID_RLC_SRLG',
75: 'FW_ID_RLC_SRLS',
76: 'FW_ID_SMC',
77: 'FW_ID_DMCU',
78: 'FW_ID__MAX',
47: 'FW_ID_MES_KIQ',
48: 'FW_ID_MES_STACK',
49: 'FW_ID_MES_THREAD1',
50: 'FW_ID_MES_THREAD1_STACK',
51: 'FW_ID_RLX6',
52: 'FW_ID_RLX6_DRAM_BOOT',
53: 'FW_ID_RS64_ME',
54: 'FW_ID_RS64_ME_P0_DATA',
55: 'FW_ID_RS64_ME_P1_DATA',
56: 'FW_ID_RS64_PFP',
57: 'FW_ID_RS64_PFP_P0_DATA',
58: 'FW_ID_RS64_PFP_P1_DATA',
59: 'FW_ID_RS64_MEC',
60: 'FW_ID_RS64_MEC_P0_DATA',
61: 'FW_ID_RS64_MEC_P1_DATA',
62: 'FW_ID_RS64_MEC_P2_DATA',
63: 'FW_ID_RS64_MEC_P3_DATA',
64: 'FW_ID_PPTABLE',
65: 'FW_ID_PSP_SOC',
66: 'FW_ID_PSP_DBG',
67: 'FW_ID_PSP_INTF',
68: 'FW_ID_RLX6_CORE1',
69: 'FW_ID_RLX6_DRAM_BOOT_CORE1',
70: 'FW_ID_RLCV_LX7',
71: 'FW_ID_RLC_SAVE_RESTORE_LIST',
72: 'FW_ID_ASD',
73: 'FW_ID_TA_RAS',
74: 'FW_ID_XGMI',
75: 'FW_ID_RLC_SRLG',
76: 'FW_ID_RLC_SRLS',
77: 'FW_ID_SMC',
78: 'FW_ID_DMCU',
79: 'FW_ID__MAX',
}
FW_ID_SMU = 1
FW_ID_FIRST = 1
@@ -524,38 +525,39 @@ FW_ID_IMU_IRAM = 43
FW_ID_SDMA_TH0 = 44
FW_ID_SDMA_TH1 = 45
FW_ID_CP_MES = 46
FW_ID_MES_STACK = 47
FW_ID_MES_THREAD1 = 48
FW_ID_MES_THREAD1_STACK = 49
FW_ID_RLX6 = 50
FW_ID_RLX6_DRAM_BOOT = 51
FW_ID_RS64_ME = 52
FW_ID_RS64_ME_P0_DATA = 53
FW_ID_RS64_ME_P1_DATA = 54
FW_ID_RS64_PFP = 55
FW_ID_RS64_PFP_P0_DATA = 56
FW_ID_RS64_PFP_P1_DATA = 57
FW_ID_RS64_MEC = 58
FW_ID_RS64_MEC_P0_DATA = 59
FW_ID_RS64_MEC_P1_DATA = 60
FW_ID_RS64_MEC_P2_DATA = 61
FW_ID_RS64_MEC_P3_DATA = 62
FW_ID_PPTABLE = 63
FW_ID_PSP_SOC = 64
FW_ID_PSP_DBG = 65
FW_ID_PSP_INTF = 66
FW_ID_RLX6_CORE1 = 67
FW_ID_RLX6_DRAM_BOOT_CORE1 = 68
FW_ID_RLCV_LX7 = 69
FW_ID_RLC_SAVE_RESTORE_LIST = 70
FW_ID_ASD = 71
FW_ID_TA_RAS = 72
FW_ID_XGMI = 73
FW_ID_RLC_SRLG = 74
FW_ID_RLC_SRLS = 75
FW_ID_SMC = 76
FW_ID_DMCU = 77
FW_ID__MAX = 78
FW_ID_MES_KIQ = 47
FW_ID_MES_STACK = 48
FW_ID_MES_THREAD1 = 49
FW_ID_MES_THREAD1_STACK = 50
FW_ID_RLX6 = 51
FW_ID_RLX6_DRAM_BOOT = 52
FW_ID_RS64_ME = 53
FW_ID_RS64_ME_P0_DATA = 54
FW_ID_RS64_ME_P1_DATA = 55
FW_ID_RS64_PFP = 56
FW_ID_RS64_PFP_P0_DATA = 57
FW_ID_RS64_PFP_P1_DATA = 58
FW_ID_RS64_MEC = 59
FW_ID_RS64_MEC_P0_DATA = 60
FW_ID_RS64_MEC_P1_DATA = 61
FW_ID_RS64_MEC_P2_DATA = 62
FW_ID_RS64_MEC_P3_DATA = 63
FW_ID_PPTABLE = 64
FW_ID_PSP_SOC = 65
FW_ID_PSP_DBG = 66
FW_ID_PSP_INTF = 67
FW_ID_RLX6_CORE1 = 68
FW_ID_RLX6_DRAM_BOOT_CORE1 = 69
FW_ID_RLCV_LX7 = 70
FW_ID_RLC_SAVE_RESTORE_LIST = 71
FW_ID_ASD = 72
FW_ID_TA_RAS = 73
FW_ID_XGMI = 74
FW_ID_RLC_SRLG = 75
FW_ID_RLC_SRLS = 76
FW_ID_SMC = 77
FW_ID_DMCU = 78
FW_ID__MAX = 79
amdsmi_fw_block_t = ctypes.c_uint32 # enum
# values for enumeration 'amdsmi_vram_type_t'
@@ -734,7 +736,7 @@ struct_amdsmi_fw_info_t._pack_ = 1 # source:False
struct_amdsmi_fw_info_t._fields_ = [
('num_fw_info', ctypes.c_ubyte),
('PADDING_0', ctypes.c_ubyte * 7),
('fw_info_list', struct_fw_info_list_ * 78),
('fw_info_list', struct_fw_info_list_ * 79),
('reserved', ctypes.c_uint32 * 7),
('PADDING_1', ctypes.c_ubyte * 4),
]
@@ -1853,13 +1855,13 @@ __all__ = \
'FW_ID_CP_PFP', 'FW_ID_CP_PM4', 'FW_ID_DFC', 'FW_ID_DMCU',
'FW_ID_DMCU_ERAM', 'FW_ID_DMCU_ISR', 'FW_ID_DRV_CAP',
'FW_ID_FIRST', 'FW_ID_IMU_DRAM', 'FW_ID_IMU_IRAM', 'FW_ID_ISP',
'FW_ID_MC', 'FW_ID_MES_STACK', 'FW_ID_MES_THREAD1',
'FW_ID_MES_THREAD1_STACK', 'FW_ID_MMSCH', 'FW_ID_PPTABLE',
'FW_ID_PSP_BL', 'FW_ID_PSP_DBG', 'FW_ID_PSP_INTF',
'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC', 'FW_ID_PSP_SOSDRV',
'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV', 'FW_ID_PSP_TOC',
'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC', 'FW_ID_RLCV_LX7',
'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL',
'FW_ID_MC', 'FW_ID_MES_KIQ', 'FW_ID_MES_STACK',
'FW_ID_MES_THREAD1', 'FW_ID_MES_THREAD1_STACK', 'FW_ID_MMSCH',
'FW_ID_PPTABLE', 'FW_ID_PSP_BL', 'FW_ID_PSP_DBG',
'FW_ID_PSP_INTF', 'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC',
'FW_ID_PSP_SOSDRV', 'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV',
'FW_ID_PSP_TOC', 'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC',
'FW_ID_RLCV_LX7', 'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL',
'FW_ID_RLC_RESTORE_LIST_GPM_MEM',
'FW_ID_RLC_RESTORE_LIST_SRM_MEM', 'FW_ID_RLC_SAVE_RESTORE_LIST',
'FW_ID_RLC_SRLG', 'FW_ID_RLC_SRLS', 'FW_ID_RLC_V', 'FW_ID_RLX6',
+33 -1
View File
@@ -70,7 +70,8 @@ extern "C" {
*/
//! Guaranteed maximum possible number of supported frequencies
#define RSMI_MAX_NUM_FREQUENCIES 32
//! (32 normal + 1 sleep frequency)
#define RSMI_MAX_NUM_FREQUENCIES 33
//! Maximum possible value for fan speed. Should be used as the denominator
//! when determining fan speed percentage.
@@ -639,6 +640,8 @@ typedef enum {
RSMI_FW_BLOCK_ME,
RSMI_FW_BLOCK_MEC,
RSMI_FW_BLOCK_MEC2,
RSMI_FW_BLOCK_MES,
RSMI_FW_BLOCK_MES_KIQ,
RSMI_FW_BLOCK_PFP,
RSMI_FW_BLOCK_RLC,
RSMI_FW_BLOCK_RLC_SRLC,
@@ -759,6 +762,11 @@ typedef rsmi_power_profile_status_t rsmi_power_profile_status;
* @brief This structure holds information about clock frequencies.
*/
typedef struct {
/**
* Deep Sleep frequency is only supported by some GPUs
*/
bool has_deep_sleep;
/**
* The number of supported frequencies
*/
@@ -1757,6 +1765,30 @@ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask);
rsmi_status_t
rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power);
/**
 * @brief Get the current socket power (also known as instant
 * power) of the device with the provided device index.
 *
 * @details Given a device index @p dv_ind and a pointer to a uint64_t
 * @p socket_power, this function will write the current socket power
 * (in microwatts) to the uint64_t pointed to by @p socket_power.
 *
 * @param[in] dv_ind a device index
 *
 * @param[inout] socket_power a pointer to uint64_t to which the current
 * socket power will be written. If this parameter is nullptr,
 * this function will return ::RSMI_STATUS_INVALID_ARGS if the function is
 * supported with the provided arguments, and ::RSMI_STATUS_NOT_SUPPORTED
 * if it is not supported with the provided arguments.
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function with the given arguments
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 */
rsmi_status_t
rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power);
/**
* @brief Get the energy accumulator counter of the device with provided
* device index.
@@ -146,6 +146,8 @@ enum DevInfoTypes {
kDevFwVersionMe,
kDevFwVersionMec,
kDevFwVersionMec2,
kDevFwVersionMes,
kDevFwVersionMesKiq,
kDevFwVersionPfp,
kDevFwVersionRlc,
kDevFwVersionRlcSrlc,
+3 -3
View File
@@ -130,18 +130,18 @@ class Logger {
break;
}
return *getInstance();
};
}
Logger &operator<<(const char* s) {
return operator<<(std::string(s));
};
}
template <class T> Logger &operator<<(const T &v) {
std::ostringstream s;
s << v;
std::string str = s.str();
return operator<<(str);
};
}
// Interface for Error Log
void error(const char* text) throw();
+42 -38
View File
@@ -5,7 +5,7 @@
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
@@ -67,6 +67,8 @@ enum MonitorTypes {
kMonPowerCapMax,
kMonPowerCapMin,
kMonPowerAve,
kMonPowerInput,
kMonPowerLabel,
kMonTempMax,
kMonTempMin,
kMonTempMaxHyst,
@@ -94,45 +96,47 @@ enum MonitorTypes {
kMonInvalid = 0xFFFFFFFF,
};
const std::map<MonitorTypes,std::string> monitorTypesToString {
{MonitorTypes::kMonName, "amd::smi::kMonName"},
{MonitorTypes::kMonTemp, "amd::smi::kMonName"},
{MonitorTypes::kMonFanSpeed, "amd::smi::kMonName"},
{MonitorTypes::kMonMaxFanSpeed, "amd::smi::kMonName"},
{MonitorTypes::kMonFanRPMs, "amd::smi::kMonName"},
{MonitorTypes::kMonFanCntrlEnable, "amd::smi::kMonName"},
{MonitorTypes::kMonPowerCap, "amd::smi::kMonName"},
{MonitorTypes::kMonPowerCapDefault, "amd::smi::kMonName"},
{MonitorTypes::kMonPowerCapMax, "amd::smi::kMonName"},
{MonitorTypes::kMonPowerCapMin, "amd::smi::kMonName"},
{MonitorTypes::kMonPowerAve, "amd::smi::kMonName"},
{MonitorTypes::kMonTempMax, "amd::smi::kMonName"},
{MonitorTypes::kMonTempMin, "amd::smi::kMonName"},
{MonitorTypes::kMonTempMaxHyst, "amd::smi::kMonName"},
{MonitorTypes::kMonTempMinHyst, "amd::smi::kMonName"},
{MonitorTypes::kMonTempCritical, "amd::smi::kMonName"},
{MonitorTypes::kMonTempCriticalHyst, "amd::smi::kMonName"},
{MonitorTypes::kMonTempEmergency, "amd::smi::kMonName"},
{MonitorTypes::kMonTempEmergencyHyst, "amd::smi::kMonName"},
{MonitorTypes::kMonTempCritMin, "amd::smi::kMonName"},
{MonitorTypes::kMonTempCritMinHyst, "amd::smi::kMonName"},
{MonitorTypes::kMonTempOffset, "amd::smi::kMonName"},
{MonitorTypes::kMonTempLowest, "amd::smi::kMonName"},
{MonitorTypes::kMonTempHighest, "amd::smi::kMonName"},
{MonitorTypes::kMonTempLabel, "amd::smi::kMonName"},
{MonitorTypes::kMonVolt, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltMax, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltMinCrit, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltMin, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltMaxCrit, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltAverage, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltLowest, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltHighest, "amd::smi::kMonName"},
{MonitorTypes::kMonVoltLabel, "amd::smi::kMonName"},
{MonitorTypes::kMonInvalid, "amd::smi::kMonName"},
// Lookup table from each MonitorTypes enumerator to a printable string that
// spells out the enumerator's own qualified name ("MonitorTypes::<name>").
// NOTE(review): presumably consumed by logging/debug output -- confirm
// against the callers, which are not visible here.
// Keep this table in sync with the MonitorTypes enum: an enumerator without
// an entry here has no string form.
const std::map<MonitorTypes, std::string> monitorTypesToString{
{MonitorTypes::kMonName, "MonitorTypes::kMonName"},
{MonitorTypes::kMonTemp, "MonitorTypes::kMonTemp"},
{MonitorTypes::kMonFanSpeed, "MonitorTypes::kMonFanSpeed"},
{MonitorTypes::kMonMaxFanSpeed, "MonitorTypes::kMonMaxFanSpeed"},
{MonitorTypes::kMonFanRPMs, "MonitorTypes::kMonFanRPMs"},
{MonitorTypes::kMonFanCntrlEnable, "MonitorTypes::kMonFanCntrlEnable"},
{MonitorTypes::kMonPowerCap, "MonitorTypes::kMonPowerCap"},
{MonitorTypes::kMonPowerCapDefault, "MonitorTypes::kMonPowerCapDefault"},
{MonitorTypes::kMonPowerCapMax, "MonitorTypes::kMonPowerCapMax"},
{MonitorTypes::kMonPowerCapMin, "MonitorTypes::kMonPowerCapMin"},
{MonitorTypes::kMonPowerAve, "MonitorTypes::kMonPowerAve"},
{MonitorTypes::kMonPowerInput, "MonitorTypes::kMonPowerInput"},
{MonitorTypes::kMonPowerLabel, "MonitorTypes::kMonPowerLabel"},
{MonitorTypes::kMonTempMax, "MonitorTypes::kMonTempMax"},
{MonitorTypes::kMonTempMin, "MonitorTypes::kMonTempMin"},
{MonitorTypes::kMonTempMaxHyst, "MonitorTypes::kMonTempMaxHyst"},
{MonitorTypes::kMonTempMinHyst, "MonitorTypes::kMonTempMinHyst"},
{MonitorTypes::kMonTempCritical, "MonitorTypes::kMonTempCritical"},
{MonitorTypes::kMonTempCriticalHyst, "MonitorTypes::kMonTempCriticalHyst"},
{MonitorTypes::kMonTempEmergency, "MonitorTypes::kMonTempEmergency"},
{MonitorTypes::kMonTempEmergencyHyst,
"MonitorTypes::kMonTempEmergencyHyst"},
{MonitorTypes::kMonTempCritMin, "MonitorTypes::kMonTempCritMin"},
{MonitorTypes::kMonTempCritMinHyst, "MonitorTypes::kMonTempCritMinHyst"},
{MonitorTypes::kMonTempOffset, "MonitorTypes::kMonTempOffset"},
{MonitorTypes::kMonTempLowest, "MonitorTypes::kMonTempLowest"},
{MonitorTypes::kMonTempHighest, "MonitorTypes::kMonTempHighest"},
{MonitorTypes::kMonTempLabel, "MonitorTypes::kMonTempLabel"},
{MonitorTypes::kMonVolt, "MonitorTypes::kMonVolt"},
{MonitorTypes::kMonVoltMax, "MonitorTypes::kMonVoltMax"},
{MonitorTypes::kMonVoltMinCrit, "MonitorTypes::kMonVoltMinCrit"},
{MonitorTypes::kMonVoltMin, "MonitorTypes::kMonVoltMin"},
{MonitorTypes::kMonVoltMaxCrit, "MonitorTypes::kMonVoltMaxCrit"},
{MonitorTypes::kMonVoltAverage, "MonitorTypes::kMonVoltAverage"},
{MonitorTypes::kMonVoltLowest, "MonitorTypes::kMonVoltLowest"},
{MonitorTypes::kMonVoltHighest, "MonitorTypes::kMonVoltHighest"},
{MonitorTypes::kMonVoltLabel, "MonitorTypes::kMonVoltLabel"},
{MonitorTypes::kMonInvalid, "MonitorTypes::kMonInvalid"},
};
class Monitor {
public:
explicit Monitor(std::string path, RocmSMI_env_vars const *e);
+1 -1
View File
@@ -74,7 +74,7 @@ Display Options:
-a, --showallinfo Show Temperature, Fan and Clock values
Topology:
-i, --showid Show GPU ID
-i, --showid Show DEVICE ID
-v, --showvbios Show VBIOS version
--showdriverversion Show kernel driver version
--showfwinfo [BLOCK [BLOCK ...]] Show FW information
+161 -74
View File
@@ -45,9 +45,8 @@ CLOCK_JSON_VERSION = 1
headerString = ' ROCm System Management Interface '
footerString = ' End of ROCm SMI Log '
# Output formatting
appWidth = 100
appWidth = 90
deviceList = []
# Enable or disable serialized format
@@ -383,8 +382,8 @@ def getPidList():
return
def getPower(device, silent=False):
""" Return the current power level of a given device
def getAvgPower(device, silent=False):
""" Return the average power level of a given device
@param device: DRM device identifier
@param silent=Turn on to silence error output
@@ -393,7 +392,21 @@ def getPower(device, silent=False):
power = c_uint32()
ret = rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power))
if rsmi_ret_ok(ret, device, 'get_power_avg', silent):
return power.value / 1000000
return str(power.value / 1000000)
return 'N/A'
def getCurrentSocketPower(device, silent=False):
    """ Return the current (also known as instant)
    socket power of a given device

    The library reports the value in microwatts; it is converted to watts
    and returned as a string. 'N/A' is returned when the call fails.

    @param device: DRM device identifier
    @param silent=Turn on to silence error output
    (you plan to handle manually). Default is off.
    """
    # Local import keeps this function self-contained regardless of which
    # ctypes names the module-level imports happen to expose.
    from ctypes import c_uint64

    # rsmi_dev_current_socket_power_get() takes a uint64_t* out-parameter.
    # The buffer must therefore be 64 bits wide: with a c_uint32 the library
    # would write 8 bytes into a 4-byte object and corrupt adjacent memory.
    power = c_uint64()
    ret = rocmsmi.rsmi_dev_current_socket_power_get(device, byref(power))
    if rsmi_ret_ok(ret, device, 'get_socket_power', silent):
        # microwatts -> watts
        return str(power.value / 1000000)
    return 'N/A'
@@ -437,7 +450,7 @@ def findFirstAvailableTemp(device):
temp = c_int64(0)
metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT
ret_temp = "N/A"
ret_temp_type = "(Unknown)"
ret_temp_type = temp_type_lst[0]
for i, templist_val in enumerate(temp_type_lst):
ret = rocmsmi.rsmi_dev_temp_metric_get(c_uint32(device), i, metric, byref(temp))
if rsmi_ret_ok(ret, device, 'get_temp_metric_' + templist_val, silent=True):
@@ -448,6 +461,37 @@ def findFirstAvailableTemp(device):
continue
return (ret_temp_type, ret_temp)
def getTemperatureLabel(deviceList):
    """ Return the lower-case temperature sensor label to report

    The label is the sensor type that findFirstAvailableTemp() reports for
    the first device in the list, lower-cased and with any parentheses
    removed. For an empty list the first entry of temp_type_lst
    (lower-cased) is returned instead.

    @param deviceList: List of DRM devices (can be a single-item list)
    """
    # Fallback label: first known temperature type
    fallbackLabel = temp_type_lst[0].lower()
    if len(deviceList) == 0:
        return fallbackLabel
    sensorType, _unused = findFirstAvailableTemp(deviceList[0])
    label = sensorType.lower()
    for paren in ('(', ')'):
        label = label.replace(paren, '')
    return label
def getPowerLabel(deviceList):
    """ Pick the power label that applies to the first device in the list

    Returns rsmi_power_label.CURRENT_SOCKET_POWER when
    getCurrentSocketPower() yields a usable reading for the first device
    (anything other than '0.0' or 'N/A'). Otherwise, and for an empty
    list, returns rsmi_power_label.AVG_POWER.

    @param deviceList: List of DRM devices (can be a single-item list)
    """
    if len(deviceList) < 1:
        # No device to probe: fall back to the average-power label
        return rsmi_power_label.AVG_POWER
    # Probe silently; a failed or zero reading means socket power is unusable
    socketPower = getCurrentSocketPower(deviceList[0], True)
    if socketPower not in ('0.0', 'N/A'):
        return rsmi_power_label.CURRENT_SOCKET_POWER
    return rsmi_power_label.AVG_POWER
def getVbiosVersion(device, silent=False):
""" Returns the VBIOS version for a given device
@@ -679,23 +723,35 @@ def printListLog(metricName, valuesList):
print(listStr + line)
def printLogSpacer(displayString=None, fill='='):
def printLogSpacer(displayString=None, fill='=', contentSizeToFit=0):
""" Prints [name of the option]/[name of the program] in the spacer to explain data below
If no parameters are given, a default fill of the '=' string is used in the spacer
@param displayString: name of item to be displayed inside of the log spacer
@param fill: padding string which surrounds the given display string
@param contentSizeToFit: providing an integer > 0 allows
ability to dynamically change output padding/fill based on this value
instead of appWidth. Handy for concise info output.
"""
global appWidth, PRINT_JSON
resizeValue = appWidth
if contentSizeToFit != 0:
resizeValue = contentSizeToFit
if resizeValue % 2: # if odd -> make even
resizeValue += 1
# leaving below to check if resizing works properly
# print("resizeVal=" +str(resizeValue) + "; appWidth=" + str(appWidth) +
# "; contentSizeToFit=" + str(contentSizeToFit) + "; fill=" + fill)
if not PRINT_JSON:
if displayString:
if len(displayString) % 2:
displayString += fill
logSpacer = fill * int((appWidth - (len(displayString))) / 2) + displayString + fill * int(
(appWidth - (len(displayString))) / 2)
logSpacer = fill * int((resizeValue - (len(displayString))) / 2) + displayString + fill * int(
(resizeValue - (len(displayString))) / 2)
else:
logSpacer = fill * appWidth
logSpacer = fill * resizeValue
print(logSpacer)
@@ -1630,22 +1686,15 @@ def showAllConcise(deviceList):
print('ERROR: Cannot print JSON/CSV output for concise output')
sys.exit(1)
""" Place holder for the actual max size """
MAX_ALL_CONCISE_WIDTH = 100
appWidth_temp = appWidth
appWidth = MAX_ALL_CONCISE_WIDTH
silent = True
printLogSpacer(' Concise Info ')
deviceList.sort()
temp_type = '(' + temp_type_lst[0] + ')'
if len(deviceList) >= 1:
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
available_temp_type = temp_type.lower()
available_temp_type = available_temp_type.replace('(', '')
available_temp_type = available_temp_type.replace(')', '')
header = ['GPU', '[Model : Revision]', 'Temp', 'AvgPwr', 'Partitions', 'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%']
subheader = ['', 'Name (20 chars)', temp_type, '', '(Mem, Compute)', '', '', '', '', '', '', '']
available_temp_type = getTemperatureLabel(deviceList)
temp_type = "(" + available_temp_type.capitalize() + ")"
header=['Device', '[Model : Revision]', 'Temp', 'Power', 'Partitions',
'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%']
subheader = ['', 'Name (20 chars)', temp_type, getPowerLabel(deviceList),
'(Mem, Compute)', '', '', '', '', '', '', '']
# add additional spaces to match header
for idx, item in enumerate(subheader):
header_size = len(header[idx])
@@ -1667,11 +1716,17 @@ def showAllConcise(deviceList):
temp_val = str(getTemp(device, available_temp_type, silent))
if temp_val != 'N/A':
temp_val += degree_sign + 'C'
avgPwr = str(getPower(device))
if avgPwr != '0.0' and avgPwr != 'N/A':
socketPwr = getCurrentSocketPower(device, True)
avgPwr = getAvgPower(device, True)
powerVal = 'N/A'
if socketPwr != '0.0' and socketPwr != 'N/A':
socketPwr += 'W'
powerVal=socketPwr
elif avgPwr != '0.0' and avgPwr != 'N/A':
avgPwr += 'W'
powerVal=avgPwr
else:
avgPwr = 'N/A'
powerVal = 'N/A'
combined_partition = (getMemoryPartition(device, silent) + ", "
+ getComputePartition(device, silent))
sclk = showCurrentClocks([device], 'sclk', concise=silent)
@@ -1704,10 +1759,10 @@ def showAllConcise(deviceList):
'', '', '', '']
gpu_dev_product_info_top_name = gpu_dev_product_info_names[1]
values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val, avgPwr,
combined_partition, sclk, mclk,
fan, str(perf).lower(), pwrCap,
mem_use_pct, gpu_busy]
values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val,
powerVal, combined_partition, sclk, mclk,
fan, str(perf).lower(), pwrCap, mem_use_pct,
gpu_busy]
val_widths = {}
for device in deviceList:
@@ -1716,10 +1771,17 @@ def showAllConcise(deviceList):
for device in deviceList:
for col in range(len(val_widths[device])):
max_widths[col] = max(max_widths[col], val_widths[device][col])
printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)), None)
printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader)),
None, useItalics=True)
printLogSpacer(fill='=')
########################
# Display concise info #
########################
header_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header))
subheader_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader))
printLogSpacer(headerString, contentSizeToFit=len(header_output))
printLogSpacer(' Concise Info ', contentSizeToFit=len(header_output))
printLog(None, header_output, None)
printLog(None, subheader_output, None, useItalics=True)
printLogSpacer(fill='=', contentSizeToFit=len(header_output))
for device in deviceList:
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
@@ -1730,9 +1792,8 @@ def showAllConcise(deviceList):
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
zip(range(len(max_widths)), values['card%s_Info' % (str(device))])), None)
printLogSpacer()
""" Restore original max size """
appWidth = appWidth_temp
printLogSpacer(contentSizeToFit=len(header_output))
printLogSpacer(footerString, contentSizeToFit=len(header_output))
def showAllConciseHw(deviceList):
@@ -1808,12 +1869,21 @@ def showClocks(deviceList):
if not rsmi_ret_ok(ret, device, 'get_clk_freq_' + clk_type, True):
continue
printLog(device, 'Supported %s frequencies on GPU%s' % (clk_type, str(device)), None)
for x in range(freq.num_supported):
fr = '{:>.0f}Mhz'.format(freq.frequency[x] / 1000000)
if x == freq.current:
printLog(device, str(x), str(fr) + ' *')
else:
printLog(device, str(x), str(fr))
for i in range(freq.num_supported):
freq_string = '{:>.0f}Mhz'.format(freq.frequency[i] / 1000000)
if i == freq.current:
freq_string += ' *'
freq_index = i
# Deep Sleep frequency is only supported by some GPUs
# It is indicated by letter 'S' instead of the index number
if freq.has_deep_sleep:
# sleep state
if i == 0:
freq_index = 'S'
# all indices are offset by 1 because Deep Sleep occupies index 0
else:
freq_index = i - 1
printLog(device, str(freq_index), freq_string)
printLog(device, '', None)
else:
logging.debug('{} frequency is unsupported on device[{}]'.format(clk_type, device))
@@ -1822,12 +1892,11 @@ def showClocks(deviceList):
ret = rocmsmi.rsmi_dev_pci_bandwidth_get(device, byref(bw))
if rsmi_ret_ok(ret, device, 'get_PCIe_bandwidth', True):
printLog(device, 'Supported %s frequencies on GPU%s' % ('PCIe', str(device)), None)
for x in range(bw.transfer_rate.num_supported):
fr = '{:>.1f}GT/s x{}'.format(bw.transfer_rate.frequency[x] / 1000000000, bw.lanes[x])
if x == bw.transfer_rate.current:
printLog(device, str(x), str(fr) + ' *')
else:
printLog(device, str(x), str(fr))
for i in range(bw.transfer_rate.num_supported):
freq_string = '{:>.1f}GT/s x{}'.format(bw.transfer_rate.frequency[i] / 1000000000, bw.lanes[i])
if i == bw.transfer_rate.current:
freq_string += ' *'
printLog(device, str(i), str(freq_string))
printLog(device, '', None)
else:
logging.debug('PCIe frequency is unsupported on device [{}]'.format(device))
@@ -1857,9 +1926,17 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
printLog(device, '%s current clock frequency not found' % (clk_defined), None)
continue
fr = freq.frequency[levl] / 1000000
freq_index = levl
if freq.has_deep_sleep:
# sleep state
if levl == 0:
freq_index = 'S'
# all indices are offset by 1 because Deep Sleep occupies index 0
else:
freq_index = levl - 1
if concise: # in case function is used for concise output, no need to print.
return '{:.0f}Mhz'.format(fr)
printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(levl, fr))
printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(freq_index, fr))
elif not concise:
logging.debug('{} clock is unsupported on device[{}]'.format(clk_defined, device))
@@ -1872,12 +1949,20 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
if levl >= freq.num_supported:
printLog(device, '%s current clock frequency not found' % (clk_type), None)
continue
freq_index = levl
if freq.has_deep_sleep:
# sleep state
if levl == 0:
freq_index = 'S'
# all indices are offset by 1 because Deep Sleep occupies index 0
else:
freq_index = levl - 1
fr = freq.frequency[levl] / 1000000
if PRINT_JSON:
printLog(device, '%s clock speed:' % (clk_type), '(%sMhz)' % (str(fr)[:-2]))
printLog(device, '%s clock level:' % (clk_type), levl)
printLog(device, '%s clock level:' % (clk_type), freq_index)
else:
printLog(device, '%s clock level: %s' % (clk_type, levl), '(%sMhz)' % (str(fr)[:-2]))
printLog(device, '%s clock level: %s' % (clk_type, freq_index), '(%sMhz)' % (str(fr)[:-2]))
elif not concise:
logging.debug('{} clock is unsupported on device[{}]'.format(clk_type, device))
# pcie clocks
@@ -2108,8 +2193,8 @@ def showId(deviceList):
"""
printLogSpacer(' ID ')
for device in deviceList:
printLog(device, 'GPU ID', getId(device))
printLog(device, 'GPU Rev', getRev(device))
printLog(device, 'Device ID', getId(device))
printLog(device, 'Device Rev', getRev(device))
printLogSpacer()
@@ -2336,23 +2421,25 @@ def showPids(verbose):
def showPower(deviceList):
""" Display current Average Graphics Package Power Consumption for a list of devices
""" Display Current (also known as instant) Socket or Average
Graphics Package Power Consumption for a list of devices
@param deviceList: List of DRM devices (can be a single-item list)
"""
secondaryPresent=False
printLogSpacer(' Power Consumption ')
for device in deviceList:
if checkIfSecondaryDie(device):
if str(getCurrentSocketPower(device, True)) != 'N/A':
printLog(device, 'Current Socket Graphics Package Power (W)', getCurrentSocketPower(device))
elif checkIfSecondaryDie(device):
printLog(device, 'Average Graphics Package Power (W)', "N/A (Secondary die)")
secondaryPresent=True
elif str(getPower(device)) != '0.0':
printLog(device, 'Average Graphics Package Power (W)', getPower(device))
elif str(getAvgPower(device)) != '0.0':
printLog(device, 'Average Graphics Package Power (W)', getAvgPower(device))
else:
printErrLog(device, 'Unable to get Average Graphics Package Power Consumption')
printErrLog(device, 'Unable to get Average or Current Socket Graphics Package Power Consumption')
if secondaryPresent:
printLog(None, "\n\t\tPrimary die (usually one above or below the secondary) shows total (primary + secondary) socket power information", None)
printLogSpacer()
@@ -2848,13 +2935,8 @@ def getGraphColor(percentage):
def showTempGraph(deviceList):
deviceList.sort()
temp_type = '(' + temp_type_lst[0] + ')'
if len(deviceList) >= 1:
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
printLogSpacer(' Temperature Graph ' + temp_type + ' ')
temp_type = temp_type.lower()
temp_type = temp_type.replace('(', '')
temp_type = temp_type.replace(')', '')
temp_type = getTemperatureLabel(deviceList)
printLogSpacer(' Temperature Graph ' + temp_type.capitalize() + ' ')
# Start a thread for constantly printing
try:
# Create a thread (call print function, devices, delay in ms)
@@ -3523,9 +3605,14 @@ def save(deviceList, savefilepath):
# The code below is for when this script is run as an executable instead of when imported as a module
def isConciseInfoRequested(args):
    """ Tell whether the command line asks for the default concise view

    That is the case when the script is started with no option at all,
    with exactly one option that is the all-devices or the JSON/CSV flag,
    or with exactly two options that are the all-devices flag together
    with the JSON/CSV flag.

    @param args: parsed arguments exposing alldevices, json and csv
    """
    argCount = len(sys.argv)
    if argCount == 1:
        return True
    if argCount == 2:
        # A single option: only a device/format modifier keeps the concise view
        return (args.alldevices or args.json or args.csv) or False
    if argCount == 3:
        # Two options: must be the device modifier plus a format modifier
        return args.alldevices and (args.json or args.csv)
    return False
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description=f'AMD ROCm System Management Interface | ROCM-SMI version: {__version__}',
description='AMD ROCm System Management Interface | ROCM-SMI version: %s' % __version__,
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=90, width=120))
groupDev = parser.add_argument_group()
groupDisplayOpt = parser.add_argument_group('Display Options')
@@ -3545,7 +3632,7 @@ if __name__ == '__main__':
groupDisplayOpt.add_argument('--showhw', help='Show Hardware details', action='store_true')
groupDisplayOpt.add_argument('-a', '--showallinfo', help='Show Temperature, Fan and Clock values',
action='store_true')
groupDisplayTop.add_argument('-i', '--showid', help='Show GPU ID', action='store_true')
groupDisplayTop.add_argument('-i', '--showid', help='Show DEVICE ID', action='store_true')
groupDisplayTop.add_argument('-v', '--showvbios', help='Show VBIOS version', action='store_true')
groupDisplayTop.add_argument('-e', '--showevents', help='Show event list', metavar='EVENT', type=str, nargs='*')
groupDisplayTop.add_argument('--showdriverversion', help='Show kernel driver version', action='store_true')
@@ -3731,7 +3818,8 @@ if __name__ == '__main__':
if not PRINT_JSON:
print('\n')
printLogSpacer(headerString)
if not isConciseInfoRequested(args):
printLogSpacer(headerString)
if args.showallinfo:
args.list = True
@@ -3785,9 +3873,7 @@ if __name__ == '__main__':
if not checkAmdGpus(deviceList):
logging.warning('No AMD GPUs specified')
if len(sys.argv) == 1 or \
len(sys.argv) == 2 and (args.alldevices or (args.json or args.csv)) or \
len(sys.argv) == 3 and (args.alldevices and (args.json or args.csv)):
if isConciseInfoRequested(args):
showAllConcise(deviceList)
if args.showhw:
showAllConciseHw(deviceList)
@@ -3994,7 +4080,8 @@ if __name__ == '__main__':
devCsv = formatCsv(deviceList)
print(devCsv)
printLogSpacer(footerString)
if not isConciseInfoRequested(args):
printLogSpacer(footerString)
rsmi_ret_ok(rocmsmi.rsmi_shut_down())
exit(RETCODE)
+8 -2
View File
@@ -59,7 +59,7 @@ gpu_id = c_uint32(0)
# Policy enums
RSMI_MAX_NUM_FREQUENCIES = 32
RSMI_MAX_NUM_FREQUENCIES = 33
RSMI_MAX_FAN_SPEED = 255
RSMI_NUM_VOLTAGE_CURVE_POINTS = 3
@@ -492,7 +492,8 @@ rsmi_power_profile_status = rsmi_power_profile_status_t
class rsmi_frequencies_t(Structure):
_fields_ = [('num_supported', c_int32),
_fields_ = [('has_deep_sleep', c_bool),
('num_supported', c_int32),
('current', c_uint32),
('frequency', c_uint64 * RSMI_MAX_NUM_FREQUENCIES)]
@@ -654,3 +655,8 @@ rsmi_nps_mode_type = rsmi_nps_mode_type_t
# nps_mode_type_l[rsmi_nps_mode_type_t.RSMI_MEMORY_PARTITION_NPS2]
# will return string 'NPS2'
nps_mode_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
class rsmi_power_label(str, Enum):
    """Display labels that tell which kind of power reading is shown.

    Members are also str instances (the class mixes in str), so they
    compare equal to their literal text.
    """
    # Label for an average power reading
    AVG_POWER = '(Avg)'
    # Label for a current (instant) socket power reading
    CURRENT_SOCKET_POWER = '(Socket)'
+122 -26
View File
@@ -77,7 +77,6 @@
#include "rocm_smi/rocm_smi64Config.h"
#include "rocm_smi/rocm_smi_logger.h"
using namespace ROCmLogging;
using namespace amd::smi;
static const uint32_t kMaxOverdriveLevel = 20;
@@ -147,14 +146,21 @@ static uint64_t freq_string_to_int(const std::vector<std::string> &freq_lines,
std::istringstream fs(freq_lines[i]);
uint32_t ind;
char junk_ch;
int ind;
float freq;
std::string junk;
std::string junk_str;
std::string units_str;
std::string star_str;
fs >> ind;
fs >> junk; // colon
if (fs.peek() == 'S') {
// Deep Sleep frequency is only supported by some GPUs
fs >> junk_ch;
} else {
// All other frequency indices are numbers
fs >> ind;
}
fs >> junk_str; // colon
fs >> freq;
fs >> units_str;
fs >> star_str;
@@ -1127,9 +1133,14 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_
}
f->num_supported = static_cast<uint32_t>(val_vec.size());
bool current = false;
f->current = RSMI_MAX_NUM_FREQUENCIES + 1; // init to an invalid value
// Deep Sleep frequency is only supported by some GPUs
// It is indicated by letter 'S' instead of the index number
f->has_deep_sleep = (val_vec[0][0] == 'S');
bool current = false;
for (uint32_t i = 0; i < f->num_supported; ++i) {
f->frequency[i] = freq_string_to_int(val_vec, &current, lanes, i);
@@ -1156,9 +1167,9 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_
sysvalue += " Previous Value";
sysvalue += ' ' + std::to_string(f->frequency[f->current]);
DEBUG_LOG("More than one current clock. ", sysvalue);
}
else
} else {
f->current = i;
}
}
}
@@ -1309,6 +1320,11 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
return RSMI_STATUS_UNEXPECTED_DATA;
}
// find last_item but skip empty lines
int last_item = val_vec.size()-1;
while (val_vec[last_item].empty() || val_vec[last_item][0] == 0)
last_item--;
p->curr_sclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_SCLK_label_array_index + 1);
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
@@ -1322,16 +1338,18 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
} else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") {
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 1);
// the upper memory frequency is on the last non-empty line (last_item)
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 4);
nullptr, last_item);
return RSMI_STATUS_SUCCESS;
} else if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") {
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_SCLK_label_array_index + 3);
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 2);
// the upper memory frequency is on the last non-empty line (last_item)
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
nullptr, kOD_MCLK_label_array_index + 5);
nullptr, last_item);
return RSMI_STATUS_SUCCESS;
} else {
return RSMI_STATUS_NOT_YET_IMPLEMENTED;
@@ -1708,6 +1726,8 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block,
{ RSMI_FW_BLOCK_ME, amd::smi::kDevFwVersionMe },
{ RSMI_FW_BLOCK_MEC, amd::smi::kDevFwVersionMec },
{ RSMI_FW_BLOCK_MEC2, amd::smi::kDevFwVersionMec2 },
{ RSMI_FW_BLOCK_MES, amd::smi::kDevFwVersionMes },
{ RSMI_FW_BLOCK_MES_KIQ, amd::smi::kDevFwVersionMesKiq },
{ RSMI_FW_BLOCK_PFP, amd::smi::kDevFwVersionPfp },
{ RSMI_FW_BLOCK_RLC, amd::smi::kDevFwVersionRlc },
{ RSMI_FW_BLOCK_RLC_SRLC, amd::smi::kDevFwVersionRlcSrlc },
@@ -2485,21 +2505,22 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
amd::smi::MonitorTypes mon_type = amd::smi::kMonInvalid;
uint16_t val_ui16;
static const std::map<rsmi_temperature_metric_t, amd::smi::MonitorTypes> kMetricTypeMap = {
{ RSMI_TEMP_CURRENT, amd::smi::kMonTemp },
{ RSMI_TEMP_MAX, amd::smi::kMonTempMax },
{ RSMI_TEMP_MIN, amd::smi::kMonTempMin },
{ RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst },
{ RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst },
{ RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical },
{ RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst },
{ RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency },
{ RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst },
{ RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin },
{ RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst },
{ RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset },
{ RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest },
{ RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest },
static const std::map<rsmi_temperature_metric_t, amd::smi::MonitorTypes>
kMetricTypeMap = {
{ RSMI_TEMP_CURRENT, amd::smi::kMonTemp },
{ RSMI_TEMP_MAX, amd::smi::kMonTempMax },
{ RSMI_TEMP_MIN, amd::smi::kMonTempMin },
{ RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst },
{ RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst },
{ RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical },
{ RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst },
{ RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency },
{ RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst },
{ RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin },
{ RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst },
{ RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset },
{ RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest },
{ RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest },
};
const auto mon_type_it = kMetricTypeMap.find(metric);
@@ -2584,7 +2605,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
return RSMI_STATUS_NOT_SUPPORTED;
}
*temperature = static_cast<int64_t>(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE;
*temperature =
static_cast<int64_t>(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE;
ss << __PRETTY_FUNCTION__ << " | ======= end ======= "
<< " | Success "
@@ -2919,6 +2941,80 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
CATCH
}
// Reads the current socket power of device dv_ind into *socket_power.
//
// The value comes from the kMonPowerInput monitor entry of sensor 1, and is
// only read after the kMonPowerLabel entry of that sensor has been confirmed
// to be exactly "PPT".
//
// Returns:
//   RSMI_STATUS_INVALID_ARGS   socket_power is a null pointer
//   RSMI_STATUS_NOT_SUPPORTED  the device has no monitor, or the power label
//                              is not "PPT"
//   otherwise the status converted from a readMonitor() error, or the status
//   returned by get_dev_mon_value().
//
// NOTE(review): units are whatever the underlying power#_input file reports
// (presumably microwatts) - confirm against the public header.
rsmi_status_t
rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power) {
  TRY
  std::ostringstream ss;
  rsmi_status_t rsmiReturn = RSMI_STATUS_NOT_SUPPORTED;
  std::string val_str;
  uint32_t sensor_ind = 1;  // socket_power sysfs files have 1-based indices
  MonitorTypes mon_type = amd::smi::kMonPowerInput;

  ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind="
     << std::to_string(dv_ind);
  LOG_TRACE(ss);

  // Reject a null output pointer before anything tries to write through it.
  if (socket_power == nullptr) {
    rsmiReturn = RSMI_STATUS_INVALID_ARGS;
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: socket_power was a null ptr reference"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  // NOTE(review): `dev` is not declared in this function; presumably one of
  // the two macros below introduces it - confirm against their definitions.
  CHK_SUPPORT_SUBVAR_ONLY(socket_power, sensor_ind)
  DEVICE_MUTEX

  if (dev->monitor() == nullptr) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: hwmon monitor was a null ptr reference"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  // Only treat the sensor as socket power when its label is exactly "PPT".
  // The string comparison already covers the length, so no separate size
  // check is needed.
  int ret = dev->monitor()->readMonitor(amd::smi::kMonPowerLabel,
                                        sensor_ind, &val_str);
  if (ret != 0 || val_str != "PPT") {
    if (ret != 0) {
      rsmiReturn = amd::smi::ErrnoToRsmiStatus(ret);
    }
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: readMonitor() returned an error status"
       << " or Socket Power label did not show PPT or size of label data was"
       << " unexpected"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  rsmiReturn = get_dev_mon_value(mon_type, dv_ind, sensor_ind,
                                 socket_power);
  if (rsmiReturn != RSMI_STATUS_SUCCESS) {
    // Do not report success, and do not read *socket_power, when the read
    // failed: the caller's storage may never have been written.
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: get_dev_mon_value() returned an error status"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: " << monitorTypesToString.at(mon_type)
     << " | Data: " << *socket_power
     << " | Returning = "
     << getRSMIStatusString(rsmiReturn) << " |";
  LOG_TRACE(ss);
  return rsmiReturn;
  CATCH
}
rsmi_status_t
rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power,
float *counter_resolution, uint64_t *timestamp) {
+10 -2
View File
@@ -68,8 +68,6 @@
#include "rocm_smi/rocm_smi_logger.h"
#include "shared_mutex.h" // NOLINT
using namespace ROCmLogging;
namespace amd {
namespace smi {
@@ -141,6 +139,8 @@ static const char *kDevFwVersionMcFName = "fw_version/mc_fw_version";
static const char *kDevFwVersionMeFName = "fw_version/me_fw_version";
static const char *kDevFwVersionMecFName = "fw_version/mec_fw_version";
static const char *kDevFwVersionMec2FName = "fw_version/mec2_fw_version";
static const char *kDevFwVersionMesFName = "fw_version/mes_fw_version";
static const char *kDevFwVersionMesKiqFName = "fw_version/mes_kiq_fw_version";
static const char *kDevFwVersionPfpFName = "fw_version/pfp_fw_version";
static const char *kDevFwVersionRlcFName = "fw_version/rlc_fw_version";
static const char *kDevFwVersionRlcSrlcFName = "fw_version/rlc_srlc_fw_version";
@@ -284,6 +284,8 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
{kDevFwVersionMe, kDevFwVersionMeFName},
{kDevFwVersionMec, kDevFwVersionMecFName},
{kDevFwVersionMec2, kDevFwVersionMec2FName},
{kDevFwVersionMes, kDevFwVersionMesFName},
{kDevFwVersionMesKiq, kDevFwVersionMesKiqFName},
{kDevFwVersionPfp, kDevFwVersionPfpFName},
{kDevFwVersionRlc, kDevFwVersionRlcFName},
{kDevFwVersionRlcSrlc, kDevFwVersionRlcSrlcFName},
@@ -347,6 +349,8 @@ static std::map<DevInfoTypes, uint8_t> kDevInfoVarTypeToRSMIVariant = {
{kDevFwVersionMe, RSMI_FW_BLOCK_ME},
{kDevFwVersionMec, RSMI_FW_BLOCK_MEC},
{kDevFwVersionMec2, RSMI_FW_BLOCK_MEC2},
{kDevFwVersionMes, RSMI_FW_BLOCK_MES},
{kDevFwVersionMesKiq, RSMI_FW_BLOCK_MES_KIQ},
{kDevFwVersionPfp, RSMI_FW_BLOCK_PFP},
{kDevFwVersionRlc, RSMI_FW_BLOCK_RLC},
{kDevFwVersionRlcSrlc, RSMI_FW_BLOCK_RLC_SRLC},
@@ -482,6 +486,8 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
kDevFwVersionMe,
kDevFwVersionMec,
kDevFwVersionMec2,
kDevFwVersionMes,
kDevFwVersionMesKiq,
kDevFwVersionPfp,
kDevFwVersionRlc,
kDevFwVersionRlcSrlc,
@@ -962,6 +968,8 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
case kDevFwVersionMe:
case kDevFwVersionMec:
case kDevFwVersionMec2:
case kDevFwVersionMes:
case kDevFwVersionMesKiq:
case kDevFwVersionPfp:
case kDevFwVersionRlc:
case kDevFwVersionRlcSrlc:
-1
View File
@@ -61,7 +61,6 @@
#include "rocm_smi/rocm_smi_exception.h"
#include "rocm_smi/rocm_smi_logger.h"
using namespace ROCmLogging;
using namespace amd::smi;
#define TRY try {
+56 -55
View File
@@ -71,9 +71,8 @@
#include "rocm_smi/rocm_smi_logger.h"
#include "rocm_smi/rocm_smi_main.h"
using namespace ROCmLogging;
Logger* Logger::m_Instance = nullptr;
ROCmLogging::Logger *ROCmLogging::Logger::m_Instance = nullptr;
// Log file name
// WARNING: File name should be changed here and
@@ -81,39 +80,39 @@ Logger* Logger::m_Instance = nullptr;
// in one place will cause a mismatch in these scripts,
// files may not have proper permissions, and logrotate
// would not function properly.
const std::string logPath = "/var/log/amd_smi_lib/";
const std::string logBaseFName = "AMD-SMI-lib";
const std::string logExtension = ".log";
const std::string logFileName = logPath + logBaseFName + logExtension;
#define LOGPATH "/var/log/amd_smi_lib/"
#define LOGBASE_FNAME "AMD-SMI-lib"
#define LOGEXTENSION ".log"
const char *logFileName = LOGPATH LOGBASE_FNAME LOGEXTENSION;
Logger::Logger() {
ROCmLogging::Logger::Logger() {
initialize_resources();
}
Logger::~Logger() {
ROCmLogging::Logger::~Logger() {
if (m_loggingIsOn) {
destroy_resources();
}
}
Logger* Logger::getInstance() throw() {
ROCmLogging::Logger* ROCmLogging::Logger::getInstance() throw() {
if (m_Instance == nullptr) {
m_Instance = new Logger();
m_Instance = new ROCmLogging::Logger();
}
return m_Instance;
}
void Logger::lock() {
void ROCmLogging::Logger::lock() {
m_Lock.lock();
}
void Logger::unlock() {
void ROCmLogging::Logger::unlock() {
m_Lock.unlock();
}
void Logger::logIntoFile(std::string& data) {
void ROCmLogging::Logger::logIntoFile(std::string& data) {
lock();
if(!m_File.is_open()) {
if (!m_File.is_open()) {
initialize_resources();
if (!m_File.is_open()) {
std::cout << "WARNING: re-initializing resources was unsuccessful."
@@ -127,24 +126,24 @@ void Logger::logIntoFile(std::string& data) {
unlock();
}
void Logger::logOnConsole(std::string& data) {
void ROCmLogging::Logger::logOnConsole(std::string& data) {
std::cout << getCurrentTime() << " " << data << std::endl;
}
// Returns: In string format, YY-MM-DD HH:MM:SS.microseconds
std::string Logger::getCurrentTime(void) {
using namespace std::chrono;
std::string ROCmLogging::Logger::getCurrentTime(void) {
std::string currentTime;
// get current time
auto now = system_clock::now();
auto now = std::chrono::system_clock::now();
// get number of microseconds for the current second
// (remainder after division into seconds)
auto ms = duration_cast<microseconds>(now.time_since_epoch()) % 1000000;
auto ms = std::chrono::duration_cast<std::chrono::microseconds>(
now.time_since_epoch()) % 1000000;
// convert to std::time_t in order to convert to std::tm (broken time)
auto timer = system_clock::to_time_t(now);
auto timer = std::chrono::system_clock::to_time_t(now);
// convert to broken time
std::tm bt = *std::localtime(&timer);
@@ -159,7 +158,7 @@ std::string Logger::getCurrentTime(void) {
}
// Interface for Error Log
void Logger::error(const char* text) throw() {
void ROCmLogging::Logger::error(const char* text) throw() {
// By default, logging is disabled
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -182,18 +181,18 @@ void Logger::error(const char* text) throw() {
}
}
void Logger::error(std::string& text) throw() {
void ROCmLogging::Logger::error(std::string& text) throw() {
error(text.data());
}
void Logger::error(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::error(std::ostringstream& stream) throw() {
std::string text = stream.str();
error(text.data());
stream.str("");
}
// Interface for Alarm Log
void Logger::alarm(const char* text) throw() {
void ROCmLogging::Logger::alarm(const char* text) throw() {
// By default, logging is disabled (ie. no RSMI_LOGGING)
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -216,18 +215,18 @@ void Logger::alarm(const char* text) throw() {
}
}
void Logger::alarm(std::string& text) throw() {
void ROCmLogging::Logger::alarm(std::string& text) throw() {
alarm(text.data());
}
void Logger::alarm(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::alarm(std::ostringstream& stream) throw() {
std::string text = stream.str();
alarm(text.data());
stream.str("");
}
// Interface for Always Log
void Logger::always(const char* text) throw() {
void ROCmLogging::Logger::always(const char* text) throw() {
// By default, logging is disabled (ie. no RSMI_LOGGING)
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -250,18 +249,18 @@ void Logger::always(const char* text) throw() {
}
}
void Logger::always(std::string& text) throw() {
void ROCmLogging::Logger::always(std::string& text) throw() {
always(text.data());
}
void Logger::always(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::always(std::ostringstream& stream) throw() {
std::string text = stream.str();
always(text.data());
stream.str("");
}
// Interface for Buffer Log
void Logger::buffer(const char* text) throw() {
void ROCmLogging::Logger::buffer(const char* text) throw() {
// Buffer is the special case. So don't add log level
// and timestamp in the buffer message. Just log the raw bytes.
if ((m_LogType == FILE_LOG) && (m_LogLevel >= LOG_LEVEL_BUFFER)) {
@@ -284,18 +283,18 @@ void Logger::buffer(const char* text) throw() {
}
}
void Logger::buffer(std::string& text) throw() {
void ROCmLogging::Logger::buffer(std::string& text) throw() {
buffer(text.data());
}
void Logger::buffer(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::buffer(std::ostringstream& stream) throw() {
std::string text = stream.str();
buffer(text.data());
stream.str("");
}
// Interface for Info Log
void Logger::info(const char* text) throw() {
void ROCmLogging::Logger::info(const char* text) throw() {
// By default, logging is disabled (ie. no RSMI_LOGGING)
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -318,18 +317,18 @@ void Logger::info(const char* text) throw() {
}
}
void Logger::info(std::string& text) throw() {
void ROCmLogging::Logger::info(std::string& text) throw() {
info(text.data());
}
void Logger::info(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::info(std::ostringstream& stream) throw() {
std::string text = stream.str();
info(text.data());
stream.str("");
}
// Interface for Trace Log
void Logger::trace(const char* text) throw() {
void ROCmLogging::Logger::trace(const char* text) throw() {
// By default, logging is disabled (ie. no RSMI_LOGGING)
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -352,18 +351,18 @@ void Logger::trace(const char* text) throw() {
}
}
void Logger::trace(std::string& text) throw() {
void ROCmLogging::Logger::trace(std::string& text) throw() {
trace(text.data());
}
void Logger::trace(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::trace(std::ostringstream& stream) throw() {
std::string text = stream.str();
trace(text.data());
stream.str("");
}
// Interface for Debug Log
void Logger::debug(const char* text) throw() {
void ROCmLogging::Logger::debug(const char* text) throw() {
// By default, logging is disabled (ie. no RSMI_LOGGING)
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -386,51 +385,53 @@ void Logger::debug(const char* text) throw() {
}
}
void Logger::debug(std::string& text) throw() {
void ROCmLogging::Logger::debug(std::string& text) throw() {
debug(text.data());
}
void Logger::debug(std::ostringstream& stream) throw() {
void ROCmLogging::Logger::debug(std::ostringstream& stream) throw() {
std::string text = stream.str();
debug(text.data());
stream.str("");
}
// Interfaces to control log levels
void Logger::updateLogLevel(LogLevel logLevel) {
void ROCmLogging::Logger::updateLogLevel(LogLevel logLevel) {
m_LogLevel = logLevel;
}
void Logger::enableAllLogLevels() {
void ROCmLogging::Logger::enableAllLogLevels() {
m_LogLevel = ENABLE_LOG;
}
// Disable all log levels, except error and alarm
void Logger::disableLog() {
void ROCmLogging::Logger::disableLog() {
m_LogLevel = DISABLE_LOG;
}
// Interfaces to control log Types
void Logger::updateLogType(LogType logType) {
void ROCmLogging::Logger::updateLogType(LogType logType) {
m_LogType = logType;
}
void Logger::enableConsoleLogging() {
void ROCmLogging::Logger::enableConsoleLogging() {
m_LogType = CONSOLE;
}
void Logger::enableFileLogging() {
void ROCmLogging::Logger::enableFileLogging() {
m_LogType = FILE_LOG;
}
// Returns a string of details on current log settings
std::string Logger::getLogSettings() {
std::string ROCmLogging::Logger::getLogSettings() {
std::string logSettings;
if (m_File.is_open()) {
logSettings += "OpenStatus = File (" + logFileName + ") is open";
logSettings += "OpenStatus = File (" + std::string(logFileName)
+ ") is open";
} else {
logSettings += "OpenStatus = File (" + logFileName + ") is not open";
logSettings += "OpenStatus = File (" + std::string(logFileName)
+ ") is not open";
}
logSettings += ", ";
@@ -480,11 +481,11 @@ std::string Logger::getLogSettings() {
// Returns current reported enabled logging state. State is controlled by
// user's environment variable RSMI_LOGGING.
bool Logger::isLoggerEnabled() {
bool ROCmLogging::Logger::isLoggerEnabled() {
return m_loggingIsOn;
}
void Logger::initialize_resources() {
void ROCmLogging::Logger::initialize_resources() {
// By default, logging is disabled (ie. no RSMI_LOGGING)
// The check below allows us to toggle logging through RSMI_LOGGING
// set or unset
@@ -492,7 +493,7 @@ void Logger::initialize_resources() {
if (!m_loggingIsOn) {
return;
}
m_File.open(logFileName.c_str(), std::ios::out | std::ios::app);
m_File.open(logFileName, std::ios::out | std::ios::app);
m_LogLevel = LOG_LEVEL_TRACE;
// RSMI_LOGGING = 1, output to logs only
// RSMI_LOGGING = 2, output to console only
@@ -521,9 +522,9 @@ void Logger::initialize_resources() {
if (m_File.fail()) {
std::cout << "WARNING: Failed opening log file." << std::endl;
}
chmod(logFileName.c_str(), S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH);
chmod(logFileName, S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH);
}
void Logger::destroy_resources() {
void ROCmLogging::Logger::destroy_resources() {
m_File.close();
}
+4 -3
View File
@@ -68,7 +68,6 @@
#include "rocm_smi/rocm_smi_kfd.h"
#include "rocm_smi/rocm_smi_logger.h"
using namespace ROCmLogging;
static const char *kPathDRMRoot = "/sys/class/drm";
static const char *kPathHWMonRoot = "/sys/class/hwmon";
@@ -129,6 +128,8 @@ amd::smi::RocmSMI::devInfoTypesStrings = {
{amd::smi::kDevFwVersionMe, amdSMI + "kDevFwVersionMe"},
{amd::smi::kDevFwVersionMec, amdSMI + "kDevFwVersionMec"},
{amd::smi::kDevFwVersionMec2, amdSMI + "kDevFwVersionMec2"},
{amd::smi::kDevFwVersionMes, amdSMI + "kDevFwVersionMes"},
{amd::smi::kDevFwVersionMesKiq, amdSMI + "kDevFwVersionMesKiq"},
{amd::smi::kDevFwVersionPfp, amdSMI + "kDevFwVersionPfp"},
{amd::smi::kDevFwVersionRlc, amdSMI + "kDevFwVersionRlc"},
{amd::smi::kDevFwVersionRlcSrlc, amdSMI + "kDevFwVersionRlcSrlc"},
@@ -313,12 +314,12 @@ RocmSMI::Initialize(uint64_t flags) {
int i_ret;
LOG_ALWAYS("=============== ROCM SMI initialize ================");
Logger::getInstance()->enableAllLogLevels();
ROCmLogging::Logger::getInstance()->enableAllLogLevels();
// Leaving below to allow developers to check current log settings
// std::string logSettings = Logger::getInstance()->getLogSettings();
// std::cout << "Current log settings:\n" << logSettings << std::endl;
if (Logger::getInstance()->isLoggerEnabled()) {
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
logSystemDetails();
}
+8 -5
View File
@@ -3,7 +3,7 @@
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
@@ -58,8 +58,6 @@
#include "rocm_smi/rocm_smi_exception.h"
#include "rocm_smi/rocm_smi_logger.h"
using namespace ROCmLogging;
namespace amd {
namespace smi {
@@ -80,6 +78,8 @@ static const char *kMonPowerCapName = "power#_cap";
static const char *kMonPowerCapMaxName = "power#_cap_max";
static const char *kMonPowerCapMinName = "power#_cap_min";
static const char *kMonPowerAveName = "power#_average";
static const char *kMonPowerInputName = "power#_input";
static const char *kMonPowerLabelName = "power#_label";
static const char *kMonTempMaxName = "temp#_max";
static const char *kMonTempMinName = "temp#_min";
static const char *kMonTempMaxHystName = "temp#_max_hyst";
@@ -135,6 +135,8 @@ static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
{kMonPowerCapMax, kMonPowerCapMaxName},
{kMonPowerCapMin, kMonPowerCapMinName},
{kMonPowerAve, kMonPowerAveName},
{kMonPowerInput, kMonPowerInputName},
{kMonPowerLabel, kMonPowerLabelName},
{kMonTempMax, kMonTempMaxName},
{kMonTempMin, kMonTempMinName},
{kMonTempMaxHyst, kMonTempMaxHystName},
@@ -202,7 +204,8 @@ static const std::map<const char *, monitor_depends_t> kMonFuncDependsMap = {
.variants = {kMonInvalid},
}
},
{"rsmi_dev_power_cap_default_get", { .mandatory_depends = {kMonPowerCapDefaultName},
{"rsmi_dev_power_cap_default_get", { .mandatory_depends =
{kMonPowerCapDefaultName},
.variants = {kMonInvalid},
}
},
@@ -616,7 +619,7 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) {
supported_monitors = intersect;
}
if (!supported_monitors.empty()) {
for (unsigned long & supported_monitor : supported_monitors) {
for (uint64_t &supported_monitor : supported_monitors) {
if (m_type == eDefaultMonitor) {
assert(supported_monitor > 0);
supported_monitor |=
-1
View File
@@ -70,7 +70,6 @@
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi_logger.h"
using namespace ROCmLogging;
namespace amd {
namespace smi {
@@ -111,6 +111,7 @@ void TestPowerRead::Run(void) {
std::cout << "\t**Power Cap Range: " << info.min_power_cap << " to " <<
info.max_power_cap << " uW" << std::endl;
}
// TODO: Add current_socket_power tests
}
}
}
+2
View File
@@ -56,6 +56,8 @@ static const std::map<amdsmi_fw_block_t, const char *> kDevFWNameMap = {
{FW_ID_CP_ME, "me"},
{FW_ID_CP_MEC1, "mec1"},
{FW_ID_CP_MEC2, "mec2"},
{FW_ID_CP_MES, "mes"},
{FW_ID_MES_KIQ, "mes_kiq"}, // TODO: double check
{FW_ID_CP_PFP, "pfp"},
{FW_ID_RLC, "rlc"},
{FW_ID_RLC_SRLG, "rlc_srlg"},