Merge remote-tracking branch 'rocmsmi/amd-staging' into HEAD
Change-Id: I0661926c10eef2bc32b83d9a63a3a6eb6991e781
This commit is contained in:
@@ -290,6 +290,7 @@ typedef enum {
|
||||
FW_ID_SDMA_TH0,
|
||||
FW_ID_SDMA_TH1,
|
||||
FW_ID_CP_MES,
|
||||
FW_ID_MES_KIQ,
|
||||
FW_ID_MES_STACK,
|
||||
FW_ID_MES_THREAD1,
|
||||
FW_ID_MES_THREAD1_STACK,
|
||||
|
||||
@@ -444,38 +444,39 @@ amdsmi_fw_block_t__enumvalues = {
|
||||
44: 'FW_ID_SDMA_TH0',
|
||||
45: 'FW_ID_SDMA_TH1',
|
||||
46: 'FW_ID_CP_MES',
|
||||
47: 'FW_ID_MES_STACK',
|
||||
48: 'FW_ID_MES_THREAD1',
|
||||
49: 'FW_ID_MES_THREAD1_STACK',
|
||||
50: 'FW_ID_RLX6',
|
||||
51: 'FW_ID_RLX6_DRAM_BOOT',
|
||||
52: 'FW_ID_RS64_ME',
|
||||
53: 'FW_ID_RS64_ME_P0_DATA',
|
||||
54: 'FW_ID_RS64_ME_P1_DATA',
|
||||
55: 'FW_ID_RS64_PFP',
|
||||
56: 'FW_ID_RS64_PFP_P0_DATA',
|
||||
57: 'FW_ID_RS64_PFP_P1_DATA',
|
||||
58: 'FW_ID_RS64_MEC',
|
||||
59: 'FW_ID_RS64_MEC_P0_DATA',
|
||||
60: 'FW_ID_RS64_MEC_P1_DATA',
|
||||
61: 'FW_ID_RS64_MEC_P2_DATA',
|
||||
62: 'FW_ID_RS64_MEC_P3_DATA',
|
||||
63: 'FW_ID_PPTABLE',
|
||||
64: 'FW_ID_PSP_SOC',
|
||||
65: 'FW_ID_PSP_DBG',
|
||||
66: 'FW_ID_PSP_INTF',
|
||||
67: 'FW_ID_RLX6_CORE1',
|
||||
68: 'FW_ID_RLX6_DRAM_BOOT_CORE1',
|
||||
69: 'FW_ID_RLCV_LX7',
|
||||
70: 'FW_ID_RLC_SAVE_RESTORE_LIST',
|
||||
71: 'FW_ID_ASD',
|
||||
72: 'FW_ID_TA_RAS',
|
||||
73: 'FW_ID_XGMI',
|
||||
74: 'FW_ID_RLC_SRLG',
|
||||
75: 'FW_ID_RLC_SRLS',
|
||||
76: 'FW_ID_SMC',
|
||||
77: 'FW_ID_DMCU',
|
||||
78: 'FW_ID__MAX',
|
||||
47: 'FW_ID_MES_KIQ',
|
||||
48: 'FW_ID_MES_STACK',
|
||||
49: 'FW_ID_MES_THREAD1',
|
||||
50: 'FW_ID_MES_THREAD1_STACK',
|
||||
51: 'FW_ID_RLX6',
|
||||
52: 'FW_ID_RLX6_DRAM_BOOT',
|
||||
53: 'FW_ID_RS64_ME',
|
||||
54: 'FW_ID_RS64_ME_P0_DATA',
|
||||
55: 'FW_ID_RS64_ME_P1_DATA',
|
||||
56: 'FW_ID_RS64_PFP',
|
||||
57: 'FW_ID_RS64_PFP_P0_DATA',
|
||||
58: 'FW_ID_RS64_PFP_P1_DATA',
|
||||
59: 'FW_ID_RS64_MEC',
|
||||
60: 'FW_ID_RS64_MEC_P0_DATA',
|
||||
61: 'FW_ID_RS64_MEC_P1_DATA',
|
||||
62: 'FW_ID_RS64_MEC_P2_DATA',
|
||||
63: 'FW_ID_RS64_MEC_P3_DATA',
|
||||
64: 'FW_ID_PPTABLE',
|
||||
65: 'FW_ID_PSP_SOC',
|
||||
66: 'FW_ID_PSP_DBG',
|
||||
67: 'FW_ID_PSP_INTF',
|
||||
68: 'FW_ID_RLX6_CORE1',
|
||||
69: 'FW_ID_RLX6_DRAM_BOOT_CORE1',
|
||||
70: 'FW_ID_RLCV_LX7',
|
||||
71: 'FW_ID_RLC_SAVE_RESTORE_LIST',
|
||||
72: 'FW_ID_ASD',
|
||||
73: 'FW_ID_TA_RAS',
|
||||
74: 'FW_ID_XGMI',
|
||||
75: 'FW_ID_RLC_SRLG',
|
||||
76: 'FW_ID_RLC_SRLS',
|
||||
77: 'FW_ID_SMC',
|
||||
78: 'FW_ID_DMCU',
|
||||
79: 'FW_ID__MAX',
|
||||
}
|
||||
FW_ID_SMU = 1
|
||||
FW_ID_FIRST = 1
|
||||
@@ -524,38 +525,39 @@ FW_ID_IMU_IRAM = 43
|
||||
FW_ID_SDMA_TH0 = 44
|
||||
FW_ID_SDMA_TH1 = 45
|
||||
FW_ID_CP_MES = 46
|
||||
FW_ID_MES_STACK = 47
|
||||
FW_ID_MES_THREAD1 = 48
|
||||
FW_ID_MES_THREAD1_STACK = 49
|
||||
FW_ID_RLX6 = 50
|
||||
FW_ID_RLX6_DRAM_BOOT = 51
|
||||
FW_ID_RS64_ME = 52
|
||||
FW_ID_RS64_ME_P0_DATA = 53
|
||||
FW_ID_RS64_ME_P1_DATA = 54
|
||||
FW_ID_RS64_PFP = 55
|
||||
FW_ID_RS64_PFP_P0_DATA = 56
|
||||
FW_ID_RS64_PFP_P1_DATA = 57
|
||||
FW_ID_RS64_MEC = 58
|
||||
FW_ID_RS64_MEC_P0_DATA = 59
|
||||
FW_ID_RS64_MEC_P1_DATA = 60
|
||||
FW_ID_RS64_MEC_P2_DATA = 61
|
||||
FW_ID_RS64_MEC_P3_DATA = 62
|
||||
FW_ID_PPTABLE = 63
|
||||
FW_ID_PSP_SOC = 64
|
||||
FW_ID_PSP_DBG = 65
|
||||
FW_ID_PSP_INTF = 66
|
||||
FW_ID_RLX6_CORE1 = 67
|
||||
FW_ID_RLX6_DRAM_BOOT_CORE1 = 68
|
||||
FW_ID_RLCV_LX7 = 69
|
||||
FW_ID_RLC_SAVE_RESTORE_LIST = 70
|
||||
FW_ID_ASD = 71
|
||||
FW_ID_TA_RAS = 72
|
||||
FW_ID_XGMI = 73
|
||||
FW_ID_RLC_SRLG = 74
|
||||
FW_ID_RLC_SRLS = 75
|
||||
FW_ID_SMC = 76
|
||||
FW_ID_DMCU = 77
|
||||
FW_ID__MAX = 78
|
||||
FW_ID_MES_KIQ = 47
|
||||
FW_ID_MES_STACK = 48
|
||||
FW_ID_MES_THREAD1 = 49
|
||||
FW_ID_MES_THREAD1_STACK = 50
|
||||
FW_ID_RLX6 = 51
|
||||
FW_ID_RLX6_DRAM_BOOT = 52
|
||||
FW_ID_RS64_ME = 53
|
||||
FW_ID_RS64_ME_P0_DATA = 54
|
||||
FW_ID_RS64_ME_P1_DATA = 55
|
||||
FW_ID_RS64_PFP = 56
|
||||
FW_ID_RS64_PFP_P0_DATA = 57
|
||||
FW_ID_RS64_PFP_P1_DATA = 58
|
||||
FW_ID_RS64_MEC = 59
|
||||
FW_ID_RS64_MEC_P0_DATA = 60
|
||||
FW_ID_RS64_MEC_P1_DATA = 61
|
||||
FW_ID_RS64_MEC_P2_DATA = 62
|
||||
FW_ID_RS64_MEC_P3_DATA = 63
|
||||
FW_ID_PPTABLE = 64
|
||||
FW_ID_PSP_SOC = 65
|
||||
FW_ID_PSP_DBG = 66
|
||||
FW_ID_PSP_INTF = 67
|
||||
FW_ID_RLX6_CORE1 = 68
|
||||
FW_ID_RLX6_DRAM_BOOT_CORE1 = 69
|
||||
FW_ID_RLCV_LX7 = 70
|
||||
FW_ID_RLC_SAVE_RESTORE_LIST = 71
|
||||
FW_ID_ASD = 72
|
||||
FW_ID_TA_RAS = 73
|
||||
FW_ID_XGMI = 74
|
||||
FW_ID_RLC_SRLG = 75
|
||||
FW_ID_RLC_SRLS = 76
|
||||
FW_ID_SMC = 77
|
||||
FW_ID_DMCU = 78
|
||||
FW_ID__MAX = 79
|
||||
amdsmi_fw_block_t = ctypes.c_uint32 # enum
|
||||
|
||||
# values for enumeration 'amdsmi_vram_type_t'
|
||||
@@ -734,7 +736,7 @@ struct_amdsmi_fw_info_t._pack_ = 1 # source:False
|
||||
struct_amdsmi_fw_info_t._fields_ = [
|
||||
('num_fw_info', ctypes.c_ubyte),
|
||||
('PADDING_0', ctypes.c_ubyte * 7),
|
||||
('fw_info_list', struct_fw_info_list_ * 78),
|
||||
('fw_info_list', struct_fw_info_list_ * 79),
|
||||
('reserved', ctypes.c_uint32 * 7),
|
||||
('PADDING_1', ctypes.c_ubyte * 4),
|
||||
]
|
||||
@@ -1853,13 +1855,13 @@ __all__ = \
|
||||
'FW_ID_CP_PFP', 'FW_ID_CP_PM4', 'FW_ID_DFC', 'FW_ID_DMCU',
|
||||
'FW_ID_DMCU_ERAM', 'FW_ID_DMCU_ISR', 'FW_ID_DRV_CAP',
|
||||
'FW_ID_FIRST', 'FW_ID_IMU_DRAM', 'FW_ID_IMU_IRAM', 'FW_ID_ISP',
|
||||
'FW_ID_MC', 'FW_ID_MES_STACK', 'FW_ID_MES_THREAD1',
|
||||
'FW_ID_MES_THREAD1_STACK', 'FW_ID_MMSCH', 'FW_ID_PPTABLE',
|
||||
'FW_ID_PSP_BL', 'FW_ID_PSP_DBG', 'FW_ID_PSP_INTF',
|
||||
'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC', 'FW_ID_PSP_SOSDRV',
|
||||
'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV', 'FW_ID_PSP_TOC',
|
||||
'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC', 'FW_ID_RLCV_LX7',
|
||||
'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL',
|
||||
'FW_ID_MC', 'FW_ID_MES_KIQ', 'FW_ID_MES_STACK',
|
||||
'FW_ID_MES_THREAD1', 'FW_ID_MES_THREAD1_STACK', 'FW_ID_MMSCH',
|
||||
'FW_ID_PPTABLE', 'FW_ID_PSP_BL', 'FW_ID_PSP_DBG',
|
||||
'FW_ID_PSP_INTF', 'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC',
|
||||
'FW_ID_PSP_SOSDRV', 'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV',
|
||||
'FW_ID_PSP_TOC', 'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC',
|
||||
'FW_ID_RLCV_LX7', 'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL',
|
||||
'FW_ID_RLC_RESTORE_LIST_GPM_MEM',
|
||||
'FW_ID_RLC_RESTORE_LIST_SRM_MEM', 'FW_ID_RLC_SAVE_RESTORE_LIST',
|
||||
'FW_ID_RLC_SRLG', 'FW_ID_RLC_SRLS', 'FW_ID_RLC_V', 'FW_ID_RLX6',
|
||||
|
||||
@@ -70,7 +70,8 @@ extern "C" {
|
||||
*/
|
||||
|
||||
//! Guaranteed maximum possible number of supported frequencies
|
||||
#define RSMI_MAX_NUM_FREQUENCIES 32
|
||||
//! (32 normal + 1 sleep frequency)
|
||||
#define RSMI_MAX_NUM_FREQUENCIES 33
|
||||
|
||||
//! Maximum possible value for fan speed. Should be used as the denominator
|
||||
//! when determining fan speed percentage.
|
||||
@@ -639,6 +640,8 @@ typedef enum {
|
||||
RSMI_FW_BLOCK_ME,
|
||||
RSMI_FW_BLOCK_MEC,
|
||||
RSMI_FW_BLOCK_MEC2,
|
||||
RSMI_FW_BLOCK_MES,
|
||||
RSMI_FW_BLOCK_MES_KIQ,
|
||||
RSMI_FW_BLOCK_PFP,
|
||||
RSMI_FW_BLOCK_RLC,
|
||||
RSMI_FW_BLOCK_RLC_SRLC,
|
||||
@@ -759,6 +762,11 @@ typedef rsmi_power_profile_status_t rsmi_power_profile_status;
|
||||
* @brief This structure holds information about clock frequencies.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* Deep Sleep frequency is only supported by some GPUs
|
||||
*/
|
||||
bool has_deep_sleep;
|
||||
|
||||
/**
|
||||
* The number of supported frequencies
|
||||
*/
|
||||
@@ -1757,6 +1765,30 @@ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask);
|
||||
rsmi_status_t
|
||||
rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power);
|
||||
|
||||
/**
|
||||
* @brief Get the current socket power (also known as instant
|
||||
* power) of the device index provided.
|
||||
*
|
||||
* @details Given a device index @p dv_ind and a pointer to a uint64_t
|
||||
* @p socket_power, this function will write the current socket power
|
||||
* (in microwatts) to the uint64_t pointed to by @p socket_power.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] socket_power a pointer to uint64_t to which the current
|
||||
* socket power will be written to. If this parameter is nullptr,
|
||||
* this function will return ::RSMI_STATUS_INVALID_ARGS if the function is
|
||||
* supported with the provided arguments, and ::RSMI_STATUS_NOT_SUPPORTED
|
||||
* if it is not supported with the provided arguments.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power);
|
||||
|
||||
/**
|
||||
* @brief Get the energy accumulator counter of the device with provided
|
||||
* device index.
|
||||
|
||||
@@ -146,6 +146,8 @@ enum DevInfoTypes {
|
||||
kDevFwVersionMe,
|
||||
kDevFwVersionMec,
|
||||
kDevFwVersionMec2,
|
||||
kDevFwVersionMes,
|
||||
kDevFwVersionMesKiq,
|
||||
kDevFwVersionPfp,
|
||||
kDevFwVersionRlc,
|
||||
kDevFwVersionRlcSrlc,
|
||||
|
||||
@@ -130,18 +130,18 @@ class Logger {
|
||||
break;
|
||||
}
|
||||
return *getInstance();
|
||||
};
|
||||
}
|
||||
|
||||
Logger &operator<<(const char* s) {
|
||||
return operator<<(std::string(s));
|
||||
};
|
||||
}
|
||||
|
||||
template <class T> Logger &operator<<(const T &v) {
|
||||
std::ostringstream s;
|
||||
s << v;
|
||||
std::string str = s.str();
|
||||
return operator<<(str);
|
||||
};
|
||||
}
|
||||
|
||||
// Interface for Error Log
|
||||
void error(const char* text) throw();
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -67,6 +67,8 @@ enum MonitorTypes {
|
||||
kMonPowerCapMax,
|
||||
kMonPowerCapMin,
|
||||
kMonPowerAve,
|
||||
kMonPowerInput,
|
||||
kMonPowerLabel,
|
||||
kMonTempMax,
|
||||
kMonTempMin,
|
||||
kMonTempMaxHyst,
|
||||
@@ -94,45 +96,47 @@ enum MonitorTypes {
|
||||
kMonInvalid = 0xFFFFFFFF,
|
||||
};
|
||||
|
||||
const std::map<MonitorTypes,std::string> monitorTypesToString {
|
||||
{MonitorTypes::kMonName, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTemp, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonFanSpeed, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonMaxFanSpeed, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonFanRPMs, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonFanCntrlEnable, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCap, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCapDefault, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCapMax, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerCapMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonPowerAve, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMax, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMaxHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempMinHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCritical, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCriticalHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempEmergency, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempEmergencyHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCritMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempCritMinHyst, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempOffset, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempLowest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempHighest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonTempLabel, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVolt, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMax, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMinCrit, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMin, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltMaxCrit, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltAverage, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltLowest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltHighest, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonVoltLabel, "amd::smi::kMonName"},
|
||||
{MonitorTypes::kMonInvalid, "amd::smi::kMonName"},
|
||||
const std::map<MonitorTypes, std::string> monitorTypesToString{
|
||||
{MonitorTypes::kMonName, "MonitorTypes::kMonName"},
|
||||
{MonitorTypes::kMonTemp, "MonitorTypes::kMonTemp"},
|
||||
{MonitorTypes::kMonFanSpeed, "MonitorTypes::kMonFanSpeed"},
|
||||
{MonitorTypes::kMonMaxFanSpeed, "MonitorTypes::kMonMaxFanSpeed"},
|
||||
{MonitorTypes::kMonFanRPMs, "MonitorTypes::kMonFanRPMs"},
|
||||
{MonitorTypes::kMonFanCntrlEnable, "MonitorTypes::kMonFanCntrlEnable"},
|
||||
{MonitorTypes::kMonPowerCap, "MonitorTypes::kMonPowerCap"},
|
||||
{MonitorTypes::kMonPowerCapDefault, "MonitorTypes::kMonPowerCapDefault"},
|
||||
{MonitorTypes::kMonPowerCapMax, "MonitorTypes::kMonPowerCapMax"},
|
||||
{MonitorTypes::kMonPowerCapMin, "MonitorTypes::kMonPowerCapMin"},
|
||||
{MonitorTypes::kMonPowerAve, "MonitorTypes::kMonPowerAve"},
|
||||
{MonitorTypes::kMonPowerInput, "MonitorTypes::kMonPowerInput"},
|
||||
{MonitorTypes::kMonPowerLabel, "MonitorTypes::kMonPowerLabel"},
|
||||
{MonitorTypes::kMonTempMax, "MonitorTypes::kMonTempMax"},
|
||||
{MonitorTypes::kMonTempMin, "MonitorTypes::kMonTempMin"},
|
||||
{MonitorTypes::kMonTempMaxHyst, "MonitorTypes::kMonTempMaxHyst"},
|
||||
{MonitorTypes::kMonTempMinHyst, "MonitorTypes::kMonTempMinHyst"},
|
||||
{MonitorTypes::kMonTempCritical, "MonitorTypes::kMonTempCritical"},
|
||||
{MonitorTypes::kMonTempCriticalHyst, "MonitorTypes::kMonTempCriticalHyst"},
|
||||
{MonitorTypes::kMonTempEmergency, "MonitorTypes::kMonTempEmergency"},
|
||||
{MonitorTypes::kMonTempEmergencyHyst,
|
||||
"MonitorTypes::kMonTempEmergencyHyst"},
|
||||
{MonitorTypes::kMonTempCritMin, "MonitorTypes::kMonTempCritMin"},
|
||||
{MonitorTypes::kMonTempCritMinHyst, "MonitorTypes::kMonTempCritMinHyst"},
|
||||
{MonitorTypes::kMonTempOffset, "MonitorTypes::kMonTempOffset"},
|
||||
{MonitorTypes::kMonTempLowest, "MonitorTypes::kMonTempLowest"},
|
||||
{MonitorTypes::kMonTempHighest, "MonitorTypes::kMonTempHighest"},
|
||||
{MonitorTypes::kMonTempLabel, "MonitorTypes::kMonTempLabel"},
|
||||
{MonitorTypes::kMonVolt, "MonitorTypes::kMonVolt"},
|
||||
{MonitorTypes::kMonVoltMax, "MonitorTypes::kMonVoltMax"},
|
||||
{MonitorTypes::kMonVoltMinCrit, "MonitorTypes::kMonVoltMinCrit"},
|
||||
{MonitorTypes::kMonVoltMin, "MonitorTypes::kMonVoltMin"},
|
||||
{MonitorTypes::kMonVoltMaxCrit, "MonitorTypes::kMonVoltMaxCrit"},
|
||||
{MonitorTypes::kMonVoltAverage, "MonitorTypes::kMonVoltAverage"},
|
||||
{MonitorTypes::kMonVoltLowest, "MonitorTypes::kMonVoltLowest"},
|
||||
{MonitorTypes::kMonVoltHighest, "MonitorTypes::kMonVoltHighest"},
|
||||
{MonitorTypes::kMonVoltLabel, "MonitorTypes::kMonVoltLabel"},
|
||||
{MonitorTypes::kMonInvalid, "MonitorTypes::kMonInvalid"},
|
||||
};
|
||||
|
||||
|
||||
class Monitor {
|
||||
public:
|
||||
explicit Monitor(std::string path, RocmSMI_env_vars const *e);
|
||||
|
||||
@@ -74,7 +74,7 @@ Display Options:
|
||||
-a, --showallinfo Show Temperature, Fan and Clock values
|
||||
|
||||
Topology:
|
||||
-i, --showid Show GPU ID
|
||||
-i, --showid Show DEVICE ID
|
||||
-v, --showvbios Show VBIOS version
|
||||
--showdriverversion Show kernel driver version
|
||||
--showfwinfo [BLOCK [BLOCK ...]] Show FW information
|
||||
|
||||
@@ -45,9 +45,8 @@ CLOCK_JSON_VERSION = 1
|
||||
|
||||
headerString = ' ROCm System Management Interface '
|
||||
footerString = ' End of ROCm SMI Log '
|
||||
|
||||
# Output formatting
|
||||
appWidth = 100
|
||||
appWidth = 90
|
||||
deviceList = []
|
||||
|
||||
# Enable or disable serialized format
|
||||
@@ -383,8 +382,8 @@ def getPidList():
|
||||
return
|
||||
|
||||
|
||||
def getPower(device, silent=False):
|
||||
""" Return the current power level of a given device
|
||||
def getAvgPower(device, silent=False):
|
||||
""" Return the average power level of a given device
|
||||
|
||||
@param device: DRM device identifier
|
||||
@param silent=Turn on to silence error output
|
||||
@@ -393,7 +392,21 @@ def getPower(device, silent=False):
|
||||
power = c_uint32()
|
||||
ret = rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power))
|
||||
if rsmi_ret_ok(ret, device, 'get_power_avg', silent):
|
||||
return power.value / 1000000
|
||||
return str(power.value / 1000000)
|
||||
return 'N/A'
|
||||
|
||||
def getCurrentSocketPower(device, silent=False):
    """ Return the current (also known as instant)
    socket power of a given device

    The raw value written by rsmi_dev_current_socket_power_get (documented
    by the C API as microwatts) is divided by 1000000 before it is returned.

    @param device: DRM device identifier
    @param silent=Turn on to silence error output
    (you plan to handle manually). Default is off.
    @return: the scaled power value as a string, or 'N/A' if the call fails
    """
    # Function-scope import so the 64-bit type is guaranteed to be in scope
    from ctypes import c_uint64
    # The C prototype takes a uint64_t*; handing it a 4-byte c_uint32 lets the
    # library write 8 bytes into a 4-byte buffer.
    power = c_uint64()
    ret = rocmsmi.rsmi_dev_current_socket_power_get(device, byref(power))
    if rsmi_ret_ok(ret, device, 'get_socket_power', silent):
        return str(power.value / 1000000)
    return 'N/A'
|
||||
|
||||
|
||||
@@ -437,7 +450,7 @@ def findFirstAvailableTemp(device):
|
||||
temp = c_int64(0)
|
||||
metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT
|
||||
ret_temp = "N/A"
|
||||
ret_temp_type = "(Unknown)"
|
||||
ret_temp_type = temp_type_lst[0]
|
||||
for i, templist_val in enumerate(temp_type_lst):
|
||||
ret = rocmsmi.rsmi_dev_temp_metric_get(c_uint32(device), i, metric, byref(temp))
|
||||
if rsmi_ret_ok(ret, device, 'get_temp_metric_' + templist_val, silent=True):
|
||||
@@ -448,6 +461,37 @@ def findFirstAvailableTemp(device):
|
||||
continue
|
||||
return (ret_temp_type, ret_temp)
|
||||
|
||||
def getTemperatureLabel(deviceList):
    """ Discovers the first available temperature label

    Only the first device in the list is probed. The label is the
    temperature type reported by findFirstAvailableTemp, lower-cased and
    with any parentheses removed.

    @param deviceList: List of DRM devices (can be a single-item list)
    @return: lower-case label string; the first entry of temp_type_lst
    when the device list is empty
    """
    # Default label is the first known temperature type
    defaultLabel = temp_type_lst[0].lower()
    if len(deviceList) < 1:
        return defaultLabel
    firstType, _unusedTemp = findFirstAvailableTemp(deviceList[0])
    lowered = firstType.lower()
    return lowered.replace('(', '').replace(')', '')
|
||||
|
||||
def getPowerLabel(deviceList):
    """ Discovers the first identified power label

    Only the first device in the list is probed: if it reports a usable
    current socket power reading (neither '0.0' nor 'N/A'), the current
    socket power label is chosen; otherwise the average power label is kept.

    @param deviceList: List of DRM devices (can be a single-item list)
    @return: rsmi_power_label.CURRENT_SOCKET_POWER or
    rsmi_power_label.AVG_POWER
    """
    # Default label is AvgPower
    powerLabel = rsmi_power_label.AVG_POWER
    if len(deviceList) < 1:
        return powerLabel
    device = deviceList[0]
    # Query silently; a failed lookup simply leaves the default label
    power = getCurrentSocketPower(device, True)
    if power != '0.0' and power != 'N/A':
        powerLabel = rsmi_power_label.CURRENT_SOCKET_POWER
    return powerLabel
|
||||
|
||||
def getVbiosVersion(device, silent=False):
|
||||
""" Returns the VBIOS version for a given device
|
||||
|
||||
@@ -679,23 +723,35 @@ def printListLog(metricName, valuesList):
|
||||
print(listStr + line)
|
||||
|
||||
|
||||
def printLogSpacer(displayString=None, fill='='):
|
||||
def printLogSpacer(displayString=None, fill='=', contentSizeToFit=0):
|
||||
""" Prints [name of the option]/[name of the program] in the spacer to explain data below
|
||||
|
||||
If no parameters are given, a default fill of the '=' string is used in the spacer
|
||||
|
||||
@param displayString: name of item to be displayed inside of the log spacer
|
||||
@param fill: padding string which surrounds the given display string
|
||||
@param contentSizeToFit: providing an integer > 0 allows
|
||||
ability to dynamically change output padding/fill based on this value
|
||||
instead of appWidth. Handy for concise info output.
|
||||
"""
|
||||
global appWidth, PRINT_JSON
|
||||
resizeValue = appWidth
|
||||
if contentSizeToFit != 0:
|
||||
resizeValue = contentSizeToFit
|
||||
if resizeValue % 2: # if odd -> make even
|
||||
resizeValue += 1
|
||||
# leaving below to check if resizing works properly
|
||||
# print("resizeVal=" +str(resizeValue) + "; appWidth=" + str(appWidth) +
|
||||
# "; contentSizeToFit=" + str(contentSizeToFit) + "; fill=" + fill)
|
||||
|
||||
if not PRINT_JSON:
|
||||
if displayString:
|
||||
if len(displayString) % 2:
|
||||
displayString += fill
|
||||
logSpacer = fill * int((appWidth - (len(displayString))) / 2) + displayString + fill * int(
|
||||
(appWidth - (len(displayString))) / 2)
|
||||
logSpacer = fill * int((resizeValue - (len(displayString))) / 2) + displayString + fill * int(
|
||||
(resizeValue - (len(displayString))) / 2)
|
||||
else:
|
||||
logSpacer = fill * appWidth
|
||||
logSpacer = fill * resizeValue
|
||||
print(logSpacer)
|
||||
|
||||
|
||||
@@ -1630,22 +1686,15 @@ def showAllConcise(deviceList):
|
||||
print('ERROR: Cannot print JSON/CSV output for concise output')
|
||||
sys.exit(1)
|
||||
|
||||
""" Place holder for the actual max size """
|
||||
MAX_ALL_CONCISE_WIDTH = 100
|
||||
appWidth_temp = appWidth
|
||||
appWidth = MAX_ALL_CONCISE_WIDTH
|
||||
silent = True
|
||||
|
||||
printLogSpacer(' Concise Info ')
|
||||
deviceList.sort()
|
||||
temp_type = '(' + temp_type_lst[0] + ')'
|
||||
if len(deviceList) >= 1:
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
available_temp_type = temp_type.lower()
|
||||
available_temp_type = available_temp_type.replace('(', '')
|
||||
available_temp_type = available_temp_type.replace(')', '')
|
||||
header = ['GPU', '[Model : Revision]', 'Temp', 'AvgPwr', 'Partitions', 'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%']
|
||||
subheader = ['', 'Name (20 chars)', temp_type, '', '(Mem, Compute)', '', '', '', '', '', '', '']
|
||||
available_temp_type = getTemperatureLabel(deviceList)
|
||||
temp_type = "(" + available_temp_type.capitalize() + ")"
|
||||
header=['Device', '[Model : Revision]', 'Temp', 'Power', 'Partitions',
|
||||
'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%']
|
||||
subheader = ['', 'Name (20 chars)', temp_type, getPowerLabel(deviceList),
|
||||
'(Mem, Compute)', '', '', '', '', '', '', '']
|
||||
# add additional spaces to match header
|
||||
for idx, item in enumerate(subheader):
|
||||
header_size = len(header[idx])
|
||||
@@ -1667,11 +1716,17 @@ def showAllConcise(deviceList):
|
||||
temp_val = str(getTemp(device, available_temp_type, silent))
|
||||
if temp_val != 'N/A':
|
||||
temp_val += degree_sign + 'C'
|
||||
avgPwr = str(getPower(device))
|
||||
if avgPwr != '0.0' and avgPwr != 'N/A':
|
||||
socketPwr = getCurrentSocketPower(device, True)
|
||||
avgPwr = getAvgPower(device, True)
|
||||
powerVal = 'N/A'
|
||||
if socketPwr != '0.0' and socketPwr != 'N/A':
|
||||
socketPwr += 'W'
|
||||
powerVal=socketPwr
|
||||
elif avgPwr != '0.0' and avgPwr != 'N/A':
|
||||
avgPwr += 'W'
|
||||
powerVal=avgPwr
|
||||
else:
|
||||
avgPwr = 'N/A'
|
||||
powerVal = 'N/A'
|
||||
combined_partition = (getMemoryPartition(device, silent) + ", "
|
||||
+ getComputePartition(device, silent))
|
||||
sclk = showCurrentClocks([device], 'sclk', concise=silent)
|
||||
@@ -1704,10 +1759,10 @@ def showAllConcise(deviceList):
|
||||
'', '', '', '']
|
||||
gpu_dev_product_info_top_name = gpu_dev_product_info_names[1]
|
||||
|
||||
values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val, avgPwr,
|
||||
combined_partition, sclk, mclk,
|
||||
fan, str(perf).lower(), pwrCap,
|
||||
mem_use_pct, gpu_busy]
|
||||
values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val,
|
||||
powerVal, combined_partition, sclk, mclk,
|
||||
fan, str(perf).lower(), pwrCap, mem_use_pct,
|
||||
gpu_busy]
|
||||
|
||||
val_widths = {}
|
||||
for device in deviceList:
|
||||
@@ -1716,10 +1771,17 @@ def showAllConcise(deviceList):
|
||||
for device in deviceList:
|
||||
for col in range(len(val_widths[device])):
|
||||
max_widths[col] = max(max_widths[col], val_widths[device][col])
|
||||
printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)), None)
|
||||
printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader)),
|
||||
None, useItalics=True)
|
||||
printLogSpacer(fill='=')
|
||||
|
||||
########################
|
||||
# Display concise info #
|
||||
########################
|
||||
header_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header))
|
||||
subheader_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader))
|
||||
printLogSpacer(headerString, contentSizeToFit=len(header_output))
|
||||
printLogSpacer(' Concise Info ', contentSizeToFit=len(header_output))
|
||||
printLog(None, header_output, None)
|
||||
printLog(None, subheader_output, None, useItalics=True)
|
||||
printLogSpacer(fill='=', contentSizeToFit=len(header_output))
|
||||
|
||||
for device in deviceList:
|
||||
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
|
||||
@@ -1730,9 +1792,8 @@ def showAllConcise(deviceList):
|
||||
printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in
|
||||
zip(range(len(max_widths)), values['card%s_Info' % (str(device))])), None)
|
||||
|
||||
printLogSpacer()
|
||||
""" Restore original max size """
|
||||
appWidth = appWidth_temp
|
||||
printLogSpacer(contentSizeToFit=len(header_output))
|
||||
printLogSpacer(footerString, contentSizeToFit=len(header_output))
|
||||
|
||||
|
||||
def showAllConciseHw(deviceList):
|
||||
@@ -1808,12 +1869,21 @@ def showClocks(deviceList):
|
||||
if not rsmi_ret_ok(ret, device, 'get_clk_freq_' + clk_type, True):
|
||||
continue
|
||||
printLog(device, 'Supported %s frequencies on GPU%s' % (clk_type, str(device)), None)
|
||||
for x in range(freq.num_supported):
|
||||
fr = '{:>.0f}Mhz'.format(freq.frequency[x] / 1000000)
|
||||
if x == freq.current:
|
||||
printLog(device, str(x), str(fr) + ' *')
|
||||
else:
|
||||
printLog(device, str(x), str(fr))
|
||||
for i in range(freq.num_supported):
|
||||
freq_string = '{:>.0f}Mhz'.format(freq.frequency[i] / 1000000)
|
||||
if i == freq.current:
|
||||
freq_string += ' *'
|
||||
freq_index = i
|
||||
# Deep Sleep frequency is only supported by some GPUs
|
||||
# It is indicated by letter 'S' instead of the index number
|
||||
if freq.has_deep_sleep:
|
||||
# sleep state
|
||||
if i == 0:
|
||||
freq_index = 'S'
|
||||
# all indices are offset by 1 because Deep Sleep occupies index 0
|
||||
else:
|
||||
freq_index = i - 1
|
||||
printLog(device, str(freq_index), freq_string)
|
||||
printLog(device, '', None)
|
||||
else:
|
||||
logging.debug('{} frequency is unsupported on device[{}]'.format(clk_type, device))
|
||||
@@ -1822,12 +1892,11 @@ def showClocks(deviceList):
|
||||
ret = rocmsmi.rsmi_dev_pci_bandwidth_get(device, byref(bw))
|
||||
if rsmi_ret_ok(ret, device, 'get_PCIe_bandwidth', True):
|
||||
printLog(device, 'Supported %s frequencies on GPU%s' % ('PCIe', str(device)), None)
|
||||
for x in range(bw.transfer_rate.num_supported):
|
||||
fr = '{:>.1f}GT/s x{}'.format(bw.transfer_rate.frequency[x] / 1000000000, bw.lanes[x])
|
||||
if x == bw.transfer_rate.current:
|
||||
printLog(device, str(x), str(fr) + ' *')
|
||||
else:
|
||||
printLog(device, str(x), str(fr))
|
||||
for i in range(bw.transfer_rate.num_supported):
|
||||
freq_string = '{:>.1f}GT/s x{}'.format(bw.transfer_rate.frequency[i] / 1000000000, bw.lanes[i])
|
||||
if i == bw.transfer_rate.current:
|
||||
freq_string += ' *'
|
||||
printLog(device, str(i), str(freq_string))
|
||||
printLog(device, '', None)
|
||||
else:
|
||||
logging.debug('PCIe frequency is unsupported on device [{}]'.format(device))
|
||||
@@ -1857,9 +1926,17 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
|
||||
printLog(device, '%s current clock frequency not found' % (clk_defined), None)
|
||||
continue
|
||||
fr = freq.frequency[levl] / 1000000
|
||||
freq_index = levl
|
||||
if freq.has_deep_sleep:
|
||||
# sleep state
|
||||
if levl == 0:
|
||||
freq_index = 'S'
|
||||
# all indices are offset by 1 because Deep Sleep occupies index 0
|
||||
else:
|
||||
freq_index = levl - 1
|
||||
if concise: # in case function is used for concise output, no need to print.
|
||||
return '{:.0f}Mhz'.format(fr)
|
||||
printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(levl, fr))
|
||||
printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(freq_index, fr))
|
||||
elif not concise:
|
||||
logging.debug('{} clock is unsupported on device[{}]'.format(clk_defined, device))
|
||||
|
||||
@@ -1872,12 +1949,20 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False):
|
||||
if levl >= freq.num_supported:
|
||||
printLog(device, '%s current clock frequency not found' % (clk_type), None)
|
||||
continue
|
||||
freq_index = levl
|
||||
if freq.has_deep_sleep:
|
||||
# sleep state
|
||||
if levl == 0:
|
||||
freq_index = 'S'
|
||||
# all indices are offset by 1 because Deep Sleep occupies index 0
|
||||
else:
|
||||
freq_index = levl - 1
|
||||
fr = freq.frequency[levl] / 1000000
|
||||
if PRINT_JSON:
|
||||
printLog(device, '%s clock speed:' % (clk_type), '(%sMhz)' % (str(fr)[:-2]))
|
||||
printLog(device, '%s clock level:' % (clk_type), levl)
|
||||
printLog(device, '%s clock level:' % (clk_type), freq_index)
|
||||
else:
|
||||
printLog(device, '%s clock level: %s' % (clk_type, levl), '(%sMhz)' % (str(fr)[:-2]))
|
||||
printLog(device, '%s clock level: %s' % (clk_type, freq_index), '(%sMhz)' % (str(fr)[:-2]))
|
||||
elif not concise:
|
||||
logging.debug('{} clock is unsupported on device[{}]'.format(clk_type, device))
|
||||
# pcie clocks
|
||||
@@ -2108,8 +2193,8 @@ def showId(deviceList):
|
||||
"""
|
||||
printLogSpacer(' ID ')
|
||||
for device in deviceList:
|
||||
printLog(device, 'GPU ID', getId(device))
|
||||
printLog(device, 'GPU Rev', getRev(device))
|
||||
printLog(device, 'Device ID', getId(device))
|
||||
printLog(device, 'Device Rev', getRev(device))
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
@@ -2336,23 +2421,25 @@ def showPids(verbose):
|
||||
|
||||
|
||||
def showPower(deviceList):
|
||||
""" Display current Average Graphics Package Power Consumption for a list of devices
|
||||
""" Display Current (also known as instant) Socket or Average
|
||||
Graphics Package Power Consumption for a list of devices
|
||||
|
||||
@param deviceList: List of DRM devices (can be a single-item list)
|
||||
"""
|
||||
secondaryPresent=False
|
||||
printLogSpacer(' Power Consumption ')
|
||||
for device in deviceList:
|
||||
if checkIfSecondaryDie(device):
|
||||
if str(getCurrentSocketPower(device, True)) != 'N/A':
|
||||
printLog(device, 'Current Socket Graphics Package Power (W)', getCurrentSocketPower(device))
|
||||
elif checkIfSecondaryDie(device):
|
||||
printLog(device, 'Average Graphics Package Power (W)', "N/A (Secondary die)")
|
||||
secondaryPresent=True
|
||||
elif str(getPower(device)) != '0.0':
|
||||
printLog(device, 'Average Graphics Package Power (W)', getPower(device))
|
||||
elif str(getAvgPower(device)) != '0.0':
|
||||
printLog(device, 'Average Graphics Package Power (W)', getAvgPower(device))
|
||||
else:
|
||||
printErrLog(device, 'Unable to get Average Graphics Package Power Consumption')
|
||||
printErrLog(device, 'Unable to get Average or Current Socket Graphics Package Power Consumption')
|
||||
if secondaryPresent:
|
||||
printLog(None, "\n\t\tPrimary die (usually one above or below the secondary) shows total (primary + secondary) socket power information", None)
|
||||
|
||||
printLogSpacer()
|
||||
|
||||
|
||||
@@ -2848,13 +2935,8 @@ def getGraphColor(percentage):
|
||||
|
||||
def showTempGraph(deviceList):
|
||||
deviceList.sort()
|
||||
temp_type = '(' + temp_type_lst[0] + ')'
|
||||
if len(deviceList) >= 1:
|
||||
(temp_type, _) = findFirstAvailableTemp(deviceList[0])
|
||||
printLogSpacer(' Temperature Graph ' + temp_type + ' ')
|
||||
temp_type = temp_type.lower()
|
||||
temp_type = temp_type.replace('(', '')
|
||||
temp_type = temp_type.replace(')', '')
|
||||
temp_type = getTemperatureLabel(deviceList)
|
||||
printLogSpacer(' Temperature Graph ' + temp_type.capitalize() + ' ')
|
||||
# Start a thread for constantly printing
|
||||
try:
|
||||
# Create a thread (call print function, devices, delay in ms)
|
||||
@@ -3523,9 +3605,14 @@ def save(deviceList, savefilepath):
|
||||
|
||||
|
||||
# The code below is for when this script is run as an executable instead of when imported as a module
|
||||
def isConciseInfoRequested(args):
    """ Tell whether the command line asks for nothing beyond the concise summary

    The decision combines the raw number of command-line tokens in sys.argv
    with the parsed flags. The result is truthy when:
      - only the script name was given;
      - one extra token was given and alldevices, json or csv is set;
      - two extra tokens were given and alldevices is set together with
        json or csv.

    @param args: Parsed argument namespace (alldevices, json and csv are read)
    """
    argCount = len(sys.argv)
    if argCount == 1:
        # Bare invocation: no options at all
        return True
    if argCount == 2:
        # Trailing "or False" keeps a falsy flag value from leaking out as the result
        return args.alldevices or args.json or args.csv or False
    if argCount == 3:
        return args.alldevices and (args.json or args.csv)
    return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description=f'AMD ROCm System Management Interface | ROCM-SMI version: {__version__}',
|
||||
description='AMD ROCm System Management Interface | ROCM-SMI version: %s' % __version__,
|
||||
formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=90, width=120))
|
||||
groupDev = parser.add_argument_group()
|
||||
groupDisplayOpt = parser.add_argument_group('Display Options')
|
||||
@@ -3545,7 +3632,7 @@ if __name__ == '__main__':
|
||||
groupDisplayOpt.add_argument('--showhw', help='Show Hardware details', action='store_true')
|
||||
groupDisplayOpt.add_argument('-a', '--showallinfo', help='Show Temperature, Fan and Clock values',
|
||||
action='store_true')
|
||||
groupDisplayTop.add_argument('-i', '--showid', help='Show GPU ID', action='store_true')
|
||||
groupDisplayTop.add_argument('-i', '--showid', help='Show DEVICE ID', action='store_true')
|
||||
groupDisplayTop.add_argument('-v', '--showvbios', help='Show VBIOS version', action='store_true')
|
||||
groupDisplayTop.add_argument('-e', '--showevents', help='Show event list', metavar='EVENT', type=str, nargs='*')
|
||||
groupDisplayTop.add_argument('--showdriverversion', help='Show kernel driver version', action='store_true')
|
||||
@@ -3731,7 +3818,8 @@ if __name__ == '__main__':
|
||||
|
||||
if not PRINT_JSON:
|
||||
print('\n')
|
||||
printLogSpacer(headerString)
|
||||
if not isConciseInfoRequested(args):
|
||||
printLogSpacer(headerString)
|
||||
|
||||
if args.showallinfo:
|
||||
args.list = True
|
||||
@@ -3785,9 +3873,7 @@ if __name__ == '__main__':
|
||||
if not checkAmdGpus(deviceList):
|
||||
logging.warning('No AMD GPUs specified')
|
||||
|
||||
if len(sys.argv) == 1 or \
|
||||
len(sys.argv) == 2 and (args.alldevices or (args.json or args.csv)) or \
|
||||
len(sys.argv) == 3 and (args.alldevices and (args.json or args.csv)):
|
||||
if isConciseInfoRequested(args):
|
||||
showAllConcise(deviceList)
|
||||
if args.showhw:
|
||||
showAllConciseHw(deviceList)
|
||||
@@ -3994,7 +4080,8 @@ if __name__ == '__main__':
|
||||
devCsv = formatCsv(deviceList)
|
||||
print(devCsv)
|
||||
|
||||
printLogSpacer(footerString)
|
||||
if not isConciseInfoRequested(args):
|
||||
printLogSpacer(footerString)
|
||||
|
||||
rsmi_ret_ok(rocmsmi.rsmi_shut_down())
|
||||
exit(RETCODE)
|
||||
|
||||
@@ -59,7 +59,7 @@ gpu_id = c_uint32(0)
|
||||
|
||||
|
||||
# Policy enums
|
||||
RSMI_MAX_NUM_FREQUENCIES = 32
|
||||
RSMI_MAX_NUM_FREQUENCIES = 33
|
||||
RSMI_MAX_FAN_SPEED = 255
|
||||
RSMI_NUM_VOLTAGE_CURVE_POINTS = 3
|
||||
|
||||
@@ -492,7 +492,8 @@ rsmi_power_profile_status = rsmi_power_profile_status_t
|
||||
|
||||
|
||||
class rsmi_frequencies_t(Structure):
|
||||
_fields_ = [('num_supported', c_int32),
|
||||
_fields_ = [('has_deep_sleep', c_bool),
|
||||
('num_supported', c_int32),
|
||||
('current', c_uint32),
|
||||
('frequency', c_uint64 * RSMI_MAX_NUM_FREQUENCIES)]
|
||||
|
||||
@@ -654,3 +655,8 @@ rsmi_nps_mode_type = rsmi_nps_mode_type_t
|
||||
# nps_mode_type_l[rsmi_nps_mode_type_t.RSMI_MEMORY_PARTITION_NPS2]
|
||||
# will return string 'NPS2'
|
||||
nps_mode_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8']
|
||||
|
||||
class rsmi_power_label(str, Enum):
    """ String-valued labels for the kind of power reading being reported

    Members are also str instances, so they compare equal to their literal
    text and can be concatenated straight into output labels.
    """
    # Label text for an average power reading
    AVG_POWER = '(Avg)'
    # Label text for a current socket power reading
    CURRENT_SOCKET_POWER = '(Socket)'
|
||||
|
||||
|
||||
+122
-26
@@ -77,7 +77,6 @@
|
||||
#include "rocm_smi/rocm_smi64Config.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
using namespace amd::smi;
|
||||
|
||||
static const uint32_t kMaxOverdriveLevel = 20;
|
||||
@@ -147,14 +146,21 @@ static uint64_t freq_string_to_int(const std::vector<std::string> &freq_lines,
|
||||
|
||||
std::istringstream fs(freq_lines[i]);
|
||||
|
||||
uint32_t ind;
|
||||
char junk_ch;
|
||||
int ind;
|
||||
float freq;
|
||||
std::string junk;
|
||||
std::string junk_str;
|
||||
std::string units_str;
|
||||
std::string star_str;
|
||||
|
||||
fs >> ind;
|
||||
fs >> junk; // colon
|
||||
if (fs.peek() == 'S') {
|
||||
// Deep Sleep frequency is only supported by some GPUs
|
||||
fs >> junk_ch;
|
||||
} else {
|
||||
// All other frequency indices are numbers
|
||||
fs >> ind;
|
||||
}
|
||||
fs >> junk_str; // colon
|
||||
fs >> freq;
|
||||
fs >> units_str;
|
||||
fs >> star_str;
|
||||
@@ -1127,9 +1133,14 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_
|
||||
}
|
||||
|
||||
f->num_supported = static_cast<uint32_t>(val_vec.size());
|
||||
bool current = false;
|
||||
f->current = RSMI_MAX_NUM_FREQUENCIES + 1; // init to an invalid value
|
||||
|
||||
// Deep Sleep frequency is only supported by some GPUs
|
||||
// It is indicated by letter 'S' instead of the index number
|
||||
f->has_deep_sleep = (val_vec[0][0] == 'S');
|
||||
|
||||
bool current = false;
|
||||
|
||||
for (uint32_t i = 0; i < f->num_supported; ++i) {
|
||||
f->frequency[i] = freq_string_to_int(val_vec, ¤t, lanes, i);
|
||||
|
||||
@@ -1156,9 +1167,9 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_
|
||||
sysvalue += " Previous Value";
|
||||
sysvalue += ' ' + std::to_string(f->frequency[f->current]);
|
||||
DEBUG_LOG("More than one current clock. ", sysvalue);
|
||||
}
|
||||
else
|
||||
} else {
|
||||
f->current = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1309,6 +1320,11 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
|
||||
return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
}
|
||||
|
||||
// find last_item but skip empty lines
|
||||
int last_item = val_vec.size()-1;
|
||||
while (val_vec[last_item].empty() || val_vec[last_item][0] == 0)
|
||||
last_item--;
|
||||
|
||||
p->curr_sclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
|
||||
nullptr, kOD_SCLK_label_array_index + 1);
|
||||
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
|
||||
@@ -1322,16 +1338,18 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
|
||||
} else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") {
|
||||
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
|
||||
nullptr, kOD_MCLK_label_array_index + 1);
|
||||
// the upper memory frequency is the last
|
||||
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
|
||||
nullptr, kOD_MCLK_label_array_index + 4);
|
||||
nullptr, last_item);
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
} else if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") {
|
||||
p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
|
||||
nullptr, kOD_SCLK_label_array_index + 3);
|
||||
p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr,
|
||||
nullptr, kOD_MCLK_label_array_index + 2);
|
||||
// the upper memory frequency is the last
|
||||
p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr,
|
||||
nullptr, kOD_MCLK_label_array_index + 5);
|
||||
nullptr, last_item);
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
} else {
|
||||
return RSMI_STATUS_NOT_YET_IMPLEMENTED;
|
||||
@@ -1708,6 +1726,8 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block,
|
||||
{ RSMI_FW_BLOCK_ME, amd::smi::kDevFwVersionMe },
|
||||
{ RSMI_FW_BLOCK_MEC, amd::smi::kDevFwVersionMec },
|
||||
{ RSMI_FW_BLOCK_MEC2, amd::smi::kDevFwVersionMec2 },
|
||||
{ RSMI_FW_BLOCK_MES, amd::smi::kDevFwVersionMes },
|
||||
{ RSMI_FW_BLOCK_MES_KIQ, amd::smi::kDevFwVersionMesKiq },
|
||||
{ RSMI_FW_BLOCK_PFP, amd::smi::kDevFwVersionPfp },
|
||||
{ RSMI_FW_BLOCK_RLC, amd::smi::kDevFwVersionRlc },
|
||||
{ RSMI_FW_BLOCK_RLC_SRLC, amd::smi::kDevFwVersionRlcSrlc },
|
||||
@@ -2485,21 +2505,22 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
|
||||
amd::smi::MonitorTypes mon_type = amd::smi::kMonInvalid;
|
||||
uint16_t val_ui16;
|
||||
|
||||
static const std::map<rsmi_temperature_metric_t, amd::smi::MonitorTypes> kMetricTypeMap = {
|
||||
{ RSMI_TEMP_CURRENT, amd::smi::kMonTemp },
|
||||
{ RSMI_TEMP_MAX, amd::smi::kMonTempMax },
|
||||
{ RSMI_TEMP_MIN, amd::smi::kMonTempMin },
|
||||
{ RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst },
|
||||
{ RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst },
|
||||
{ RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical },
|
||||
{ RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst },
|
||||
{ RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency },
|
||||
{ RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst },
|
||||
{ RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin },
|
||||
{ RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst },
|
||||
{ RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset },
|
||||
{ RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest },
|
||||
{ RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest },
|
||||
static const std::map<rsmi_temperature_metric_t, amd::smi::MonitorTypes>
|
||||
kMetricTypeMap = {
|
||||
{ RSMI_TEMP_CURRENT, amd::smi::kMonTemp },
|
||||
{ RSMI_TEMP_MAX, amd::smi::kMonTempMax },
|
||||
{ RSMI_TEMP_MIN, amd::smi::kMonTempMin },
|
||||
{ RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst },
|
||||
{ RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst },
|
||||
{ RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical },
|
||||
{ RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst },
|
||||
{ RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency },
|
||||
{ RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst },
|
||||
{ RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin },
|
||||
{ RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst },
|
||||
{ RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset },
|
||||
{ RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest },
|
||||
{ RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest },
|
||||
};
|
||||
|
||||
const auto mon_type_it = kMetricTypeMap.find(metric);
|
||||
@@ -2584,7 +2605,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type,
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
*temperature = static_cast<int64_t>(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE;
|
||||
*temperature =
|
||||
static_cast<int64_t>(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE;
|
||||
|
||||
ss << __PRETTY_FUNCTION__ << " | ======= end ======= "
|
||||
<< " | Success "
|
||||
@@ -2919,6 +2941,80 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) {
|
||||
CATCH
|
||||
}
|
||||
|
||||
// Reads the current socket power of device dv_ind into *socket_power.
//
// The value comes from the hwmon power input monitor (kMonPowerInput) at
// sensor index 1. It is only read after the power label monitor
// (kMonPowerLabel) at that same index reads back exactly "PPT".
//
// Returns:
//   RSMI_STATUS_INVALID_ARGS  - socket_power is nullptr
//   RSMI_STATUS_NOT_SUPPORTED - the device has no hwmon monitor, or the power
//                               label is not "PPT"
//   ErrnoToRsmiStatus(ret)    - reading the power label failed
//   otherwise the status returned by get_dev_mon_value()
//
// A "Success" trace (including the value read) is only emitted when
// get_dev_mon_value() returns RSMI_STATUS_SUCCESS; any other status is logged
// as a failure without dereferencing socket_power.
rsmi_status_t
rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power) {
  TRY
  std::ostringstream ss;
  rsmi_status_t rsmiReturn = RSMI_STATUS_NOT_SUPPORTED;
  std::string val_str;
  uint32_t sensor_ind = 1;  // socket_power sysfs files have 1-based indices
  MonitorTypes mon_type = amd::smi::kMonPowerInput;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind="
     << std::to_string(dv_ind);
  LOG_TRACE(ss);

  if (socket_power == nullptr) {
    rsmiReturn = RSMI_STATUS_INVALID_ARGS;
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: socket_power was a null ptr reference"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  // NOTE(review): `dev`, used below, is not declared in this function; it is
  // presumably introduced by one of these macros - confirm in their
  // definitions.
  CHK_SUPPORT_SUBVAR_ONLY(socket_power, sensor_ind)
  DEVICE_MUTEX

  if (dev->monitor() == nullptr) {
    // rsmiReturn still holds its initial RSMI_STATUS_NOT_SUPPORTED here.
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: hwmon monitor was a null ptr reference"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  // Only treat the power input at this index as socket power when its label
  // is exactly "PPT". Comparing the whole string already covers the length,
  // so no separate size check is needed.
  int ret = dev->monitor()->readMonitor(amd::smi::kMonPowerLabel,
                                        sensor_ind, &val_str);
  if (ret != 0 || val_str != "PPT") {
    if (ret != 0) {
      rsmiReturn = amd::smi::ErrnoToRsmiStatus(ret);
    }
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: readMonitor() returned an error status"
       << " or Socket Power label did not show PPT or size of label data was"
       << " unexpected"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  rsmiReturn = get_dev_mon_value(mon_type, dv_ind, sensor_ind,
                                 socket_power);
  if (rsmiReturn != RSMI_STATUS_SUCCESS) {
    // The read did not succeed, so *socket_power is not known to hold a valid
    // reading: report the failure and do not print the value.
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: " << monitorTypesToString.at(mon_type)
       << " | Cause: get_dev_mon_value() returned an error status"
       << " | Returning = "
       << getRSMIStatusString(rsmiReturn) << " |";
    LOG_ERROR(ss);
    return rsmiReturn;
  }

  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: " << monitorTypesToString.at(mon_type)
     << " | Data: " << *socket_power
     << " | Returning = "
     << getRSMIStatusString(rsmiReturn) << " |";
  LOG_TRACE(ss);
  return rsmiReturn;
  CATCH
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power,
|
||||
float *counter_resolution, uint64_t *timestamp) {
|
||||
|
||||
@@ -68,8 +68,6 @@
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -141,6 +139,8 @@ static const char *kDevFwVersionMcFName = "fw_version/mc_fw_version";
|
||||
static const char *kDevFwVersionMeFName = "fw_version/me_fw_version";
|
||||
static const char *kDevFwVersionMecFName = "fw_version/mec_fw_version";
|
||||
static const char *kDevFwVersionMec2FName = "fw_version/mec2_fw_version";
|
||||
static const char *kDevFwVersionMesFName = "fw_version/mes_fw_version";
|
||||
static const char *kDevFwVersionMesKiqFName = "fw_version/mes_kiq_fw_version";
|
||||
static const char *kDevFwVersionPfpFName = "fw_version/pfp_fw_version";
|
||||
static const char *kDevFwVersionRlcFName = "fw_version/rlc_fw_version";
|
||||
static const char *kDevFwVersionRlcSrlcFName = "fw_version/rlc_srlc_fw_version";
|
||||
@@ -284,6 +284,8 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevFwVersionMe, kDevFwVersionMeFName},
|
||||
{kDevFwVersionMec, kDevFwVersionMecFName},
|
||||
{kDevFwVersionMec2, kDevFwVersionMec2FName},
|
||||
{kDevFwVersionMes, kDevFwVersionMesFName},
|
||||
{kDevFwVersionMesKiq, kDevFwVersionMesKiqFName},
|
||||
{kDevFwVersionPfp, kDevFwVersionPfpFName},
|
||||
{kDevFwVersionRlc, kDevFwVersionRlcFName},
|
||||
{kDevFwVersionRlcSrlc, kDevFwVersionRlcSrlcFName},
|
||||
@@ -347,6 +349,8 @@ static std::map<DevInfoTypes, uint8_t> kDevInfoVarTypeToRSMIVariant = {
|
||||
{kDevFwVersionMe, RSMI_FW_BLOCK_ME},
|
||||
{kDevFwVersionMec, RSMI_FW_BLOCK_MEC},
|
||||
{kDevFwVersionMec2, RSMI_FW_BLOCK_MEC2},
|
||||
{kDevFwVersionMes, RSMI_FW_BLOCK_MES},
|
||||
{kDevFwVersionMesKiq, RSMI_FW_BLOCK_MES_KIQ},
|
||||
{kDevFwVersionPfp, RSMI_FW_BLOCK_PFP},
|
||||
{kDevFwVersionRlc, RSMI_FW_BLOCK_RLC},
|
||||
{kDevFwVersionRlcSrlc, RSMI_FW_BLOCK_RLC_SRLC},
|
||||
@@ -482,6 +486,8 @@ static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
kDevFwVersionMe,
|
||||
kDevFwVersionMec,
|
||||
kDevFwVersionMec2,
|
||||
kDevFwVersionMes,
|
||||
kDevFwVersionMesKiq,
|
||||
kDevFwVersionPfp,
|
||||
kDevFwVersionRlc,
|
||||
kDevFwVersionRlcSrlc,
|
||||
@@ -962,6 +968,8 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) {
|
||||
case kDevFwVersionMe:
|
||||
case kDevFwVersionMec:
|
||||
case kDevFwVersionMec2:
|
||||
case kDevFwVersionMes:
|
||||
case kDevFwVersionMesKiq:
|
||||
case kDevFwVersionPfp:
|
||||
case kDevFwVersionRlc:
|
||||
case kDevFwVersionRlcSrlc:
|
||||
|
||||
@@ -61,7 +61,6 @@
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
using namespace amd::smi;
|
||||
|
||||
#define TRY try {
|
||||
|
||||
@@ -71,9 +71,8 @@
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
Logger* Logger::m_Instance = nullptr;
|
||||
ROCmLogging::Logger *ROCmLogging::Logger::m_Instance = nullptr;
|
||||
|
||||
// Log file name
|
||||
// WARNING: File name should be changed here and
|
||||
@@ -81,39 +80,39 @@ Logger* Logger::m_Instance = nullptr;
|
||||
// in one place will cause a mismatch in these scripts,
|
||||
// files may not have proper permissions, and logrotate
|
||||
// would not function properly.
|
||||
const std::string logPath = "/var/log/amd_smi_lib/";
|
||||
const std::string logBaseFName = "AMD-SMI-lib";
|
||||
const std::string logExtension = ".log";
|
||||
const std::string logFileName = logPath + logBaseFName + logExtension;
|
||||
#define LOGPATH "/var/log/amd_smi_lib/"
|
||||
#define LOGBASE_FNAME "AMD-SMI-lib"
|
||||
#define LOGEXTENSION ".log"
|
||||
const char *logFileName = LOGPATH LOGBASE_FNAME LOGEXTENSION;
|
||||
|
||||
Logger::Logger() {
|
||||
ROCmLogging::Logger::Logger() {
|
||||
initialize_resources();
|
||||
}
|
||||
|
||||
Logger::~Logger() {
|
||||
ROCmLogging::Logger::~Logger() {
|
||||
if (m_loggingIsOn) {
|
||||
destroy_resources();
|
||||
}
|
||||
}
|
||||
|
||||
Logger* Logger::getInstance() throw() {
|
||||
ROCmLogging::Logger* ROCmLogging::Logger::getInstance() throw() {
|
||||
if (m_Instance == nullptr) {
|
||||
m_Instance = new Logger();
|
||||
m_Instance = new ROCmLogging::Logger();
|
||||
}
|
||||
return m_Instance;
|
||||
}
|
||||
|
||||
void Logger::lock() {
|
||||
void ROCmLogging::Logger::lock() {
|
||||
m_Lock.lock();
|
||||
}
|
||||
|
||||
void Logger::unlock() {
|
||||
void ROCmLogging::Logger::unlock() {
|
||||
m_Lock.unlock();
|
||||
}
|
||||
|
||||
void Logger::logIntoFile(std::string& data) {
|
||||
void ROCmLogging::Logger::logIntoFile(std::string& data) {
|
||||
lock();
|
||||
if(!m_File.is_open()) {
|
||||
if (!m_File.is_open()) {
|
||||
initialize_resources();
|
||||
if (!m_File.is_open()) {
|
||||
std::cout << "WARNING: re-initializing resources was unsuccessful."
|
||||
@@ -127,24 +126,24 @@ void Logger::logIntoFile(std::string& data) {
|
||||
unlock();
|
||||
}
|
||||
|
||||
void Logger::logOnConsole(std::string& data) {
|
||||
void ROCmLogging::Logger::logOnConsole(std::string& data) {
|
||||
std::cout << getCurrentTime() << " " << data << std::endl;
|
||||
}
|
||||
|
||||
// Returns: In string format, YY-MM-DD HH:MM:SS.microseconds
|
||||
std::string Logger::getCurrentTime(void) {
|
||||
using namespace std::chrono;
|
||||
std::string ROCmLogging::Logger::getCurrentTime(void) {
|
||||
std::string currentTime;
|
||||
|
||||
// get current time
|
||||
auto now = system_clock::now();
|
||||
auto now = std::chrono::system_clock::now();
|
||||
|
||||
// get number of microseconds for the current second
|
||||
// (remainder after division into seconds)
|
||||
auto ms = duration_cast<microseconds>(now.time_since_epoch()) % 1000000;
|
||||
auto ms = std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch()) % 1000000;
|
||||
|
||||
// convert to std::time_t in order to convert to std::tm (broken time)
|
||||
auto timer = system_clock::to_time_t(now);
|
||||
auto timer = std::chrono::system_clock::to_time_t(now);
|
||||
|
||||
// convert to broken time
|
||||
std::tm bt = *std::localtime(&timer);
|
||||
@@ -159,7 +158,7 @@ std::string Logger::getCurrentTime(void) {
|
||||
}
|
||||
|
||||
// Interface for Error Log
|
||||
void Logger::error(const char* text) throw() {
|
||||
void ROCmLogging::Logger::error(const char* text) throw() {
|
||||
// By default, logging is disabled
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -182,18 +181,18 @@ void Logger::error(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::error(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::error(std::string& text) throw() {
|
||||
error(text.data());
|
||||
}
|
||||
|
||||
void Logger::error(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::error(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
error(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Alarm Log
|
||||
void Logger::alarm(const char* text) throw() {
|
||||
void ROCmLogging::Logger::alarm(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -216,18 +215,18 @@ void Logger::alarm(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::alarm(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::alarm(std::string& text) throw() {
|
||||
alarm(text.data());
|
||||
}
|
||||
|
||||
void Logger::alarm(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::alarm(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
alarm(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Always Log
|
||||
void Logger::always(const char* text) throw() {
|
||||
void ROCmLogging::Logger::always(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -250,18 +249,18 @@ void Logger::always(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::always(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::always(std::string& text) throw() {
|
||||
always(text.data());
|
||||
}
|
||||
|
||||
void Logger::always(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::always(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
always(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Buffer Log
|
||||
void Logger::buffer(const char* text) throw() {
|
||||
void ROCmLogging::Logger::buffer(const char* text) throw() {
|
||||
// Buffer is the special case. So don't add log level
|
||||
// and timestamp in the buffer message. Just log the raw bytes.
|
||||
if ((m_LogType == FILE_LOG) && (m_LogLevel >= LOG_LEVEL_BUFFER)) {
|
||||
@@ -284,18 +283,18 @@ void Logger::buffer(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::buffer(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::buffer(std::string& text) throw() {
|
||||
buffer(text.data());
|
||||
}
|
||||
|
||||
void Logger::buffer(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::buffer(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
buffer(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Info Log
|
||||
void Logger::info(const char* text) throw() {
|
||||
void ROCmLogging::Logger::info(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -318,18 +317,18 @@ void Logger::info(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::info(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::info(std::string& text) throw() {
|
||||
info(text.data());
|
||||
}
|
||||
|
||||
void Logger::info(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::info(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
info(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Trace Log
|
||||
void Logger::trace(const char* text) throw() {
|
||||
void ROCmLogging::Logger::trace(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -352,18 +351,18 @@ void Logger::trace(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::trace(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::trace(std::string& text) throw() {
|
||||
trace(text.data());
|
||||
}
|
||||
|
||||
void Logger::trace(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::trace(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
trace(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interface for Debug Log
|
||||
void Logger::debug(const char* text) throw() {
|
||||
void ROCmLogging::Logger::debug(const char* text) throw() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -386,51 +385,53 @@ void Logger::debug(const char* text) throw() {
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::debug(std::string& text) throw() {
|
||||
void ROCmLogging::Logger::debug(std::string& text) throw() {
|
||||
debug(text.data());
|
||||
}
|
||||
|
||||
void Logger::debug(std::ostringstream& stream) throw() {
|
||||
void ROCmLogging::Logger::debug(std::ostringstream& stream) throw() {
|
||||
std::string text = stream.str();
|
||||
debug(text.data());
|
||||
stream.str("");
|
||||
}
|
||||
|
||||
// Interfaces to control log levels
|
||||
void Logger::updateLogLevel(LogLevel logLevel) {
|
||||
void ROCmLogging::Logger::updateLogLevel(LogLevel logLevel) {
|
||||
m_LogLevel = logLevel;
|
||||
}
|
||||
|
||||
void Logger::enableAllLogLevels() {
|
||||
void ROCmLogging::Logger::enableAllLogLevels() {
|
||||
m_LogLevel = ENABLE_LOG;
|
||||
}
|
||||
|
||||
// Disable all log levels, except error and alarm
|
||||
void Logger::disableLog() {
|
||||
void ROCmLogging::Logger::disableLog() {
|
||||
m_LogLevel = DISABLE_LOG;
|
||||
}
|
||||
|
||||
// Interfaces to control log Types
|
||||
void Logger::updateLogType(LogType logType) {
|
||||
void ROCmLogging::Logger::updateLogType(LogType logType) {
|
||||
m_LogType = logType;
|
||||
}
|
||||
|
||||
void Logger::enableConsoleLogging() {
|
||||
void ROCmLogging::Logger::enableConsoleLogging() {
|
||||
m_LogType = CONSOLE;
|
||||
}
|
||||
|
||||
void Logger::enableFileLogging() {
|
||||
void ROCmLogging::Logger::enableFileLogging() {
|
||||
m_LogType = FILE_LOG;
|
||||
}
|
||||
|
||||
// Returns a string of details on current log settings
|
||||
std::string Logger::getLogSettings() {
|
||||
std::string ROCmLogging::Logger::getLogSettings() {
|
||||
std::string logSettings;
|
||||
|
||||
if (m_File.is_open()) {
|
||||
logSettings += "OpenStatus = File (" + logFileName + ") is open";
|
||||
logSettings += "OpenStatus = File (" + std::string(logFileName)
|
||||
+ ") is open";
|
||||
} else {
|
||||
logSettings += "OpenStatus = File (" + logFileName + ") is not open";
|
||||
logSettings += "OpenStatus = File (" + std::string(logFileName)
|
||||
+ ") is not open";
|
||||
}
|
||||
logSettings += ", ";
|
||||
|
||||
@@ -480,11 +481,11 @@ std::string Logger::getLogSettings() {
|
||||
|
||||
// Returns current reported enabled logging state. State is controlled by
|
||||
// user's environment variable RSMI_LOGGING.
|
||||
bool Logger::isLoggerEnabled() {
|
||||
bool ROCmLogging::Logger::isLoggerEnabled() {
|
||||
return m_loggingIsOn;
|
||||
}
|
||||
|
||||
void Logger::initialize_resources() {
|
||||
void ROCmLogging::Logger::initialize_resources() {
|
||||
// By default, logging is disabled (ie. no RSMI_LOGGING)
|
||||
// The check below allows us to toggle logging through RSMI_LOGGING
|
||||
// set or unset
|
||||
@@ -492,7 +493,7 @@ void Logger::initialize_resources() {
|
||||
if (!m_loggingIsOn) {
|
||||
return;
|
||||
}
|
||||
m_File.open(logFileName.c_str(), std::ios::out | std::ios::app);
|
||||
m_File.open(logFileName, std::ios::out | std::ios::app);
|
||||
m_LogLevel = LOG_LEVEL_TRACE;
|
||||
// RSMI_LOGGING = 1, output to logs only
|
||||
// RSMI_LOGGING = 2, output to console only
|
||||
@@ -521,9 +522,9 @@ void Logger::initialize_resources() {
|
||||
if (m_File.fail()) {
|
||||
std::cout << "WARNING: Failed opening log file." << std::endl;
|
||||
}
|
||||
chmod(logFileName.c_str(), S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH);
|
||||
chmod(logFileName, S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH);
|
||||
}
|
||||
|
||||
void Logger::destroy_resources() {
|
||||
void ROCmLogging::Logger::destroy_resources() {
|
||||
m_File.close();
|
||||
}
|
||||
|
||||
@@ -68,7 +68,6 @@
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
static const char *kPathDRMRoot = "/sys/class/drm";
|
||||
static const char *kPathHWMonRoot = "/sys/class/hwmon";
|
||||
@@ -129,6 +128,8 @@ amd::smi::RocmSMI::devInfoTypesStrings = {
|
||||
{amd::smi::kDevFwVersionMe, amdSMI + "kDevFwVersionMe"},
|
||||
{amd::smi::kDevFwVersionMec, amdSMI + "kDevFwVersionMec"},
|
||||
{amd::smi::kDevFwVersionMec2, amdSMI + "kDevFwVersionMec2"},
|
||||
{amd::smi::kDevFwVersionMes, amdSMI + "kDevFwVersionMes"},
|
||||
{amd::smi::kDevFwVersionMesKiq, amdSMI + "kDevFwVersionMesKiq"},
|
||||
{amd::smi::kDevFwVersionPfp, amdSMI + "kDevFwVersionPfp"},
|
||||
{amd::smi::kDevFwVersionRlc, amdSMI + "kDevFwVersionRlc"},
|
||||
{amd::smi::kDevFwVersionRlcSrlc, amdSMI + "kDevFwVersionRlcSrlc"},
|
||||
@@ -313,12 +314,12 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
int i_ret;
|
||||
|
||||
LOG_ALWAYS("=============== ROCM SMI initialize ================");
|
||||
Logger::getInstance()->enableAllLogLevels();
|
||||
ROCmLogging::Logger::getInstance()->enableAllLogLevels();
|
||||
// Leaving below to allow developers to check current log settings
|
||||
// std::string logSettings = Logger::getInstance()->getLogSettings();
|
||||
// std::cout << "Current log settings:\n" << logSettings << std::endl;
|
||||
|
||||
if (Logger::getInstance()->isLoggerEnabled()) {
|
||||
if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) {
|
||||
logSystemDetails();
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* Copyright (c) 2017-2023, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
@@ -58,8 +58,6 @@
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -80,6 +78,8 @@ static const char *kMonPowerCapName = "power#_cap";
|
||||
static const char *kMonPowerCapMaxName = "power#_cap_max";
|
||||
static const char *kMonPowerCapMinName = "power#_cap_min";
|
||||
static const char *kMonPowerAveName = "power#_average";
|
||||
static const char *kMonPowerInputName = "power#_input";
|
||||
static const char *kMonPowerLabelName = "power#_label";
|
||||
static const char *kMonTempMaxName = "temp#_max";
|
||||
static const char *kMonTempMinName = "temp#_min";
|
||||
static const char *kMonTempMaxHystName = "temp#_max_hyst";
|
||||
@@ -135,6 +135,8 @@ static const std::map<MonitorTypes, const char *> kMonitorNameMap = {
|
||||
{kMonPowerCapMax, kMonPowerCapMaxName},
|
||||
{kMonPowerCapMin, kMonPowerCapMinName},
|
||||
{kMonPowerAve, kMonPowerAveName},
|
||||
{kMonPowerInput, kMonPowerInputName},
|
||||
{kMonPowerLabel, kMonPowerLabelName},
|
||||
{kMonTempMax, kMonTempMaxName},
|
||||
{kMonTempMin, kMonTempMinName},
|
||||
{kMonTempMaxHyst, kMonTempMaxHystName},
|
||||
@@ -202,7 +204,8 @@ static const std::map<const char *, monitor_depends_t> kMonFuncDependsMap = {
|
||||
.variants = {kMonInvalid},
|
||||
}
|
||||
},
|
||||
{"rsmi_dev_power_cap_default_get", { .mandatory_depends = {kMonPowerCapDefaultName},
|
||||
{"rsmi_dev_power_cap_default_get", { .mandatory_depends =
|
||||
{kMonPowerCapDefaultName},
|
||||
.variants = {kMonInvalid},
|
||||
}
|
||||
},
|
||||
@@ -616,7 +619,7 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) {
|
||||
supported_monitors = intersect;
|
||||
}
|
||||
if (!supported_monitors.empty()) {
|
||||
for (unsigned long & supported_monitor : supported_monitors) {
|
||||
for (uint64_t &supported_monitor : supported_monitors) {
|
||||
if (m_type == eDefaultMonitor) {
|
||||
assert(supported_monitor > 0);
|
||||
supported_monitor |=
|
||||
|
||||
@@ -70,7 +70,6 @@
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi_logger.h"
|
||||
|
||||
using namespace ROCmLogging;
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
@@ -111,6 +111,7 @@ void TestPowerRead::Run(void) {
|
||||
std::cout << "\t**Power Cap Range: " << info.min_power_cap << " to " <<
|
||||
info.max_power_cap << " uW" << std::endl;
|
||||
}
|
||||
// TODO: Add current_socket_power tests
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,6 +56,8 @@ static const std::map<amdsmi_fw_block_t, const char *> kDevFWNameMap = {
|
||||
{FW_ID_CP_ME, "me"},
|
||||
{FW_ID_CP_MEC1, "mec1"},
|
||||
{FW_ID_CP_MEC2, "mec2"},
|
||||
{FW_ID_CP_MES, "mes"},
|
||||
{FW_ID_MES_KIQ, "mes_kiq"}, // TODO: double check
|
||||
{FW_ID_CP_PFP, "pfp"},
|
||||
{FW_ID_RLC, "rlc"},
|
||||
{FW_ID_RLC_SRLG, "rlc_srlg"},
|
||||
|
||||
Reference in New Issue
Block a user