diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index b2bd4c82fb..124746421b 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -290,6 +290,7 @@ typedef enum { FW_ID_SDMA_TH0, FW_ID_SDMA_TH1, FW_ID_CP_MES, + FW_ID_MES_KIQ, FW_ID_MES_STACK, FW_ID_MES_THREAD1, FW_ID_MES_THREAD1_STACK, diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index 6d16985071..360780bd3e 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -444,38 +444,39 @@ amdsmi_fw_block_t__enumvalues = { 44: 'FW_ID_SDMA_TH0', 45: 'FW_ID_SDMA_TH1', 46: 'FW_ID_CP_MES', - 47: 'FW_ID_MES_STACK', - 48: 'FW_ID_MES_THREAD1', - 49: 'FW_ID_MES_THREAD1_STACK', - 50: 'FW_ID_RLX6', - 51: 'FW_ID_RLX6_DRAM_BOOT', - 52: 'FW_ID_RS64_ME', - 53: 'FW_ID_RS64_ME_P0_DATA', - 54: 'FW_ID_RS64_ME_P1_DATA', - 55: 'FW_ID_RS64_PFP', - 56: 'FW_ID_RS64_PFP_P0_DATA', - 57: 'FW_ID_RS64_PFP_P1_DATA', - 58: 'FW_ID_RS64_MEC', - 59: 'FW_ID_RS64_MEC_P0_DATA', - 60: 'FW_ID_RS64_MEC_P1_DATA', - 61: 'FW_ID_RS64_MEC_P2_DATA', - 62: 'FW_ID_RS64_MEC_P3_DATA', - 63: 'FW_ID_PPTABLE', - 64: 'FW_ID_PSP_SOC', - 65: 'FW_ID_PSP_DBG', - 66: 'FW_ID_PSP_INTF', - 67: 'FW_ID_RLX6_CORE1', - 68: 'FW_ID_RLX6_DRAM_BOOT_CORE1', - 69: 'FW_ID_RLCV_LX7', - 70: 'FW_ID_RLC_SAVE_RESTORE_LIST', - 71: 'FW_ID_ASD', - 72: 'FW_ID_TA_RAS', - 73: 'FW_ID_XGMI', - 74: 'FW_ID_RLC_SRLG', - 75: 'FW_ID_RLC_SRLS', - 76: 'FW_ID_SMC', - 77: 'FW_ID_DMCU', - 78: 'FW_ID__MAX', + 47: 'FW_ID_MES_KIQ', + 48: 'FW_ID_MES_STACK', + 49: 'FW_ID_MES_THREAD1', + 50: 'FW_ID_MES_THREAD1_STACK', + 51: 'FW_ID_RLX6', + 52: 'FW_ID_RLX6_DRAM_BOOT', + 53: 'FW_ID_RS64_ME', + 54: 'FW_ID_RS64_ME_P0_DATA', + 55: 'FW_ID_RS64_ME_P1_DATA', + 56: 'FW_ID_RS64_PFP', + 57: 'FW_ID_RS64_PFP_P0_DATA', + 58: 'FW_ID_RS64_PFP_P1_DATA', + 59: 'FW_ID_RS64_MEC', + 60: 'FW_ID_RS64_MEC_P0_DATA', + 61: 'FW_ID_RS64_MEC_P1_DATA', + 62: 'FW_ID_RS64_MEC_P2_DATA', + 63: 'FW_ID_RS64_MEC_P3_DATA', + 64: 'FW_ID_PPTABLE', + 65: 'FW_ID_PSP_SOC', + 66: 'FW_ID_PSP_DBG', + 67: 'FW_ID_PSP_INTF', + 68: 'FW_ID_RLX6_CORE1', + 69: 'FW_ID_RLX6_DRAM_BOOT_CORE1', + 70: 'FW_ID_RLCV_LX7', + 71: 'FW_ID_RLC_SAVE_RESTORE_LIST', + 72: 'FW_ID_ASD', + 73: 'FW_ID_TA_RAS', + 74: 'FW_ID_XGMI', + 75: 'FW_ID_RLC_SRLG', + 76: 'FW_ID_RLC_SRLS', + 77: 'FW_ID_SMC', + 78: 'FW_ID_DMCU', + 79: 'FW_ID__MAX', } FW_ID_SMU = 1 FW_ID_FIRST = 1 @@ -524,38 +525,39 @@ FW_ID_IMU_IRAM = 43 FW_ID_SDMA_TH0 = 44 FW_ID_SDMA_TH1 = 45 FW_ID_CP_MES = 46 -FW_ID_MES_STACK = 47 -FW_ID_MES_THREAD1 = 48 -FW_ID_MES_THREAD1_STACK = 49 -FW_ID_RLX6 = 50 -FW_ID_RLX6_DRAM_BOOT = 51 -FW_ID_RS64_ME = 52 -FW_ID_RS64_ME_P0_DATA = 53 -FW_ID_RS64_ME_P1_DATA = 54 -FW_ID_RS64_PFP = 55 -FW_ID_RS64_PFP_P0_DATA = 56 -FW_ID_RS64_PFP_P1_DATA = 57 -FW_ID_RS64_MEC = 58 -FW_ID_RS64_MEC_P0_DATA = 59 -FW_ID_RS64_MEC_P1_DATA = 60 -FW_ID_RS64_MEC_P2_DATA = 61 -FW_ID_RS64_MEC_P3_DATA = 62 -FW_ID_PPTABLE = 63 -FW_ID_PSP_SOC = 64 -FW_ID_PSP_DBG = 65 -FW_ID_PSP_INTF = 66 -FW_ID_RLX6_CORE1 = 67 -FW_ID_RLX6_DRAM_BOOT_CORE1 = 68 -FW_ID_RLCV_LX7 = 69 -FW_ID_RLC_SAVE_RESTORE_LIST = 70 -FW_ID_ASD = 71 -FW_ID_TA_RAS = 72 -FW_ID_XGMI = 73 -FW_ID_RLC_SRLG = 74 -FW_ID_RLC_SRLS = 75 -FW_ID_SMC = 76 -FW_ID_DMCU = 77 -FW_ID__MAX = 78 +FW_ID_MES_KIQ = 47 +FW_ID_MES_STACK = 48 +FW_ID_MES_THREAD1 = 49 +FW_ID_MES_THREAD1_STACK = 50 +FW_ID_RLX6 = 51 +FW_ID_RLX6_DRAM_BOOT = 52 +FW_ID_RS64_ME = 53 +FW_ID_RS64_ME_P0_DATA = 54 +FW_ID_RS64_ME_P1_DATA = 55 +FW_ID_RS64_PFP = 56 +FW_ID_RS64_PFP_P0_DATA = 57 +FW_ID_RS64_PFP_P1_DATA = 58 +FW_ID_RS64_MEC = 59 +FW_ID_RS64_MEC_P0_DATA = 60 +FW_ID_RS64_MEC_P1_DATA = 61 +FW_ID_RS64_MEC_P2_DATA = 62 +FW_ID_RS64_MEC_P3_DATA = 63 +FW_ID_PPTABLE = 64 +FW_ID_PSP_SOC = 65 +FW_ID_PSP_DBG = 66 +FW_ID_PSP_INTF = 67 +FW_ID_RLX6_CORE1 = 68 +FW_ID_RLX6_DRAM_BOOT_CORE1 = 69 +FW_ID_RLCV_LX7 = 70 +FW_ID_RLC_SAVE_RESTORE_LIST = 71 +FW_ID_ASD = 72 +FW_ID_TA_RAS = 73 +FW_ID_XGMI = 74 +FW_ID_RLC_SRLG = 75 +FW_ID_RLC_SRLS = 76 +FW_ID_SMC = 77 +FW_ID_DMCU = 78 +FW_ID__MAX = 79 amdsmi_fw_block_t = ctypes.c_uint32 # enum # values for enumeration 'amdsmi_vram_type_t' @@ -734,7 +736,7 @@ struct_amdsmi_fw_info_t._pack_ = 1 # source:False struct_amdsmi_fw_info_t._fields_ = [ ('num_fw_info', ctypes.c_ubyte), ('PADDING_0', ctypes.c_ubyte * 7), - ('fw_info_list', struct_fw_info_list_ * 78), + ('fw_info_list', struct_fw_info_list_ * 79), ('reserved', ctypes.c_uint32 * 7), ('PADDING_1', ctypes.c_ubyte * 4), ] @@ -1853,13 +1855,13 @@ __all__ = \ 'FW_ID_CP_PFP', 'FW_ID_CP_PM4', 'FW_ID_DFC', 'FW_ID_DMCU', 'FW_ID_DMCU_ERAM', 'FW_ID_DMCU_ISR', 'FW_ID_DRV_CAP', 'FW_ID_FIRST', 'FW_ID_IMU_DRAM', 'FW_ID_IMU_IRAM', 'FW_ID_ISP', - 'FW_ID_MC', 'FW_ID_MES_STACK', 'FW_ID_MES_THREAD1', - 'FW_ID_MES_THREAD1_STACK', 'FW_ID_MMSCH', 'FW_ID_PPTABLE', - 'FW_ID_PSP_BL', 'FW_ID_PSP_DBG', 'FW_ID_PSP_INTF', - 'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC', 'FW_ID_PSP_SOSDRV', - 'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV', 'FW_ID_PSP_TOC', - 'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC', 'FW_ID_RLCV_LX7', - 'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL', + 'FW_ID_MC', 'FW_ID_MES_KIQ', 'FW_ID_MES_STACK', + 'FW_ID_MES_THREAD1', 'FW_ID_MES_THREAD1_STACK', 'FW_ID_MMSCH', + 'FW_ID_PPTABLE', 'FW_ID_PSP_BL', 'FW_ID_PSP_DBG', + 'FW_ID_PSP_INTF', 'FW_ID_PSP_KEYDB', 'FW_ID_PSP_SOC', + 'FW_ID_PSP_SOSDRV', 'FW_ID_PSP_SPL', 'FW_ID_PSP_SYSDRV', + 'FW_ID_PSP_TOC', 'FW_ID_REG_ACCESS_WHITELIST', 'FW_ID_RLC', + 'FW_ID_RLCV_LX7', 'FW_ID_RLC_P', 'FW_ID_RLC_RESTORE_LIST_CNTL', 'FW_ID_RLC_RESTORE_LIST_GPM_MEM', 'FW_ID_RLC_RESTORE_LIST_SRM_MEM', 'FW_ID_RLC_SAVE_RESTORE_LIST', 'FW_ID_RLC_SRLG', 'FW_ID_RLC_SRLS', 'FW_ID_RLC_V', 'FW_ID_RLX6', diff --git a/rocm_smi/include/rocm_smi/rocm_smi.h b/rocm_smi/include/rocm_smi/rocm_smi.h index 0d9e3d7665..6f8cb475ac 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/rocm_smi/include/rocm_smi/rocm_smi.h @@ -70,7 +70,8 @@ extern "C" { */ //! Guaranteed maximum possible number of supported frequencies -#define RSMI_MAX_NUM_FREQUENCIES 32 +//! (32 normal + 1 sleep frequency) +#define RSMI_MAX_NUM_FREQUENCIES 33 //! Maximum possible value for fan speed. Should be used as the denominator //! when determining fan speed percentage. @@ -639,6 +640,8 @@ typedef enum { RSMI_FW_BLOCK_ME, RSMI_FW_BLOCK_MEC, RSMI_FW_BLOCK_MEC2, + RSMI_FW_BLOCK_MES, + RSMI_FW_BLOCK_MES_KIQ, RSMI_FW_BLOCK_PFP, RSMI_FW_BLOCK_RLC, RSMI_FW_BLOCK_RLC_SRLC, @@ -759,6 +762,11 @@ typedef rsmi_power_profile_status_t rsmi_power_profile_status; * @brief This structure holds information about clock frequencies. */ typedef struct { + /** + * Deep Sleep frequency is only supported by some GPUs + */ + bool has_deep_sleep; + /** * The number of supported frequencies */ @@ -1757,6 +1765,30 @@ rsmi_status_t rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask); rsmi_status_t rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power); +/** + * @brief Get the current socket power (also known as instant + * power) of the device index provided. + * + * @details Given a device index @p dv_ind and a pointer to a uint64_t + * @p socket_power, this function will write the current socket power + * (in microwatts) to the uint64_t pointed to by @p socket_power. + * + * @param[in] dv_ind a device index + * + * @param[inout] socket_power a pointer to uint64_t to which the current + * socket power will be written to. If this parameter is nullptr, + * this function will return ::RSMI_STATUS_INVALID_ARGS if the function is + * supported with the provided, arguments and ::RSMI_STATUS_NOT_SUPPORTED + * if it is not supported with the provided arguments. + * + * @retval ::RSMI_STATUS_SUCCESS call was successful + * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not + * support this function with the given arguments + * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid + */ +rsmi_status_t +rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power); + /** * @brief Get the energy accumulator counter of the device with provided * device index. diff --git a/rocm_smi/include/rocm_smi/rocm_smi_device.h b/rocm_smi/include/rocm_smi/rocm_smi_device.h index 43c1728809..e9a61af972 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi_device.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_device.h @@ -146,6 +146,8 @@ enum DevInfoTypes { kDevFwVersionMe, kDevFwVersionMec, kDevFwVersionMec2, + kDevFwVersionMes, + kDevFwVersionMesKiq, kDevFwVersionPfp, kDevFwVersionRlc, kDevFwVersionRlcSrlc, diff --git a/rocm_smi/include/rocm_smi/rocm_smi_logger.h b/rocm_smi/include/rocm_smi/rocm_smi_logger.h index bd2608db58..f83240fbf4 100644 --- a/rocm_smi/include/rocm_smi/rocm_smi_logger.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_logger.h @@ -130,18 +130,18 @@ class Logger { break; } return *getInstance(); - }; + } Logger &operator<<(const char* s) { return operator<<(std::string(s)); - }; + } template Logger &operator<<(const T &v) { std::ostringstream s; s << v; std::string str = s.str(); return operator<<(str); - }; + } // Interface for Error Log void error(const char* text) throw(); diff --git a/rocm_smi/include/rocm_smi/rocm_smi_monitor.h b/rocm_smi/include/rocm_smi/rocm_smi_monitor.h index ea639eae35..ad284646b3 100755 --- a/rocm_smi/include/rocm_smi/rocm_smi_monitor.h +++ b/rocm_smi/include/rocm_smi/rocm_smi_monitor.h @@ -5,7 +5,7 @@ * The University of Illinois/NCSA * Open Source License (NCSA) * - * Copyright (c) 2017, Advanced Micro Devices, Inc. + * Copyright (c) 2017-2023, Advanced Micro Devices, Inc. * All rights reserved. * * Developed by: @@ -67,6 +67,8 @@ enum MonitorTypes { kMonPowerCapMax, kMonPowerCapMin, kMonPowerAve, + kMonPowerInput, + kMonPowerLabel, kMonTempMax, kMonTempMin, kMonTempMaxHyst, @@ -94,45 +96,47 @@ enum MonitorTypes { kMonInvalid = 0xFFFFFFFF, }; -const std::map monitorTypesToString { - {MonitorTypes::kMonName, "amd::smi::kMonName"}, - {MonitorTypes::kMonTemp, "amd::smi::kMonName"}, - {MonitorTypes::kMonFanSpeed, "amd::smi::kMonName"}, - {MonitorTypes::kMonMaxFanSpeed, "amd::smi::kMonName"}, - {MonitorTypes::kMonFanRPMs, "amd::smi::kMonName"}, - {MonitorTypes::kMonFanCntrlEnable, "amd::smi::kMonName"}, - {MonitorTypes::kMonPowerCap, "amd::smi::kMonName"}, - {MonitorTypes::kMonPowerCapDefault, "amd::smi::kMonName"}, - {MonitorTypes::kMonPowerCapMax, "amd::smi::kMonName"}, - {MonitorTypes::kMonPowerCapMin, "amd::smi::kMonName"}, - {MonitorTypes::kMonPowerAve, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempMax, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempMin, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempMaxHyst, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempMinHyst, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempCritical, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempCriticalHyst, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempEmergency, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempEmergencyHyst, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempCritMin, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempCritMinHyst, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempOffset, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempLowest, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempHighest, "amd::smi::kMonName"}, - {MonitorTypes::kMonTempLabel, "amd::smi::kMonName"}, - {MonitorTypes::kMonVolt, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltMax, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltMinCrit, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltMin, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltMaxCrit, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltAverage, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltLowest, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltHighest, "amd::smi::kMonName"}, - {MonitorTypes::kMonVoltLabel, "amd::smi::kMonName"}, - {MonitorTypes::kMonInvalid, "amd::smi::kMonName"}, +const std::map monitorTypesToString{ + {MonitorTypes::kMonName, "MonitorTypes::kMonName"}, + {MonitorTypes::kMonTemp, "MonitorTypes::kMonTemp"}, + {MonitorTypes::kMonFanSpeed, "MonitorTypes::kMonFanSpeed"}, + {MonitorTypes::kMonMaxFanSpeed, "MonitorTypes::kMonMaxFanSpeed"}, + {MonitorTypes::kMonFanRPMs, "MonitorTypes::kMonFanRPMs"}, + {MonitorTypes::kMonFanCntrlEnable, "MonitorTypes::kMonFanCntrlEnable"}, + {MonitorTypes::kMonPowerCap, "MonitorTypes::kMonPowerCap"}, + {MonitorTypes::kMonPowerCapDefault, "MonitorTypes::kMonPowerCapDefault"}, + {MonitorTypes::kMonPowerCapMax, "MonitorTypes::kMonPowerCapMax"}, + {MonitorTypes::kMonPowerCapMin, "MonitorTypes::kMonPowerCapMin"}, + {MonitorTypes::kMonPowerAve, "MonitorTypes::kMonPowerAve"}, + {MonitorTypes::kMonPowerInput, "MonitorTypes::kMonPowerInput"}, + {MonitorTypes::kMonPowerLabel, "MonitorTypes::kMonPowerLabel"}, + {MonitorTypes::kMonTempMax, "MonitorTypes::kMonTempMax"}, + {MonitorTypes::kMonTempMin, "MonitorTypes::kMonTempMin"}, + {MonitorTypes::kMonTempMaxHyst, "MonitorTypes::kMonTempMaxHyst"}, + {MonitorTypes::kMonTempMinHyst, "MonitorTypes::kMonTempMinHyst"}, + {MonitorTypes::kMonTempCritical, "MonitorTypes::kMonTempCritical"}, + {MonitorTypes::kMonTempCriticalHyst, "MonitorTypes::kMonTempCriticalHyst"}, + {MonitorTypes::kMonTempEmergency, "MonitorTypes::kMonTempEmergency"}, + {MonitorTypes::kMonTempEmergencyHyst, + "MonitorTypes::kMonTempEmergencyHyst"}, + {MonitorTypes::kMonTempCritMin, "MonitorTypes::kMonTempCritMin"}, + {MonitorTypes::kMonTempCritMinHyst, "MonitorTypes::kMonTempCritMinHyst"}, + {MonitorTypes::kMonTempOffset, "MonitorTypes::kMonTempOffset"}, + {MonitorTypes::kMonTempLowest, "MonitorTypes::kMonTempLowest"}, + {MonitorTypes::kMonTempHighest, "MonitorTypes::kMonTempHighest"}, + {MonitorTypes::kMonTempLabel, "MonitorTypes::kMonTempLabel"}, + {MonitorTypes::kMonVolt, "MonitorTypes::kMonVolt"}, + {MonitorTypes::kMonVoltMax, "MonitorTypes::kMonVoltMax"}, + {MonitorTypes::kMonVoltMinCrit, "MonitorTypes::kMonVoltMinCrit"}, + {MonitorTypes::kMonVoltMin, "MonitorTypes::kMonVoltMin"}, + {MonitorTypes::kMonVoltMaxCrit, "MonitorTypes::kMonVoltMaxCrit"}, + {MonitorTypes::kMonVoltAverage, "MonitorTypes::kMonVoltAverage"}, + {MonitorTypes::kMonVoltLowest, "MonitorTypes::kMonVoltLowest"}, + {MonitorTypes::kMonVoltHighest, "MonitorTypes::kMonVoltHighest"}, + {MonitorTypes::kMonVoltLabel, "MonitorTypes::kMonVoltLabel"}, + {MonitorTypes::kMonInvalid, "MonitorTypes::kMonInvalid"}, }; - class Monitor { public: explicit Monitor(std::string path, RocmSMI_env_vars const *e); diff --git a/rocm_smi/python_smi_tools/README.md b/rocm_smi/python_smi_tools/README.md index 4d5af959e4..42e051fb5e 100644 --- a/rocm_smi/python_smi_tools/README.md +++ b/rocm_smi/python_smi_tools/README.md @@ -74,7 +74,7 @@ Display Options: -a, --showallinfo Show Temperature, Fan and Clock values Topology: - -i, --showid Show GPU ID + -i, --showid Show DEVICE ID -v, --showvbios Show VBIOS version --showdriverversion Show kernel driver version --showfwinfo [BLOCK [BLOCK ...]] Show FW information diff --git a/rocm_smi/python_smi_tools/rocm_smi.py b/rocm_smi/python_smi_tools/rocm_smi.py index 2a0a4655d7..6f7ba1a8e0 100755 --- a/rocm_smi/python_smi_tools/rocm_smi.py +++ b/rocm_smi/python_smi_tools/rocm_smi.py @@ -45,9 +45,8 @@ CLOCK_JSON_VERSION = 1 headerString = ' ROCm System Management Interface ' footerString = ' End of ROCm SMI Log ' - # Output formatting -appWidth = 100 +appWidth = 90 deviceList = [] # Enable or disable serialized format @@ -383,8 +382,8 @@ def getPidList(): return -def getPower(device, silent=False): - """ Return the current power level of a given device +def getAvgPower(device, silent=False): + """ Return the average power level of a given device @param device: DRM device identifier @param silent=Turn on to silence error output @@ -393,7 +392,21 @@ def getPower(device, silent=False): power = c_uint32() ret = rocmsmi.rsmi_dev_power_ave_get(device, 0, byref(power)) if rsmi_ret_ok(ret, device, 'get_power_avg', silent): - return power.value / 1000000 + return str(power.value / 1000000) + return 'N/A' + +def getCurrentSocketPower(device, silent=False): + """ Return the current (also known as instant) + socket power of a given device + + @param device: DRM device identifier + @param silent=Turn on to silence error output + (you plan to handle manually). Default is off. + """ + power = c_uint32() + ret = rocmsmi.rsmi_dev_current_socket_power_get(device, byref(power)) + if rsmi_ret_ok(ret, device, 'get_socket_power', silent): + return str(power.value / 1000000) return 'N/A' @@ -437,7 +450,7 @@ def findFirstAvailableTemp(device): temp = c_int64(0) metric = rsmi_temperature_metric_t.RSMI_TEMP_CURRENT ret_temp = "N/A" - ret_temp_type = "(Unknown)" + ret_temp_type = temp_type_lst[0] for i, templist_val in enumerate(temp_type_lst): ret = rocmsmi.rsmi_dev_temp_metric_get(c_uint32(device), i, metric, byref(temp)) if rsmi_ret_ok(ret, device, 'get_temp_metric_' + templist_val, silent=True): @@ -448,6 +461,37 @@ def findFirstAvailableTemp(device): continue return (ret_temp_type, ret_temp) +def getTemperatureLabel(deviceList): + """ Discovers the the first identified power label + + Returns a string label value + @param device: DRM device identifier + """ + # Default label is Edge + tempLabel = temp_type_lst[0].lower() + if len(deviceList) < 1: + return tempLabel + (temp_type, _) = findFirstAvailableTemp(deviceList[0]) + tempLabel = temp_type.lower().replace('(', '').replace(')', '') + return tempLabel + +def getPowerLabel(deviceList): + """ Discovers the the first identified power label + + Returns a string label value + @param device: DRM device identifier + """ + power = c_int64(0) + # Default label is AvgPower + powerLabel = rsmi_power_label.AVG_POWER + if len(deviceList) < 1: + return powerLabel + device=deviceList[0] + power = getCurrentSocketPower(device, True) + if power != '0.0' and power != 'N/A': + powerLabel = rsmi_power_label.CURRENT_SOCKET_POWER + return powerLabel + def getVbiosVersion(device, silent=False): """ Returns the VBIOS version for a given device @@ -679,23 +723,35 @@ def printListLog(metricName, valuesList): print(listStr + line) -def printLogSpacer(displayString=None, fill='='): +def printLogSpacer(displayString=None, fill='=', contentSizeToFit=0): """ Prints [name of the option]/[name of the program] in the spacer to explain data below If no parameters are given, a default fill of the '=' string is used in the spacer @param displayString: name of item to be displayed inside of the log spacer @param fill: padding string which surrounds the given display string + @param contentSizeToFit: providing an integer > 0 allows + ability to dynamically change output padding/fill based on this value + instead of appWidth. Handy for concise info output. """ global appWidth, PRINT_JSON + resizeValue = appWidth + if contentSizeToFit != 0: + resizeValue = contentSizeToFit + if resizeValue % 2: # if odd -> make even + resizeValue += 1 + # leaving below to check if resizing works properly + # print("resizeVal=" +str(resizeValue) + "; appWidth=" + str(appWidth) + + # "; contentSizeToFit=" + str(contentSizeToFit) + "; fill=" + fill) + if not PRINT_JSON: if displayString: if len(displayString) % 2: displayString += fill - logSpacer = fill * int((appWidth - (len(displayString))) / 2) + displayString + fill * int( - (appWidth - (len(displayString))) / 2) + logSpacer = fill * int((resizeValue - (len(displayString))) / 2) + displayString + fill * int( + (resizeValue - (len(displayString))) / 2) else: - logSpacer = fill * appWidth + logSpacer = fill * resizeValue print(logSpacer) @@ -1630,22 +1686,15 @@ def showAllConcise(deviceList): print('ERROR: Cannot print JSON/CSV output for concise output') sys.exit(1) - """ Place holder for the actual max size """ - MAX_ALL_CONCISE_WIDTH = 100 - appWidth_temp = appWidth - appWidth = MAX_ALL_CONCISE_WIDTH silent = True - printLogSpacer(' Concise Info ') deviceList.sort() - temp_type = '(' + temp_type_lst[0] + ')' - if len(deviceList) >= 1: - (temp_type, _) = findFirstAvailableTemp(deviceList[0]) - available_temp_type = temp_type.lower() - available_temp_type = available_temp_type.replace('(', '') - available_temp_type = available_temp_type.replace(')', '') - header = ['GPU', '[Model : Revision]', 'Temp', 'AvgPwr', 'Partitions', 'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%'] - subheader = ['', 'Name (20 chars)', temp_type, '', '(Mem, Compute)', '', '', '', '', '', '', ''] + available_temp_type = getTemperatureLabel(deviceList) + temp_type = "(" + available_temp_type.capitalize() + ")" + header=['Device', '[Model : Revision]', 'Temp', 'Power', 'Partitions', + 'SCLK', 'MCLK', 'Fan', 'Perf', 'PwrCap', 'VRAM%', 'GPU%'] + subheader = ['', 'Name (20 chars)', temp_type, getPowerLabel(deviceList), + '(Mem, Compute)', '', '', '', '', '', '', ''] # add additional spaces to match header for idx, item in enumerate(subheader): header_size = len(header[idx]) @@ -1667,11 +1716,17 @@ def showAllConcise(deviceList): temp_val = str(getTemp(device, available_temp_type, silent)) if temp_val != 'N/A': temp_val += degree_sign + 'C' - avgPwr = str(getPower(device)) - if avgPwr != '0.0' and avgPwr != 'N/A': + socketPwr = getCurrentSocketPower(device, True) + avgPwr = getAvgPower(device, True) + powerVal = 'N/A' + if socketPwr != '0.0' and socketPwr != 'N/A': + socketPwr += 'W' + powerVal=socketPwr + elif avgPwr != '0.0' and avgPwr != 'N/A': avgPwr += 'W' + powerVal=avgPwr else: - avgPwr = 'N/A' + powerVal = 'N/A' combined_partition = (getMemoryPartition(device, silent) + ", " + getComputePartition(device, silent)) sclk = showCurrentClocks([device], 'sclk', concise=silent) @@ -1704,10 +1759,10 @@ def showAllConcise(deviceList): '', '', '', ''] gpu_dev_product_info_top_name = gpu_dev_product_info_names[1] - values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val, avgPwr, - combined_partition, sclk, mclk, - fan, str(perf).lower(), pwrCap, - mem_use_pct, gpu_busy] + values['card%s' % (str(device))] = [device, gpu_dev_product_info_top_name, temp_val, + powerVal, combined_partition, sclk, mclk, + fan, str(perf).lower(), pwrCap, mem_use_pct, + gpu_busy] val_widths = {} for device in deviceList: @@ -1716,10 +1771,17 @@ def showAllConcise(deviceList): for device in deviceList: for col in range(len(val_widths[device])): max_widths[col] = max(max_widths[col], val_widths[device][col]) - printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)), None) - printLog(None, "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader)), - None, useItalics=True) - printLogSpacer(fill='=') + + ######################## + # Display concise info # + ######################## + header_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), header)) + subheader_output = "".join(word.ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), subheader)) + printLogSpacer(headerString, contentSizeToFit=len(header_output)) + printLogSpacer(' Concise Info ', contentSizeToFit=len(header_output)) + printLog(None, header_output, None) + printLog(None, subheader_output, None, useItalics=True) + printLogSpacer(fill='=', contentSizeToFit=len(header_output)) for device in deviceList: printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in @@ -1730,9 +1792,8 @@ def showAllConcise(deviceList): printLog(None, "".join(str(word).ljust(max_widths[col]) for col, word in zip(range(len(max_widths)), values['card%s_Info' % (str(device))])), None) - printLogSpacer() - """ Restore original max size """ - appWidth = appWidth_temp + printLogSpacer(contentSizeToFit=len(header_output)) + printLogSpacer(footerString, contentSizeToFit=len(header_output)) def showAllConciseHw(deviceList): @@ -1808,12 +1869,21 @@ def showClocks(deviceList): if not rsmi_ret_ok(ret, device, 'get_clk_freq_' + clk_type, True): continue printLog(device, 'Supported %s frequencies on GPU%s' % (clk_type, str(device)), None) - for x in range(freq.num_supported): - fr = '{:>.0f}Mhz'.format(freq.frequency[x] / 1000000) - if x == freq.current: - printLog(device, str(x), str(fr) + ' *') - else: - printLog(device, str(x), str(fr)) + for i in range(freq.num_supported): + freq_string = '{:>.0f}Mhz'.format(freq.frequency[i] / 1000000) + if i == freq.current: + freq_string += ' *' + freq_index = i + # Deep Sleep frequency is only supported by some GPUs + # It is indicated by letter 'S' instead of the index number + if freq.has_deep_sleep: + # sleep state + if i == 0: + freq_index = 'S' + # all indices are offset by 1 because Deep Sleep occupies index 0 + else: + freq_index = i - 1 + printLog(device, str(freq_index), freq_string) printLog(device, '', None) else: logging.debug('{} frequency is unsupported on device[{}]'.format(clk_type, device)) @@ -1822,12 +1892,11 @@ def showClocks(deviceList): ret = rocmsmi.rsmi_dev_pci_bandwidth_get(device, byref(bw)) if rsmi_ret_ok(ret, device, 'get_PCIe_bandwidth', True): printLog(device, 'Supported %s frequencies on GPU%s' % ('PCIe', str(device)), None) - for x in range(bw.transfer_rate.num_supported): - fr = '{:>.1f}GT/s x{}'.format(bw.transfer_rate.frequency[x] / 1000000000, bw.lanes[x]) - if x == bw.transfer_rate.current: - printLog(device, str(x), str(fr) + ' *') - else: - printLog(device, str(x), str(fr)) + for i in range(bw.transfer_rate.num_supported): + freq_string = '{:>.1f}GT/s x{}'.format(bw.transfer_rate.frequency[i] / 1000000000, bw.lanes[i]) + if i == bw.transfer_rate.current: + freq_string += ' *' + printLog(device, str(i), str(freq_string)) printLog(device, '', None) else: logging.debug('PCIe frequency is unsupported on device [{}]'.format(device)) @@ -1857,9 +1926,17 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False): printLog(device, '%s current clock frequency not found' % (clk_defined), None) continue fr = freq.frequency[levl] / 1000000 + freq_index = levl + if freq.has_deep_sleep: + # sleep state + if levl == 0: + freq_index = 'S' + # all indices are offset by 1 because Deep Sleep occupies index 0 + else: + freq_index = levl - 1 if concise: # in case function is used for concise output, no need to print. return '{:.0f}Mhz'.format(fr) - printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(levl, fr)) + printLog(device, '{} clock level'.format(clk_defined), '{} ({:.0f}Mhz)'.format(freq_index, fr)) elif not concise: logging.debug('{} clock is unsupported on device[{}]'.format(clk_defined, device)) @@ -1872,12 +1949,20 @@ def showCurrentClocks(deviceList, clk_defined=None, concise=False): if levl >= freq.num_supported: printLog(device, '%s current clock frequency not found' % (clk_type), None) continue + freq_index = levl + if freq.has_deep_sleep: + # sleep state + if levl == 0: + freq_index = 'S' + # all indices are offset by 1 because Deep Sleep occupies index 0 + else: + freq_index = levl - 1 fr = freq.frequency[levl] / 1000000 if PRINT_JSON: printLog(device, '%s clock speed:' % (clk_type), '(%sMhz)' % (str(fr)[:-2])) - printLog(device, '%s clock level:' % (clk_type), levl) + printLog(device, '%s clock level:' % (clk_type), freq_index) else: - printLog(device, '%s clock level: %s' % (clk_type, levl), '(%sMhz)' % (str(fr)[:-2])) + printLog(device, '%s clock level: %s' % (clk_type, freq_index), '(%sMhz)' % (str(fr)[:-2])) elif not concise: logging.debug('{} clock is unsupported on device[{}]'.format(clk_type, device)) # pcie clocks @@ -2108,8 +2193,8 @@ def showId(deviceList): """ printLogSpacer(' ID ') for device in deviceList: - printLog(device, 'GPU ID', getId(device)) - printLog(device, 'GPU Rev', getRev(device)) + printLog(device, 'Device ID', getId(device)) + printLog(device, 'Device Rev', getRev(device)) printLogSpacer() @@ -2336,23 +2421,25 @@ def showPids(verbose): def showPower(deviceList): - """ Display current Average Graphics Package Power Consumption for a list of devices + """ Display Current (also known as instant) Socket or Average + Graphics Package Power Consumption for a list of devices @param deviceList: List of DRM devices (can be a single-item list) """ secondaryPresent=False printLogSpacer(' Power Consumption ') for device in deviceList: - if checkIfSecondaryDie(device): + if str(getCurrentSocketPower(device, True)) != 'N/A': + printLog(device, 'Current Socket Graphics Package Power (W)', getCurrentSocketPower(device)) + elif checkIfSecondaryDie(device): printLog(device, 'Average Graphics Package Power (W)', "N/A (Secondary die)") secondaryPresent=True - elif str(getPower(device)) != '0.0': - printLog(device, 'Average Graphics Package Power (W)', getPower(device)) + elif str(getAvgPower(device)) != '0.0': + printLog(device, 'Average Graphics Package Power (W)', getAvgPower(device)) else: - printErrLog(device, 'Unable to get Average Graphics Package Power Consumption') + printErrLog(device, 'Unable to get Average or Current Socket Graphics Package Power Consumption') if secondaryPresent: printLog(None, "\n\t\tPrimary die (usually one above or below the secondary) shows total (primary + secondary) socket power information", None) - printLogSpacer() @@ -2848,13 +2935,8 @@ def getGraphColor(percentage): def showTempGraph(deviceList): deviceList.sort() - temp_type = '(' + temp_type_lst[0] + ')' - if len(deviceList) >= 1: - (temp_type, _) = findFirstAvailableTemp(deviceList[0]) - printLogSpacer(' Temperature Graph ' + temp_type + ' ') - temp_type = temp_type.lower() - temp_type = temp_type.replace('(', '') - temp_type = temp_type.replace(')', '') + temp_type = getTemperatureLabel(deviceList) + printLogSpacer(' Temperature Graph ' + temp_type.capitalize() + ' ') # Start a thread for constantly printing try: # Create a thread (call print function, devices, delay in ms) @@ -3523,9 +3605,14 @@ def save(deviceList, savefilepath): # The code below is for when this script is run as an executable instead of when imported as a module +def isConciseInfoRequested(args): + return len(sys.argv) == 1 or \ + len(sys.argv) == 2 and (args.alldevices or (args.json or args.csv)) or \ + len(sys.argv) == 3 and (args.alldevices and (args.json or args.csv)) + if __name__ == '__main__': parser = argparse.ArgumentParser( - description=f'AMD ROCm System Management Interface | ROCM-SMI version: {__version__}', + description='AMD ROCm System Management Interface | ROCM-SMI version: %s' % __version__, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=90, width=120)) groupDev = parser.add_argument_group() groupDisplayOpt = parser.add_argument_group('Display Options') @@ -3545,7 +3632,7 @@ if __name__ == '__main__': groupDisplayOpt.add_argument('--showhw', help='Show Hardware details', action='store_true') groupDisplayOpt.add_argument('-a', '--showallinfo', help='Show Temperature, Fan and Clock values', action='store_true') - groupDisplayTop.add_argument('-i', '--showid', help='Show GPU ID', action='store_true') + groupDisplayTop.add_argument('-i', '--showid', help='Show DEVICE ID', action='store_true') groupDisplayTop.add_argument('-v', '--showvbios', help='Show VBIOS version', action='store_true') groupDisplayTop.add_argument('-e', '--showevents', help='Show event list', metavar='EVENT', type=str, nargs='*') groupDisplayTop.add_argument('--showdriverversion', help='Show kernel driver version', action='store_true') @@ -3731,7 +3818,8 @@ if __name__ == '__main__': if not PRINT_JSON: print('\n') - printLogSpacer(headerString) + if not isConciseInfoRequested(args): + printLogSpacer(headerString) if args.showallinfo: args.list = True @@ -3785,9 +3873,7 @@ if __name__ == '__main__': if not checkAmdGpus(deviceList): logging.warning('No AMD GPUs specified') - if len(sys.argv) == 1 or \ - len(sys.argv) == 2 and (args.alldevices or (args.json or args.csv)) or \ - len(sys.argv) == 3 and (args.alldevices and (args.json or args.csv)): + if isConciseInfoRequested(args): showAllConcise(deviceList) if args.showhw: showAllConciseHw(deviceList) @@ -3994,7 +4080,8 @@ if __name__ == '__main__': devCsv = formatCsv(deviceList) print(devCsv) - printLogSpacer(footerString) + if not isConciseInfoRequested(args): + printLogSpacer(footerString) rsmi_ret_ok(rocmsmi.rsmi_shut_down()) exit(RETCODE) diff --git a/rocm_smi/python_smi_tools/rsmiBindings.py b/rocm_smi/python_smi_tools/rsmiBindings.py index e6b141889f..36dbb6e5ff 100644 --- a/rocm_smi/python_smi_tools/rsmiBindings.py +++ b/rocm_smi/python_smi_tools/rsmiBindings.py @@ -59,7 +59,7 @@ gpu_id = c_uint32(0) # Policy enums -RSMI_MAX_NUM_FREQUENCIES = 32 +RSMI_MAX_NUM_FREQUENCIES = 33 RSMI_MAX_FAN_SPEED = 255 RSMI_NUM_VOLTAGE_CURVE_POINTS = 3 @@ -492,7 +492,8 @@ rsmi_power_profile_status = rsmi_power_profile_status_t class rsmi_frequencies_t(Structure): - _fields_ = [('num_supported', c_int32), + _fields_ = [('has_deep_sleep', c_bool), + ('num_supported', c_int32), ('current', c_uint32), ('frequency', c_uint64 * RSMI_MAX_NUM_FREQUENCIES)] @@ -654,3 +655,8 @@ rsmi_nps_mode_type = rsmi_nps_mode_type_t # nps_mode_type_l[rsmi_nps_mode_type_t.RSMI_MEMORY_PARTITION_NPS2] # will return string 'NPS2' nps_mode_type_l = ['NPS1', 'NPS2', 'NPS4', 'NPS8'] + +class rsmi_power_label(str, Enum): + AVG_POWER = '(Avg)' + CURRENT_SOCKET_POWER = '(Socket)' + diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index 655eacd2d8..8d9921c793 100755 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -77,7 +77,6 @@ #include "rocm_smi/rocm_smi64Config.h" #include "rocm_smi/rocm_smi_logger.h" -using namespace ROCmLogging; using namespace amd::smi; static const uint32_t kMaxOverdriveLevel = 20; @@ -147,14 +146,21 @@ static uint64_t freq_string_to_int(const std::vector &freq_lines, std::istringstream fs(freq_lines[i]); - uint32_t ind; + char junk_ch; + int ind; float freq; - std::string junk; + std::string junk_str; std::string units_str; std::string star_str; - fs >> ind; - fs >> junk; // colon + if (fs.peek() == 'S') { + // Deep Sleep frequency is only supported by some GPUs + fs >> junk_ch; + } else { + // All other frequency indices are numbers + fs >> ind; + } + fs >> junk_str; // colon fs >> freq; fs >> units_str; fs >> star_str; @@ -1127,9 +1133,14 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_ } f->num_supported = static_cast(val_vec.size()); - bool current = false; f->current = RSMI_MAX_NUM_FREQUENCIES + 1; // init to an invalid value + // Deep Sleep frequency is only supported by some GPUs + // It is indicated by letter 'S' instead of the index number + f->has_deep_sleep = (val_vec[0][0] == 'S'); + + bool current = false; + for (uint32_t i = 0; i < f->num_supported; ++i) { f->frequency[i] = freq_string_to_int(val_vec, ¤t, lanes, i); @@ -1156,9 +1167,9 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type, rsmi_clk_type_ sysvalue += " Previous Value"; sysvalue += ' ' + std::to_string(f->frequency[f->current]); DEBUG_LOG("More than one current clock. ", sysvalue); - } - else + } else { f->current = i; + } } } @@ -1309,6 +1320,11 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, return RSMI_STATUS_UNEXPECTED_DATA; } + // find last_item but skip empty lines + int last_item = val_vec.size()-1; + while (val_vec[last_item].empty() || val_vec[last_item][0] == 0) + last_item--; + p->curr_sclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, nullptr, kOD_SCLK_label_array_index + 1); p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, @@ -1322,16 +1338,18 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind, } else if (val_vec[kOD_MCLK_label_array_index] == "MCLK:") { p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, nullptr, kOD_MCLK_label_array_index + 1); + // the upper memory frequency is the last p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 4); + nullptr, last_item); return RSMI_STATUS_SUCCESS; } else if (val_vec[kOD_MCLK_label_array_index + 1] == "MCLK:") { p->curr_sclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, nullptr, kOD_SCLK_label_array_index + 3); p->curr_mclk_range.lower_bound = freq_string_to_int(val_vec, nullptr, nullptr, kOD_MCLK_label_array_index + 2); + // the upper memory frequency is the last p->curr_mclk_range.upper_bound = freq_string_to_int(val_vec, nullptr, - nullptr, kOD_MCLK_label_array_index + 5); + nullptr, last_item); return RSMI_STATUS_SUCCESS; } else { return RSMI_STATUS_NOT_YET_IMPLEMENTED; @@ -1708,6 +1726,8 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block, { RSMI_FW_BLOCK_ME, amd::smi::kDevFwVersionMe }, { RSMI_FW_BLOCK_MEC, amd::smi::kDevFwVersionMec }, { RSMI_FW_BLOCK_MEC2, amd::smi::kDevFwVersionMec2 }, + { RSMI_FW_BLOCK_MES, amd::smi::kDevFwVersionMes }, + { RSMI_FW_BLOCK_MES_KIQ, amd::smi::kDevFwVersionMesKiq }, { RSMI_FW_BLOCK_PFP, amd::smi::kDevFwVersionPfp }, { RSMI_FW_BLOCK_RLC, amd::smi::kDevFwVersionRlc }, { RSMI_FW_BLOCK_RLC_SRLC, amd::smi::kDevFwVersionRlcSrlc }, @@ -2485,21 +2505,22 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type, amd::smi::MonitorTypes mon_type = amd::smi::kMonInvalid; uint16_t val_ui16; - static const std::map kMetricTypeMap = { - { RSMI_TEMP_CURRENT, amd::smi::kMonTemp }, - { RSMI_TEMP_MAX, amd::smi::kMonTempMax }, - { RSMI_TEMP_MIN, amd::smi::kMonTempMin }, - { RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst }, - { RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst }, - { RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical }, - { RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst }, - { RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency }, - { RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst }, - { RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin }, - { RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst }, - { RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset }, - { RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest }, - { RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest }, + static const std::map + kMetricTypeMap = { + { RSMI_TEMP_CURRENT, amd::smi::kMonTemp }, + { RSMI_TEMP_MAX, amd::smi::kMonTempMax }, + { RSMI_TEMP_MIN, amd::smi::kMonTempMin }, + { RSMI_TEMP_MAX_HYST, amd::smi::kMonTempMaxHyst }, + { RSMI_TEMP_MIN_HYST, amd::smi::kMonTempMinHyst }, + { RSMI_TEMP_CRITICAL, amd::smi::kMonTempCritical }, + { RSMI_TEMP_CRITICAL_HYST, amd::smi::kMonTempCriticalHyst }, + { RSMI_TEMP_EMERGENCY, amd::smi::kMonTempEmergency }, + { RSMI_TEMP_EMERGENCY_HYST, amd::smi::kMonTempEmergencyHyst }, + { RSMI_TEMP_CRIT_MIN, amd::smi::kMonTempCritMin }, + { RSMI_TEMP_CRIT_MIN_HYST, amd::smi::kMonTempCritMinHyst }, + { RSMI_TEMP_OFFSET, amd::smi::kMonTempOffset }, + { RSMI_TEMP_LOWEST, amd::smi::kMonTempLowest }, + { RSMI_TEMP_HIGHEST, amd::smi::kMonTempHighest }, }; const auto mon_type_it = kMetricTypeMap.find(metric); @@ -2584,7 +2605,8 @@ rsmi_dev_temp_metric_get(uint32_t dv_ind, uint32_t sensor_type, return RSMI_STATUS_NOT_SUPPORTED; } - *temperature = static_cast(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE; + *temperature = + static_cast(val_ui16) * CENTRIGRADE_TO_MILLI_CENTIGRADE; ss << __PRETTY_FUNCTION__ << " | ======= end ======= " << " | Success " @@ -2919,6 +2941,80 @@ rsmi_dev_power_ave_get(uint32_t dv_ind, uint32_t sensor_ind, uint64_t *power) { CATCH } +rsmi_status_t +rsmi_dev_current_socket_power_get(uint32_t dv_ind, uint64_t *socket_power) { + TRY + std::ostringstream ss; + rsmi_status_t rsmiReturn = RSMI_STATUS_NOT_SUPPORTED; + std::string val_str; + uint32_t sensor_ind = 1; // socket_power sysfs files have 1-based indices + MonitorTypes mon_type = amd::smi::kMonPowerInput; + ss << __PRETTY_FUNCTION__ << " | ======= start =======, dv_ind=" + << std::to_string(dv_ind); + LOG_TRACE(ss); + if (socket_power == nullptr) { + rsmiReturn = RSMI_STATUS_INVALID_ARGS; + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Fail " + << " | Device #: " << dv_ind + << " | Type: " << monitorTypesToString.at(mon_type) + << " | Cause: socket_power was a null ptr reference" + << " | Returning = " + << getRSMIStatusString(rsmiReturn) << " |"; + LOG_ERROR(ss); + return RSMI_STATUS_INVALID_ARGS; + } + CHK_SUPPORT_SUBVAR_ONLY(socket_power, sensor_ind) + DEVICE_MUTEX + + if (dev->monitor() == nullptr) { + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Fail " + << " | Device #: " << dv_ind + << " | Type: " << monitorTypesToString.at(mon_type) + << " | Cause: hwmon monitor was a null ptr reference" + << " | Returning = " + << getRSMIStatusString(rsmiReturn) << " |"; + LOG_ERROR(ss); + return rsmiReturn; + } + + int ret = dev->monitor()->readMonitor(amd::smi::kMonPowerLabel, + sensor_ind, &val_str); + if (ret || val_str != "PPT" || val_str.size() != 3) { + if (ret != 0) { + rsmiReturn = amd::smi::ErrnoToRsmiStatus(ret); + } + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Fail " + << " | Device #: " << dv_ind + << " | Type: " << monitorTypesToString.at(mon_type) + << " | Cause: readMonitor() returned an error status" + << " or Socket Power label did not show PPT or size of label data was" + << " unexpected" + << " | Returning = " + << getRSMIStatusString(rsmiReturn) << " |"; + LOG_ERROR(ss); + return rsmiReturn; + } + rsmiReturn = get_dev_mon_value(mon_type, dv_ind, sensor_ind, + socket_power); + ss << __PRETTY_FUNCTION__ + << " | ======= end ======= " + << " | Success " + << " | Device #: " << dv_ind + << " | Type: " << monitorTypesToString.at(mon_type) + << " | Data: " << *socket_power + << " | Returning = " + << getRSMIStatusString(rsmiReturn) << " |"; + LOG_TRACE(ss); + return rsmiReturn; + CATCH +} + rsmi_status_t rsmi_dev_energy_count_get(uint32_t dv_ind, uint64_t *power, float *counter_resolution, uint64_t *timestamp) { diff --git a/rocm_smi/src/rocm_smi_device.cc b/rocm_smi/src/rocm_smi_device.cc index 8bea7e86a3..1310b27956 100755 --- a/rocm_smi/src/rocm_smi_device.cc +++ b/rocm_smi/src/rocm_smi_device.cc @@ -68,8 +68,6 @@ #include "rocm_smi/rocm_smi_logger.h" #include "shared_mutex.h" // NOLINT -using namespace ROCmLogging; - namespace amd { namespace smi { @@ -141,6 +139,8 @@ static const char *kDevFwVersionMcFName = "fw_version/mc_fw_version"; static const char *kDevFwVersionMeFName = "fw_version/me_fw_version"; static const char *kDevFwVersionMecFName = "fw_version/mec_fw_version"; static const char *kDevFwVersionMec2FName = "fw_version/mec2_fw_version"; +static const char *kDevFwVersionMesFName = "fw_version/mes_fw_version"; +static const char *kDevFwVersionMesKiqFName = "fw_version/mes_kiq_fw_version"; static const char *kDevFwVersionPfpFName = "fw_version/pfp_fw_version"; static const char *kDevFwVersionRlcFName = "fw_version/rlc_fw_version"; static const char *kDevFwVersionRlcSrlcFName = "fw_version/rlc_srlc_fw_version"; @@ -284,6 +284,8 @@ static const std::map kDevAttribNameMap = { {kDevFwVersionMe, kDevFwVersionMeFName}, {kDevFwVersionMec, kDevFwVersionMecFName}, {kDevFwVersionMec2, kDevFwVersionMec2FName}, + {kDevFwVersionMes, kDevFwVersionMesFName}, + {kDevFwVersionMesKiq, kDevFwVersionMesKiqFName}, {kDevFwVersionPfp, kDevFwVersionPfpFName}, {kDevFwVersionRlc, kDevFwVersionRlcFName}, {kDevFwVersionRlcSrlc, kDevFwVersionRlcSrlcFName}, @@ -347,6 +349,8 @@ static std::map kDevInfoVarTypeToRSMIVariant = { {kDevFwVersionMe, RSMI_FW_BLOCK_ME}, {kDevFwVersionMec, RSMI_FW_BLOCK_MEC}, {kDevFwVersionMec2, RSMI_FW_BLOCK_MEC2}, + {kDevFwVersionMes, RSMI_FW_BLOCK_MES}, + {kDevFwVersionMesKiq, RSMI_FW_BLOCK_MES_KIQ}, {kDevFwVersionPfp, RSMI_FW_BLOCK_PFP}, {kDevFwVersionRlc, RSMI_FW_BLOCK_RLC}, {kDevFwVersionRlcSrlc, RSMI_FW_BLOCK_RLC_SRLC}, @@ -482,6 +486,8 @@ static const std::map kDevFuncDependsMap = { kDevFwVersionMe, kDevFwVersionMec, kDevFwVersionMec2, + kDevFwVersionMes, + kDevFwVersionMesKiq, kDevFwVersionPfp, kDevFwVersionRlc, kDevFwVersionRlcSrlc, @@ -962,6 +968,8 @@ int Device::readDevInfo(DevInfoTypes type, uint64_t *val) { case kDevFwVersionMe: case kDevFwVersionMec: case kDevFwVersionMec2: + case kDevFwVersionMes: + case kDevFwVersionMesKiq: case kDevFwVersionPfp: case kDevFwVersionRlc: case kDevFwVersionRlcSrlc: diff --git a/rocm_smi/src/rocm_smi_gpu_metrics.cc b/rocm_smi/src/rocm_smi_gpu_metrics.cc index e89a0d58fb..13fc58fc1a 100755 --- a/rocm_smi/src/rocm_smi_gpu_metrics.cc +++ b/rocm_smi/src/rocm_smi_gpu_metrics.cc @@ -61,7 +61,6 @@ #include "rocm_smi/rocm_smi_exception.h" #include "rocm_smi/rocm_smi_logger.h" -using namespace ROCmLogging; using namespace amd::smi; #define TRY try { diff --git a/rocm_smi/src/rocm_smi_logger.cc b/rocm_smi/src/rocm_smi_logger.cc index 05bb09834a..593d4ff3e5 100644 --- a/rocm_smi/src/rocm_smi_logger.cc +++ b/rocm_smi/src/rocm_smi_logger.cc @@ -71,9 +71,8 @@ #include "rocm_smi/rocm_smi_logger.h" #include "rocm_smi/rocm_smi_main.h" -using namespace ROCmLogging; -Logger* Logger::m_Instance = nullptr; +ROCmLogging::Logger *ROCmLogging::Logger::m_Instance = nullptr; // Log file name // WARNING: File name should be changed here and @@ -81,39 +80,39 @@ Logger* Logger::m_Instance = nullptr; // in one place will cause a mismatch in these scripts, // files may not have proper permissions, and logrotate // would not function properly. -const std::string logPath = "/var/log/amd_smi_lib/"; -const std::string logBaseFName = "AMD-SMI-lib"; -const std::string logExtension = ".log"; -const std::string logFileName = logPath + logBaseFName + logExtension; +#define LOGPATH "/var/log/amd_smi_lib/" +#define LOGBASE_FNAME "AMD-SMI-lib" +#define LOGEXTENSION ".log" +const char *logFileName = LOGPATH LOGBASE_FNAME LOGEXTENSION; -Logger::Logger() { +ROCmLogging::Logger::Logger() { initialize_resources(); } -Logger::~Logger() { +ROCmLogging::Logger::~Logger() { if (m_loggingIsOn) { destroy_resources(); } } -Logger* Logger::getInstance() throw() { +ROCmLogging::Logger* ROCmLogging::Logger::getInstance() throw() { if (m_Instance == nullptr) { - m_Instance = new Logger(); + m_Instance = new ROCmLogging::Logger(); } return m_Instance; } -void Logger::lock() { +void ROCmLogging::Logger::lock() { m_Lock.lock(); } -void Logger::unlock() { +void ROCmLogging::Logger::unlock() { m_Lock.unlock(); } -void Logger::logIntoFile(std::string& data) { +void ROCmLogging::Logger::logIntoFile(std::string& data) { lock(); - if(!m_File.is_open()) { + if (!m_File.is_open()) { initialize_resources(); if (!m_File.is_open()) { std::cout << "WARNING: re-initializing resources was unsuccessful." @@ -127,24 +126,24 @@ void Logger::logIntoFile(std::string& data) { unlock(); } -void Logger::logOnConsole(std::string& data) { +void ROCmLogging::Logger::logOnConsole(std::string& data) { std::cout << getCurrentTime() << " " << data << std::endl; } // Returns: In string format, YY-MM-DD HH:MM:SS.microseconds -std::string Logger::getCurrentTime(void) { - using namespace std::chrono; +std::string ROCmLogging::Logger::getCurrentTime(void) { std::string currentTime; // get current time - auto now = system_clock::now(); + auto now = std::chrono::system_clock::now(); // get number of milliseconds for the current second // (remainder after division into seconds) - auto ms = duration_cast(now.time_since_epoch()) % 1000000; + auto ms = std::chrono::duration_cast( + now.time_since_epoch()) % 1000000; // convert to std::time_t in order to convert to std::tm (broken time) - auto timer = system_clock::to_time_t(now); + auto timer = std::chrono::system_clock::to_time_t(now); // convert to broken time std::tm bt = *std::localtime(&timer); @@ -159,7 +158,7 @@ std::string Logger::getCurrentTime(void) { } // Interface for Error Log -void Logger::error(const char* text) throw() { +void ROCmLogging::Logger::error(const char* text) throw() { // By default, logging is disabled // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -182,18 +181,18 @@ void Logger::error(const char* text) throw() { } } -void Logger::error(std::string& text) throw() { +void ROCmLogging::Logger::error(std::string& text) throw() { error(text.data()); } -void Logger::error(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::error(std::ostringstream& stream) throw() { std::string text = stream.str(); error(text.data()); stream.str(""); } // Interface for Alarm Log -void Logger::alarm(const char* text) throw() { +void ROCmLogging::Logger::alarm(const char* text) throw() { // By default, logging is disabled (ie. no RSMI_LOGGING) // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -216,18 +215,18 @@ void Logger::alarm(const char* text) throw() { } } -void Logger::alarm(std::string& text) throw() { +void ROCmLogging::Logger::alarm(std::string& text) throw() { alarm(text.data()); } -void Logger::alarm(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::alarm(std::ostringstream& stream) throw() { std::string text = stream.str(); alarm(text.data()); stream.str(""); } // Interface for Always Log -void Logger::always(const char* text) throw() { +void ROCmLogging::Logger::always(const char* text) throw() { // By default, logging is disabled (ie. no RSMI_LOGGING) // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -250,18 +249,18 @@ void Logger::always(const char* text) throw() { } } -void Logger::always(std::string& text) throw() { +void ROCmLogging::Logger::always(std::string& text) throw() { always(text.data()); } -void Logger::always(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::always(std::ostringstream& stream) throw() { std::string text = stream.str(); always(text.data()); stream.str(""); } // Interface for Buffer Log -void Logger::buffer(const char* text) throw() { +void ROCmLogging::Logger::buffer(const char* text) throw() { // Buffer is the special case. So don't add log level // and timestamp in the buffer message. Just log the raw bytes. if ((m_LogType == FILE_LOG) && (m_LogLevel >= LOG_LEVEL_BUFFER)) { @@ -284,18 +283,18 @@ void Logger::buffer(const char* text) throw() { } } -void Logger::buffer(std::string& text) throw() { +void ROCmLogging::Logger::buffer(std::string& text) throw() { buffer(text.data()); } -void Logger::buffer(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::buffer(std::ostringstream& stream) throw() { std::string text = stream.str(); buffer(text.data()); stream.str(""); } // Interface for Info Log -void Logger::info(const char* text) throw() { +void ROCmLogging::Logger::info(const char* text) throw() { // By default, logging is disabled (ie. no RSMI_LOGGING) // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -318,18 +317,18 @@ void Logger::info(const char* text) throw() { } } -void Logger::info(std::string& text) throw() { +void ROCmLogging::Logger::info(std::string& text) throw() { info(text.data()); } -void Logger::info(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::info(std::ostringstream& stream) throw() { std::string text = stream.str(); info(text.data()); stream.str(""); } // Interface for Trace Log -void Logger::trace(const char* text) throw() { +void ROCmLogging::Logger::trace(const char* text) throw() { // By default, logging is disabled (ie. no RSMI_LOGGING) // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -352,18 +351,18 @@ void Logger::trace(const char* text) throw() { } } -void Logger::trace(std::string& text) throw() { +void ROCmLogging::Logger::trace(std::string& text) throw() { trace(text.data()); } -void Logger::trace(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::trace(std::ostringstream& stream) throw() { std::string text = stream.str(); trace(text.data()); stream.str(""); } // Interface for Debug Log -void Logger::debug(const char* text) throw() { +void ROCmLogging::Logger::debug(const char* text) throw() { // By default, logging is disabled (ie. no RSMI_LOGGING) // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -386,51 +385,53 @@ void Logger::debug(const char* text) throw() { } } -void Logger::debug(std::string& text) throw() { +void ROCmLogging::Logger::debug(std::string& text) throw() { debug(text.data()); } -void Logger::debug(std::ostringstream& stream) throw() { +void ROCmLogging::Logger::debug(std::ostringstream& stream) throw() { std::string text = stream.str(); debug(text.data()); stream.str(""); } // Interfaces to control log levels -void Logger::updateLogLevel(LogLevel logLevel) { +void ROCmLogging::Logger::updateLogLevel(LogLevel logLevel) { m_LogLevel = logLevel; } -void Logger::enableAllLogLevels() { +void ROCmLogging::Logger::enableAllLogLevels() { m_LogLevel = ENABLE_LOG; } // Disable all log levels, except error and alarm -void Logger::disableLog() { +void ROCmLogging::Logger::disableLog() { m_LogLevel = DISABLE_LOG; } // Interfaces to control log Types -void Logger::updateLogType(LogType logType) { +void ROCmLogging::Logger::updateLogType(LogType logType) { m_LogType = logType; } -void Logger::enableConsoleLogging() { +void ROCmLogging::Logger::enableConsoleLogging() { m_LogType = CONSOLE; } -void Logger::enableFileLogging() { +void ROCmLogging::Logger::enableFileLogging() { m_LogType = FILE_LOG; } // Returns a string of details on current log settings -std::string Logger::getLogSettings() { +std::string ROCmLogging::Logger::getLogSettings() { std::string logSettings; if (m_File.is_open()) { - logSettings += "OpenStatus = File (" + logFileName + ") is open"; + logSettings += "OpenStatus = File (" + std::string(logFileName) + + ") is open"; } else { - logSettings += "OpenStatus = File (" + logFileName + ") is not open"; + logSettings += "OpenStatus = File (" + std::string(logFileName) + + ") is not open"; } logSettings += ", "; @@ -480,11 +481,11 @@ std::string Logger::getLogSettings() { // Returns current reported enabled logging state. State is controlled by // user's environment variable RSMI_LOGGING. -bool Logger::isLoggerEnabled() { +bool ROCmLogging::Logger::isLoggerEnabled() { return m_loggingIsOn; } -void Logger::initialize_resources() { +void ROCmLogging::Logger::initialize_resources() { // By default, logging is disabled (ie. no RSMI_LOGGING) // The check below allows us to toggle logging through RSMI_LOGGING // set or unset @@ -492,7 +493,7 @@ void Logger::initialize_resources() { if (!m_loggingIsOn) { return; } - m_File.open(logFileName.c_str(), std::ios::out | std::ios::app); + m_File.open(logFileName, std::ios::out | std::ios::app); m_LogLevel = LOG_LEVEL_TRACE; // RSMI_LOGGING = 1, output to logs only // RSMI_LOGGING = 2, output to console only @@ -521,9 +522,9 @@ void Logger::initialize_resources() { if (m_File.fail()) { std::cout << "WARNING: Failed opening log file." << std::endl; } - chmod(logFileName.c_str(), S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH); + chmod(logFileName, S_IRUSR|S_IRGRP|S_IROTH|S_IWUSR|S_IWGRP|S_IWOTH); } -void Logger::destroy_resources() { +void ROCmLogging::Logger::destroy_resources() { m_File.close(); } diff --git a/rocm_smi/src/rocm_smi_main.cc b/rocm_smi/src/rocm_smi_main.cc index ef4e022889..9089e9093d 100755 --- a/rocm_smi/src/rocm_smi_main.cc +++ b/rocm_smi/src/rocm_smi_main.cc @@ -68,7 +68,6 @@ #include "rocm_smi/rocm_smi_kfd.h" #include "rocm_smi/rocm_smi_logger.h" -using namespace ROCmLogging; static const char *kPathDRMRoot = "/sys/class/drm"; static const char *kPathHWMonRoot = "/sys/class/hwmon"; @@ -129,6 +128,8 @@ amd::smi::RocmSMI::devInfoTypesStrings = { {amd::smi::kDevFwVersionMe, amdSMI + "kDevFwVersionMe"}, {amd::smi::kDevFwVersionMec, amdSMI + "kDevFwVersionMec"}, {amd::smi::kDevFwVersionMec2, amdSMI + "kDevFwVersionMec2"}, + {amd::smi::kDevFwVersionMes, amdSMI + "kDevFwVersionMes"}, + {amd::smi::kDevFwVersionMesKiq, amdSMI + "kDevFwVersionMesKiq"}, {amd::smi::kDevFwVersionPfp, amdSMI + "kDevFwVersionPfp"}, {amd::smi::kDevFwVersionRlc, amdSMI + "kDevFwVersionRlc"}, {amd::smi::kDevFwVersionRlcSrlc, amdSMI + "kDevFwVersionRlcSrlc"}, @@ -313,12 +314,12 @@ RocmSMI::Initialize(uint64_t flags) { int i_ret; LOG_ALWAYS("=============== ROCM SMI initialize ================"); - Logger::getInstance()->enableAllLogLevels(); + ROCmLogging::Logger::getInstance()->enableAllLogLevels(); // Leaving below to allow developers to check current log settings // std::string logSettings = Logger::getInstance()->getLogSettings(); // std::cout << "Current log settings:\n" << logSettings << std::endl; - if (Logger::getInstance()->isLoggerEnabled()) { + if (ROCmLogging::Logger::getInstance()->isLoggerEnabled()) { logSystemDetails(); } diff --git a/rocm_smi/src/rocm_smi_monitor.cc b/rocm_smi/src/rocm_smi_monitor.cc index c4f94284a6..d7d9f4d6dc 100755 --- a/rocm_smi/src/rocm_smi_monitor.cc +++ b/rocm_smi/src/rocm_smi_monitor.cc @@ -3,7 +3,7 @@ * The University of Illinois/NCSA * Open Source License (NCSA) * - * Copyright (c) 2017, Advanced Micro Devices, Inc. + * Copyright (c) 2017-2023, Advanced Micro Devices, Inc. * All rights reserved. * * Developed by: @@ -58,8 +58,6 @@ #include "rocm_smi/rocm_smi_exception.h" #include "rocm_smi/rocm_smi_logger.h" -using namespace ROCmLogging; - namespace amd { namespace smi { @@ -80,6 +78,8 @@ static const char *kMonPowerCapName = "power#_cap"; static const char *kMonPowerCapMaxName = "power#_cap_max"; static const char *kMonPowerCapMinName = "power#_cap_min"; static const char *kMonPowerAveName = "power#_average"; +static const char *kMonPowerInputName = "power#_input"; +static const char *kMonPowerLabelName = "power#_label"; static const char *kMonTempMaxName = "temp#_max"; static const char *kMonTempMinName = "temp#_min"; static const char *kMonTempMaxHystName = "temp#_max_hyst"; @@ -135,6 +135,8 @@ static const std::map kMonitorNameMap = { {kMonPowerCapMax, kMonPowerCapMaxName}, {kMonPowerCapMin, kMonPowerCapMinName}, {kMonPowerAve, kMonPowerAveName}, + {kMonPowerInput, kMonPowerInputName}, + {kMonPowerLabel, kMonPowerLabelName}, {kMonTempMax, kMonTempMaxName}, {kMonTempMin, kMonTempMinName}, {kMonTempMaxHyst, kMonTempMaxHystName}, @@ -202,7 +204,8 @@ static const std::map kMonFuncDependsMap = { .variants = {kMonInvalid}, } }, - {"rsmi_dev_power_cap_default_get", { .mandatory_depends = {kMonPowerCapDefaultName}, + {"rsmi_dev_power_cap_default_get", { .mandatory_depends = + {kMonPowerCapDefaultName}, .variants = {kMonInvalid}, } }, @@ -616,7 +619,7 @@ void Monitor::fillSupportedFuncs(SupportedFuncMap *supported_funcs) { supported_monitors = intersect; } if (!supported_monitors.empty()) { - for (unsigned long & supported_monitor : supported_monitors) { + for (uint64_t &supported_monitor : supported_monitors) { if (m_type == eDefaultMonitor) { assert(supported_monitor > 0); supported_monitor |= diff --git a/rocm_smi/src/rocm_smi_utils.cc b/rocm_smi/src/rocm_smi_utils.cc index 796244f4d6..3bb69c4572 100755 --- a/rocm_smi/src/rocm_smi_utils.cc +++ b/rocm_smi/src/rocm_smi_utils.cc @@ -70,7 +70,6 @@ #include "rocm_smi/rocm_smi_device.h" #include "rocm_smi/rocm_smi_logger.h" -using namespace ROCmLogging; namespace amd { namespace smi { diff --git a/tests/amd_smi_test/functional/power_read.cc b/tests/amd_smi_test/functional/power_read.cc index d1f9a4a7e4..b4e26ce7b3 100755 --- a/tests/amd_smi_test/functional/power_read.cc +++ b/tests/amd_smi_test/functional/power_read.cc @@ -111,6 +111,7 @@ void TestPowerRead::Run(void) { std::cout << "\t**Power Cap Range: " << info.min_power_cap << " to " << info.max_power_cap << " uW" << std::endl; } + // TODO: Add current_socket_power tests } } } diff --git a/tests/amd_smi_test/test_utils.cc b/tests/amd_smi_test/test_utils.cc index d6127eb7a1..06fa98fe7e 100644 --- a/tests/amd_smi_test/test_utils.cc +++ b/tests/amd_smi_test/test_utils.cc @@ -56,6 +56,8 @@ static const std::map kDevFWNameMap = { {FW_ID_CP_ME, "me"}, {FW_ID_CP_MEC1, "mec1"}, {FW_ID_CP_MEC2, "mec2"}, + {FW_ID_CP_MES, "mes"}, + {FW_ID_MES_KIQ, "mes_kiq"}, // TODO: double check {FW_ID_CP_PFP, "pfp"}, {FW_ID_RLC, "rlc"}, {FW_ID_RLC_SRLG, "rlc_srlg"},