amdsmi_cli: Add support for CPU specific API in amdsmi_cli tool

- Add support for CPU only if only the hsmp driver is present.
  - Add support for both amdgpu and CPU devices if both the amdgpu driver and CPUs are present.
  - Add support for socket power metrics
  - Add support for HSMP protocol version, PROCHOT status, current fclk/mclk frequency,
    current cclk frequency limit, C0 residency, lclk DPM level range and socket frequency range
  - Add CPU socket current frequency limit.
  - Update the tool with the IO bandwidth, XGMI bandwidth,
    power telemetry rails, APB enable and APB disable APIs
  - Add support set_pow_limit, set_xgmi_link_width, set_lclk_dpm_level, core_boost_limit,
    curr_active_freq_core_limit, set_soc_boost_limit and set_core_boost_limit.
  - Add support for the following CPU-related APIs in the tool:
    core_energy, socket energy, set power efficiency mode, ddr bandwidth,
    cpu temperature, dimm temperature range rate, dimm power consumption
    and dimm thermal temperature.
  - Add support for set_gmi3_link_width, set_pcie_lnk_rate, set_df_pstate_range

Change-Id: I5a35d1cceeb7df0bc8b7116df7c27bb7f376e839
Этот коммит содержится в:
Naveen Krishna Chatradhi
2023-12-07 07:33:17 -08:00
коммит произвёл khashaik
родитель 94d3c563a3
Коммит 19030e5b72
5 изменённых файлов: 1345 добавлений и 54 удалений
+813 -24
Просмотреть файл
@@ -42,20 +42,45 @@ class AMDSMICommands():
def __init__(self, format='human_readable', destination='stdout') -> None:
self.helpers = AMDSMIHelpers()
self.logger = AMDSMILogger(format=format, destination=destination)
self.device_handles = []
self.cpu_handles = []
self.core_handles = []
try:
self.device_handles = amdsmi_interface.amdsmi_get_processor_handles()
except amdsmi_exception.AmdSmiLibraryException as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
logging.error('Unable to get devices, driver not initialized (amdgpu not found in modules)')
sys.exit(-1)
else:
raise e
if len(self.device_handles) == 0:
logging.error('Unable to detect any devices, check if driver is initialized (amdgpu not found in modules)')
sys.exit(-1)
logging.info('Unable to detect any devices, check if driver is initialized (amdgpu not found in modules)')
# Fetch CPU handles
try:
self.cpu_handles = amdsmi_interface.amdsmi_get_cpusocket_handles()
except amdsmi_exception.AmdSmiLibraryException as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_NO_DRV):
logging.info('Unable to get CPU devices, hsmp driver not loaded')
else:
raise e
# core handles
try:
self.core_handles = amdsmi_interface.amdsmi_get_cpucore_handles()
except amdsmi_exception.AmdSmiLibraryException as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_NO_DRV):
logging.info('Unable to get CORE devices, hsmp driver not loaded')
else:
raise e
if (len(self.device_handles) == 0 and len(self.cpu_handles) == 0 and len(self.core_handles) == 0):
logging.error('Unable to detect any devices, check if amdgpu and hsmp drivers are initialized')
sys.exit(-1)
self.stop = ''
@@ -135,14 +160,77 @@ class AMDSMICommands():
self.logger.print_output()
def static(self, args, multiple_devices=False, gpu=None, asic=None,
bus=None, vbios=None, limit=None, driver=None, ras=None,
board=None, numa=None, vram=None, cache=None, partition=None,
dfc_ucode=None, fb_info=None, num_vf=None):
def get_static_cpu(self, args, multiple_devices=False, cpu=None):
    """Collect static information (SMU firmware, HSMP protocol version) for a CPU.

    Args:
        args (Namespace): Namespace containing the parsed CLI args. ``args.cpu``,
            ``args.smu`` and ``args.interface_ver`` are read and may be overwritten.
        multiple_devices (bool, optional): True if checking for multiple devices; the
            result is then only stored, not printed. Defaults to False.
        cpu (device_handle, optional): device_handle for target device; replaces
            ``args.cpu`` when truthy. Defaults to None.

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    if cpu:
        args.cpu = cpu

    # Options applicable to the current platform. When the caller selected none of
    # them, every one is switched on so a bare invocation reports everything.
    curr_platform_cpu_args = ("smu", "interface_ver")
    if not any([getattr(args, name) for name in curr_platform_cpu_args]):
        for name in curr_platform_cpu_args:
            setattr(args, name, True)

    # Without CPU handles there is nothing to query, and cpu_id (used by the failure
    # logging below) would never be bound, so stop here.
    if not self.cpu_handles:
        logging.debug("No CPU handles available, skipping CPU static information")
        return

    handled_multiple_cpus, device_handle = self.helpers.handle_cpus(args,
                                                                    self.logger,
                                                                    self.get_static_cpu)
    if handled_multiple_cpus:
        return  # handle_cpus was given this method as its callback (recursive call)
    args.cpu = device_handle

    # get cpu id for logging
    cpu_id = self.helpers.get_cpu_id_from_device_handle(args.cpu)
    logging.debug(f"Static Arg information for CPU {cpu_id} on {self.helpers.os_info()}")

    static_dict = {}

    if args.smu:
        try:
            smu = amdsmi_interface.amdsmi_get_cpu_smu_fw_version(args.cpu)
            static_dict["smu"] = {"FW_VERSION" : f"{ smu['smu_fw_major_ver_num']}"
                                  f":{smu['smu_fw_minor_ver_num']}:{smu['smu_fw_debug_ver_num']}"}
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["smu"] = "N/A"
            logging.debug("Failed to get SMU FW for cpu %s | %s", cpu_id, e.get_error_info())

    if args.interface_ver:
        static_dict["interface_version"] = {}
        try:
            intf_ver = amdsmi_interface.amdsmi_get_cpu_hsmp_proto_ver(args.cpu)
            static_dict["interface_version"]["proto version"] = intf_ver
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["interface_version"]["proto version"] = "N/A"
            logging.debug("Failed to get proto version for cpu %s | %s", cpu_id, e.get_error_info())

    self.logger.store_cpu_output(args.cpu, 'values', static_dict)

    if multiple_devices:
        self.logger.store_multiple_device_output()
        return  # Skip printing when there are multiple devices

    self.logger.print_output(multiple_device_enabled=False)
def get_static_gpu(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None,
limit=None, driver=None, ras=None, board=None, numa=None, vram=None,
cache=None, partition=None, dfc_ucode=None, fb_info=None, num_vf=None):
"""Get Static information for target gpu
Args:
args (Namespace): Namespace containing the parsed CLI args
current_platform_args (list): gpu supported platform arguments
current_platform_values (list): gpu supported platform values for each argument
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
gpu (device_handle, optional): device_handle for target device. Defaults to None.
asic (bool, optional): Value override for args.asic. Defaults to None.
@@ -160,13 +248,10 @@ class AMDSMICommands():
fb_info (bool, optional): Value override for args.fb_info. Defaults to None.
num_vf (bool, optional): Value override for args.num_vf. Defaults to None.
Raises:
IndexError: Index error if gpu list is empty
Returns:
None: Print output via AMDSMILogger to destination
"""
# Set args.* to passed in arguments
if gpu:
args.gpu = gpu
if asic:
@@ -214,26 +299,20 @@ class AMDSMICommands():
current_platform_args += ["dfc_ucode", "fb_info", "num_vf"]
current_platform_values += [args.dfc_ucode, args.fb_info, args.num_vf]
# Handle No GPU passed
if args.gpu == None:
args.gpu = self.device_handles
if (not any(current_platform_values)):
for arg in current_platform_args:
setattr(args, arg, True)
# Handle multiple GPUs
handled_multiple_gpus, device_handle = self.helpers.handle_gpus(args, self.logger, self.static)
handled_multiple_gpus, device_handle = self.helpers.handle_gpus(args, self.logger, self.get_static_gpu)
if handled_multiple_gpus:
return # This function is recursive
args.gpu = device_handle
# Get gpu_id for logging
gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu)
logging.debug(f"Static Arg information for GPU {gpu_id} on {self.helpers.os_info()}")
logging.debug(f"Applicable Args: {current_platform_args}")
logging.debug(f"Arg Values: {current_platform_values}")
# Set the platform applicable args to True if no args are set
if not any(current_platform_values):
for arg in current_platform_args:
setattr(args, arg, True)
static_dict = {}
@@ -588,10 +667,84 @@ class AMDSMICommands():
if multiple_devices:
self.logger.store_multiple_device_output()
return # Skip printing when there are multiple devices
self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override)
def static(self, args, multiple_devices=False, gpu=None, asic=None,
           bus=None, vbios=None, limit=None, driver=None, ras=None,
           board=None, numa=None, vram=None, cache=None, partition=None,
           dfc_ucode=None, fb_info=None, num_vf=None, cpu=None,
           interface_ver=None):
    """Get Static information for target gpu and cpu

    Dispatches to get_static_cpu and/or get_static_gpu depending on which device
    kinds were detected and which were requested on the command line. When neither
    a gpu nor a cpu was requested, both kinds are reported.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
        gpu (device_handle, optional): device_handle for target device. Defaults to None.
        asic (bool, optional): Value override for args.asic. Defaults to None.
        bus (bool, optional): Value override for args.bus. Defaults to None.
        vbios (bool, optional): Value override for args.vbios. Defaults to None.
        limit (bool, optional): Value override for args.limit. Defaults to None.
        driver (bool, optional): Value override for args.driver. Defaults to None.
        ras (bool, optional): Value override for args.ras. Defaults to None.
        board (bool, optional): Value override for args.board. Defaults to None.
        numa (bool, optional): Value override for args.numa. Defaults to None.
        vram (bool, optional): Value override for args.vram. Defaults to None.
        cache (bool, optional): Value override for args.cache. Defaults to None.
        partition (bool, optional): Value override for args.partition. Defaults to None.
        dfc_ucode (bool, optional): Value override for args.dfc_ucode. Defaults to None.
        fb_info (bool, optional): Value override for args.fb_info. Defaults to None.
        num_vf (bool, optional): Value override for args.num_vf. Defaults to None.
        cpu (cpu_handle, optional): cpu_handle for target device. Defaults to None.
        interface_ver (bool, optional): Value override for args.interface_ver. Defaults to None

    Raises:
        SystemExit: via sys.exit(-1) when neither CPU nor GPU handles are present

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    # Set args.* to passed in arguments
    if gpu:
        args.gpu = gpu
    if cpu:
        args.cpu = cpu
    if interface_ver:
        args.interface_ver = interface_ver

    # Remember what the caller asked for before the "all devices" defaults are filled in
    requested_gpus = args.gpu
    requested_cpus = args.cpu
    nothing_requested = (not requested_gpus) and (not requested_cpus)

    # Handle No GPU passed
    if args.gpu is None:
        args.gpu = self.device_handles

    # Handle No CPU passed
    if args.cpu is None:
        args.cpu = self.cpu_handles

    if self.cpu_handles and (nothing_requested or requested_cpus):
        # Keyword arguments matter here: the second positional parameter of
        # get_static_cpu is multiple_devices, not the cpu handle.
        self.get_static_cpu(args, multiple_devices=multiple_devices, cpu=cpu)
    else:
        logging.info("No CPU devices present")

    if self.device_handles and (nothing_requested or requested_gpus):
        self.logger.clear_multiple_devices_ouput()
        self.get_static_gpu(args, multiple_devices, gpu, asic,
                            bus, vbios, limit, driver, ras,
                            board, numa, vram, cache, partition,
                            dfc_ucode, fb_info, num_vf)
    else:
        logging.info("No GPU devices present")

    if not self.cpu_handles and not self.device_handles:
        logging.error("No CPU and GPU devices present")
        sys.exit(-1)
def firmware(self, args, multiple_devices=False, gpu=None, fw_list=True):
""" Get Firmware information for target gpu
@@ -787,7 +940,7 @@ class AMDSMICommands():
self.logger.print_output()
def metric(self, args, multiple_devices=False, watching_output=False, gpu=None,
def metric_gpu(self, args, multiple_devices=False, watching_output=False, gpu=None,
usage=None, watch=None, watch_time=None, iterations=None, power=None,
clock=None, temperature=None, ecc=None, ecc_block=None, pcie=None,
fan=None, voltage_curve=None, overdrive=None, perf_level=None,
@@ -916,7 +1069,7 @@ class AMDSMICommands():
# Store output from multiple devices
for device_handle in args.gpu:
self.metric(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle)
self.metric_gpu(args, multiple_devices=True, watching_output=watching_output, gpu=device_handle)
# Reload original gpus
args.gpu = stored_gpus
@@ -1480,6 +1633,642 @@ class AMDSMICommands():
self.logger.store_watch_output(multiple_device_enabled=False)
def metric_cpu(self, args, multiple_devices=False, cpu=None, power_metrics=None, prochot=None,
               freq_metrics=None, c0_res=None, lclk_dpm_level=None,pwr_svi_telemtry_rails=None,
               io_bandwidth=None, xgmi_bandwidth=None, enable_apb=None, disable_apb=None,
               set_pow_limit=None, set_xgmi_link_width=None, set_lclk_dpm_level=None,
               set_soc_boost_limit=None, metrics_ver=None, metrics_table=None, socket_energy=None,
               set_pwr_eff_mode=None, ddr_bandwidth=None, cpu_temp=None, dimm_temp_range_rate=None,
               dimm_pow_conumption=None, dimm_thermal_sensor=None, set_gmi3_link_width=None,
               set_pcie_lnk_rate=None, set_df_pstate_range=None):
    """Get Metric information for target cpu

    Each selected option triggers one or more amdsmi_interface calls. A call that
    raises AmdSmiLibraryException is reported as "N/A" and logged at debug level.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): True if checking for multiple devices; the
            result is then only stored, not printed. Defaults to False.
        cpu (cpu_handle, optional): device_handle for target device. Defaults to None.
        power_metrics ... set_df_pstate_range (optional): Value overrides for the matching
            args.* attribute (see the override table below). An override is applied
            only when truthy. Defaults to None.

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    # Override table: args attribute -> value passed in by the caller.
    # The attribute names are the ones read further down in this method.
    overrides = {
        "cpu": cpu,
        "cpu_power_metrics": power_metrics,
        "cpu_prochot": prochot,
        "cpu_freq_metrics": freq_metrics,
        "cpu_c0_res": c0_res,
        "cpu_lclk_dpm_level": lclk_dpm_level,
        "cpu_pwr_svi_telemtry_rails": pwr_svi_telemtry_rails,
        "cpu_io_bandwidth": io_bandwidth,
        "cpu_xgmi_bandwidth": xgmi_bandwidth,
        "cpu_enable_apb": enable_apb,
        "cpu_disable_apb": disable_apb,
        "set_cpu_pow_limit": set_pow_limit,
        "set_cpu_xgmi_link_width": set_xgmi_link_width,
        "set_cpu_lclk_dpm_level": set_lclk_dpm_level,
        "set_soc_boost_limit": set_soc_boost_limit,
        "cpu_metrics_ver": metrics_ver,
        "cpu_metrics_table": metrics_table,
        "socket_energy": socket_energy,
        "set_cpu_pwr_eff_mode": set_pwr_eff_mode,
        "cpu_ddr_bandwidth": ddr_bandwidth,
        "cpu_temp": cpu_temp,
        "cpu_dimm_temp_range_rate": dimm_temp_range_rate,
        "cpu_dimm_pow_conumption": dimm_pow_conumption,
        "cpu_dimm_thermal_sensor": dimm_thermal_sensor,
        "set_cpu_gmi3_link_width": set_gmi3_link_width,
        "set_cpu_pcie_lnk_rate": set_pcie_lnk_rate,
        "set_cpu_df_pstate_range": set_df_pstate_range,
    }
    for attr_name, value in overrides.items():
        if value:
            setattr(args, attr_name, value)

    # Options that take no parameters and only read state: these are what a bare
    # invocation (no option selected) reports.
    default_args = ("cpu_power_metrics", "cpu_prochot", "cpu_freq_metrics", "cpu_c0_res",
                    "cpu_pwr_svi_telemtry_rails", "cpu_metrics_ver", "cpu_metrics_table",
                    "socket_energy", "cpu_ddr_bandwidth", "cpu_temp")
    # Options that need user-supplied values or change hardware state: never implied.
    explicit_only_args = ("cpu_lclk_dpm_level", "cpu_io_bandwidth", "cpu_xgmi_bandwidth",
                          "cpu_enable_apb", "cpu_disable_apb", "set_cpu_pow_limit",
                          "set_cpu_xgmi_link_width", "set_cpu_lclk_dpm_level",
                          "set_soc_boost_limit", "set_cpu_pwr_eff_mode",
                          "cpu_dimm_temp_range_rate", "cpu_dimm_pow_conumption",
                          "cpu_dimm_thermal_sensor", "set_cpu_gmi3_link_width",
                          "set_cpu_pcie_lnk_rate", "set_cpu_df_pstate_range")

    # Handle No CPU passed
    if args.cpu is None:
        args.cpu = self.cpu_handles

    if not any([getattr(args, name) for name in default_args + explicit_only_args]):
        for name in default_args:
            setattr(args, name, True)

    # Without CPU handles there is nothing to query, and cpu_id (used by the failure
    # logging below) would never be bound, so stop here.
    if not self.cpu_handles:
        logging.debug("No CPU handles available, skipping CPU metrics")
        return

    handled_multiple_cpus, device_handle = self.helpers.handle_cpus(args,
                                                                    self.logger,
                                                                    self.metric_cpu)
    if handled_multiple_cpus:
        return  # handle_cpus was given this method as its callback (recursive call)
    args.cpu = device_handle

    # get cpu id for logging
    cpu_id = self.helpers.get_cpu_id_from_device_handle(args.cpu)
    logging.debug(f"Metric Arg information for CPU {cpu_id} on {self.helpers.os_info()}")

    static_dict = {}
    set_ok = "Set Operation successful"
    iface = amdsmi_interface

    def _record(section, key, action, call, *call_args, success_text=None):
        """Run one library call and file its outcome under static_dict[section][key].

        Stores the call's return value (or success_text when given) on success and
        "N/A" when the library raises AmdSmiLibraryException.
        """
        bucket = static_dict.setdefault(section, {})
        try:
            result = call(args.cpu, *call_args)
        except amdsmi_exception.AmdSmiLibraryException as e:
            bucket[key] = "N/A"
            logging.debug("Failed to %s for cpu %s | %s", action, cpu_id, e.get_error_info())
        else:
            bucket[key] = result if success_text is None else success_text

    if args.cpu_power_metrics:
        _record("power_metrics", "socket power", "get socket power",
                iface.amdsmi_get_cpu_socket_power)
        _record("power_metrics", "socket power limit", "get socket power limit",
                iface.amdsmi_get_cpu_socket_power_cap)
        _record("power_metrics", "socket max power limit", "get max socket power limit",
                iface.amdsmi_get_cpu_socket_power_cap_max)

    if args.cpu_prochot:
        _record("prochot", "prochot_status", "get prochot status",
                iface.amdsmi_get_cpu_prochot_status)

    if args.cpu_freq_metrics:
        _record("freq_metrics", "fclkmemclk", "get current fclkmemclk freq",
                iface.amdsmi_get_cpu_fclk_mclk)
        _record("freq_metrics", "cclkfreqlimit", "get current cclk freq",
                iface.amdsmi_get_cpu_cclk_limit)
        _record("freq_metrics", "soc_current_active_freq_limit", "get socket current freq limit",
                iface.amdsmi_get_cpu_socket_current_active_freq_limit)
        _record("freq_metrics", "soc_freq_range", "get socket freq range",
                iface.amdsmi_get_cpu_socket_freq_range)

    if args.cpu_c0_res:
        _record("c0_residency", "residency", "get C0 residency",
                iface.amdsmi_get_cpu_socket_c0_residency)

    if args.cpu_lclk_dpm_level:
        _record("socket_dpm", "dpml_level_range", "get socket dpm level range",
                iface.amdsmi_get_cpu_socket_lclk_dpm_level,
                args.cpu_lclk_dpm_level[0][0])

    if args.cpu_pwr_svi_telemtry_rails:
        _record("svi_telemetry_all_rails", "power", "get svi telemetry all rails",
                iface.amdsmi_get_cpu_pwr_svi_telemetry_all_rails)

    if args.cpu_io_bandwidth:
        _record("io_bandwidth", "band_width", "get io bandwidth",
                iface.amdsmi_get_cpu_current_io_bandwidth,
                int(args.cpu_io_bandwidth[0][0]), args.cpu_io_bandwidth[0][1])

    if args.cpu_xgmi_bandwidth:
        _record("xgmi_bandwidth", "band_width", "get xgmi bandwidth",
                iface.amdsmi_get_cpu_current_xgmi_bw,
                int(args.cpu_xgmi_bandwidth[0][0]), args.cpu_xgmi_bandwidth[0][1])

    if args.cpu_enable_apb:
        _record("apbenable", "state", "enable APB",
                iface.amdsmi_cpu_apb_enable,
                success_text="Enabled DF - Pstate performance boost algorithm")

    if args.cpu_disable_apb:
        _record("apbdisable", "state", "disable APB",
                iface.amdsmi_cpu_apb_disable, args.cpu_disable_apb[0][0],
                success_text="Disabled DF - Pstate performance boost algorithm")

    if args.set_cpu_pow_limit:
        _record("set_pow_limit", "Response", "set power limit",
                iface.amdsmi_set_cpu_socket_power_cap, args.set_cpu_pow_limit[0][0],
                success_text=set_ok)

    if args.set_cpu_xgmi_link_width:
        _record("set_xgmi_link_width", "Response", "set xgmi link width",
                iface.amdsmi_set_cpu_xgmi_width,
                args.set_cpu_xgmi_link_width[0][0], args.set_cpu_xgmi_link_width[0][1],
                success_text=set_ok)

    if args.set_cpu_lclk_dpm_level:
        _record("set_lclk_dpm_level", "Response", "set lclk dpm level",
                iface.amdsmi_set_cpu_socket_lclk_dpm_level,
                args.set_cpu_lclk_dpm_level[0][0], args.set_cpu_lclk_dpm_level[0][1],
                args.set_cpu_lclk_dpm_level[0][2],
                success_text=set_ok)

    if args.set_soc_boost_limit:
        _record("set_soc_boost_limit", "Response", "set socket boost limit",
                iface.amdsmi_set_cpu_socket_boostlimit, args.set_soc_boost_limit[0][0],
                success_text=set_ok)

    if args.cpu_metrics_ver:
        _record("metric_version", "version", "get metrics table version",
                iface.amdsmi_get_metrics_table_version)

    if args.cpu_metrics_table:
        # NOTE: amdsmi_get_metrics_table is intentionally not called; it is disabled
        # until a fix lands in the library API, so the value is always reported as N/A.
        static_dict["metrics_table"] = {"response": "N/A"}

    if args.socket_energy:
        _record("socket_energy", "response", "get socket energy",
                iface.amdsmi_get_cpu_socket_energy)

    if args.set_cpu_pwr_eff_mode:
        _record("set_pwr_eff_mode", "Response", "set power efficiency mode",
                iface.amdsmi_set_cpu_pwr_efficiency_mode, args.set_cpu_pwr_eff_mode[0][0],
                success_text=set_ok)

    if args.cpu_ddr_bandwidth:
        _record("ddr_bandwidth", "response", "get ddr bandwidth",
                iface.amdsmi_get_cpu_ddr_bw)

    if args.cpu_temp:
        _record("cpu_temp", "response", "get cpu temperature",
                iface.amdsmi_get_cpu_socket_temperature)

    if args.cpu_dimm_temp_range_rate:
        _record("dimm_temp_range_rate", "response",
                "get dimm temperature range and refresh rate",
                iface.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate,
                args.cpu_dimm_temp_range_rate[0][0])

    if args.cpu_dimm_pow_conumption:
        _record("dimm_pow_conumption", "response", "get dimm power consumption",
                iface.amdsmi_get_cpu_dimm_power_consumption,
                args.cpu_dimm_pow_conumption[0][0])

    if args.cpu_dimm_thermal_sensor:
        _record("dimm_thermal_sensor", "response", "get dimm thermal sensor",
                iface.amdsmi_get_cpu_dimm_thermal_sensor,
                args.cpu_dimm_thermal_sensor[0][0])

    if args.set_cpu_gmi3_link_width:
        _record("set_gmi3_link_width", "response", "set gmi3 link width",
                iface.amdsmi_set_cpu_gmi3_link_width_range,
                args.set_cpu_gmi3_link_width[0][0], args.set_cpu_gmi3_link_width[0][1],
                success_text=set_ok)

    if args.set_cpu_pcie_lnk_rate:
        # The library call's return value is reported under "prev_mode".
        _record("set_pcie_lnk_rate", "prev_mode", "set pcie link rate",
                iface.amdsmi_set_cpu_pcie_link_rate, args.set_cpu_pcie_lnk_rate[0][0])

    if args.set_cpu_df_pstate_range:
        _record("set_df_pstate_range", "response", "set df pstate range",
                iface.amdsmi_set_cpu_df_pstate_range,
                args.set_cpu_df_pstate_range[0][0], args.set_cpu_df_pstate_range[0][1],
                success_text=set_ok)

    self.logger.store_cpu_output(args.cpu, 'values', static_dict)

    if multiple_devices:
        self.logger.store_multiple_device_output()
        return  # Skip printing when there are multiple devices

    self.logger.print_output(multiple_device_enabled=False)
def metric_core(self, args, multiple_devices=False, core=None, boost_limit=None,
                curr_active_freq_core_limit=None, set_core_boost_limit=None, core_energy=None):
    """Collect (and optionally set) per-core values and hand them to the logger.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): True when called once per core for a
            multi-core selection; output is then stored instead of printed.
            Defaults to False.
        core (device_handle, optional): Value override for args.core. Defaults to None.
        boost_limit (bool, optional): Value override for args.core_boost_limit.
            Defaults to None.
        curr_active_freq_core_limit (bool, optional): Value override for
            args.core_curr_active_freq_core_limit. Defaults to None.
        set_core_boost_limit (list, optional): Value override for
            args.set_core_boost_limit. Defaults to None.
        core_energy (bool, optional): Value override for args.core_energy.
            Defaults to None.

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    # Explicit keyword overrides win over whatever the parser put on args.
    if core:
        args.core = core
    if boost_limit:
        args.core_boost_limit = boost_limit
    if curr_active_freq_core_limit:
        args.core_curr_active_freq_core_limit = curr_active_freq_core_limit
    if set_core_boost_limit:
        # Must use the set_core_boost_limit override itself, not boost_limit.
        args.set_core_boost_limit = set_core_boost_limit
    if core_energy:
        args.core_energy = core_energy

    # store core args that are applicable to the current platform
    curr_platform_core_args = ["core_boost_limit", "core_curr_active_freq_core_limit",
                               "set_core_boost_limit", "core_energy"]
    curr_platform_core_values = [args.core_boost_limit, args.core_curr_active_freq_core_limit,
                                 args.set_core_boost_limit, args.core_energy]

    # Handle No core passed
    if args.core is None:
        args.core = self.core_handles

    # With no option selected, enable every read-only query; the setter is
    # never enabled implicitly.
    if not any(curr_platform_core_values):
        for arg in curr_platform_core_args:
            if arg != "set_core_boost_limit":
                setattr(args, arg, True)

    # Nothing to query when no core handles were discovered.
    if not len(self.core_handles):
        return

    handled_multiple_cores, device_handle = self.helpers.handle_cores(args,
                                                                      self.logger,
                                                                      self.metric_core)
    if handled_multiple_cores:
        return  # This function is recursive

    args.core = device_handle

    # get core id for logging
    core_id = self.helpers.get_core_id_from_device_handle(args.core)
    logging.debug(f"Static Arg information for Core {core_id} on {self.helpers.os_info()}")

    static_dict = {}

    if args.core_boost_limit:
        static_dict["boost_limit"] = {}
        try:
            boost_limit = amdsmi_interface.amdsmi_get_cpu_core_boostlimit(args.core)
            static_dict["boost_limit"]["value"] = boost_limit
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["boost_limit"]["value"] = "N/A"
            logging.debug("Failed to get core boost limit for core %s | %s", core_id, e.get_error_info())

    if args.core_curr_active_freq_core_limit:
        static_dict["curr_active_freq_core_limit"] = {}
        try:
            freq = amdsmi_interface.amdsmi_get_cpu_core_current_freq_limit(args.core)
            static_dict["curr_active_freq_core_limit"]["value"] = freq
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["curr_active_freq_core_limit"]["value"] = "N/A"
            logging.debug("Failed to get current active frequency core for core %s | %s", core_id, e.get_error_info())

    if args.set_core_boost_limit:
        static_dict["set_core_boost_limit"] = {}
        try:
            amdsmi_interface.amdsmi_set_cpu_core_boostlimit(args.core, args.set_core_boost_limit[0][0])
            static_dict["set_core_boost_limit"]["Response"] = "Set Operation successful"
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["set_core_boost_limit"]["Response"] = "N/A"
            logging.debug("Failed to set core boost limit for core %s | %s", core_id, e.get_error_info())

    if args.core_energy:
        static_dict["core_energy"] = {}
        try:
            energy = amdsmi_interface.amdsmi_get_cpu_core_energy(args.core)
            static_dict["core_energy"]["value"] = energy
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["core_energy"]["value"] = "N/A"
            logging.debug("Failed to get core energy for core %s | %s", core_id, e.get_error_info())

    self.logger.store_core_output(args.core, 'values', static_dict)

    if multiple_devices:
        self.logger.store_multiple_device_output()
        return  # Skip printing when there are multiple devices

    self.logger.print_output(multiple_device_enabled=False)
def metric(self, args, multiple_devices=False, watching_output=False, gpu=None,
           usage=None, watch=None, watch_time=None, iterations=None, power=None,
           clock=None, temperature=None, ecc=None, ecc_block=None, pcie=None,
           fan=None, voltage_curve=None, overdrive=None, perf_level=None,
           xgmi_err=None, energy=None, mem_usage=None, schedule=None,
           guard=None, guest_data=None, fb_usage=None, xgmi=None, cpu=None,
           cpu_power_metrics=None, prochot=None, freq_metrics=None, c0_res=None,
           lclk_dpm_level=None, pwr_svi_telemtry_rails=None, io_bandwidth=None,
           xgmi_bandwidth=None, enable_apb=None, disable_apb=None, set_pow_limit=None,
           set_xgmi_link_width=None, set_lclk_dpm_level=None, set_soc_boost_limit=None,
           metrics_ver=None, metrics_table=None, socket_energy=None, set_pwr_eff_mode=None,
           ddr_bandwidth=None, cpu_temp=None, dimm_temp_range_rate=None, dimm_pow_conumption=None,
           dimm_thermal_sensor=None, set_gmi3_link_width=None, set_pcie_lnk_rate=None,
           set_df_pstate_range=None, core=None, boost_limit=None,
           curr_active_freq_core_limit=None, set_core_boost_limit=None, core_energy=None):
    """Dispatch the metric subcommand to the GPU, CPU and/or core handlers.

    A handler runs when handles of its kind exist, when either no device was
    selected at all or its own device kind was selected, and when no option
    belonging to one of the other two device kinds was given.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): Forwarded to every handler.
            Defaults to False.
        watching_output (bool, optional): Forwarded to metric_gpu.
            Defaults to False.
        gpu, usage, watch, watch_time, iterations, power, clock, temperature,
        ecc, ecc_block, pcie, fan, voltage_curve, overdrive, perf_level,
        xgmi_err, energy, mem_usage, schedule, guard, guest_data, fb_usage,
        xgmi (optional): Forwarded positionally to metric_gpu. Default to None.
        cpu, cpu_power_metrics, prochot, freq_metrics, c0_res, lclk_dpm_level,
        pwr_svi_telemtry_rails, io_bandwidth, xgmi_bandwidth, enable_apb,
        disable_apb, set_pow_limit, set_xgmi_link_width, set_lclk_dpm_level,
        set_soc_boost_limit, metrics_ver, metrics_table, socket_energy,
        set_pwr_eff_mode, ddr_bandwidth, cpu_temp, dimm_temp_range_rate,
        dimm_pow_conumption, dimm_thermal_sensor, set_gmi3_link_width,
        set_pcie_lnk_rate, set_df_pstate_range (optional): Forwarded
            positionally to metric_cpu. Default to None.
        core, boost_limit, curr_active_freq_core_limit, set_core_boost_limit,
        core_energy (optional): Forwarded positionally to metric_core.
            Default to None.

    Raises:
        SystemExit: exit code -1 when there are no GPU, CPU or core handles

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    gpus = args.gpu
    cpus = args.cpu
    cores = args.core

    # Was any option belonging to each device kind supplied on the CLI?
    gpu_flags = [args.gpu, args.usage, args.watch, args.watch_time, args.iterations,
                 args.power, args.clock, args.temperature, args.ecc, args.ecc_block,
                 args.pcie, args.fan, args.voltage_curve, args.overdrive, args.perf_level,
                 args.xgmi_err, args.energy, args.mem_usage]
    cpu_flags = [args.cpu, args.cpu_power_metrics, args.cpu_prochot,
                 args.cpu_freq_metrics, args.cpu_c0_res, args.cpu_lclk_dpm_level,
                 args.cpu_pwr_svi_telemtry_rails, args.cpu_io_bandwidth, args.cpu_xgmi_bandwidth,
                 args.cpu_enable_apb, args.cpu_disable_apb, args.set_cpu_pow_limit,
                 args.set_cpu_xgmi_link_width, args.set_cpu_lclk_dpm_level,
                 args.set_soc_boost_limit, args.cpu_metrics_ver, args.cpu_metrics_table,
                 args.socket_energy, args.set_cpu_pwr_eff_mode, args.cpu_ddr_bandwidth,
                 args.cpu_temp, args.cpu_dimm_temp_range_rate, args.cpu_dimm_pow_conumption,
                 args.cpu_dimm_thermal_sensor, args.set_cpu_gmi3_link_width,
                 args.set_cpu_pcie_lnk_rate, args.set_cpu_df_pstate_range]
    core_flags = [args.core_boost_limit, args.core_curr_active_freq_core_limit,
                  args.set_core_boost_limit, args.core_energy]
    gpu_options = any(gpu_flags)
    cpu_options = any(cpu_flags)
    core_options = any(core_flags)

    def _is_targeted(selection):
        # Targeted when nothing at all was selected, or this kind was selected.
        return bool((not gpus and not cpus and not cores) or selection)

    if len(self.device_handles) and _is_targeted(gpus) and not (cpu_options or core_options):
        self.metric_gpu(args, multiple_devices, watching_output, gpu,
                        usage, watch, watch_time, iterations, power,
                        clock, temperature, ecc, ecc_block, pcie,
                        fan, voltage_curve, overdrive, perf_level,
                        xgmi_err, energy, mem_usage, schedule,
                        guard, guest_data, fb_usage, xgmi)

    if len(self.cpu_handles) and _is_targeted(cpus) and not (gpu_options or core_options):
        self.logger.clear_multiple_devices_ouput()
        self.metric_cpu(args, multiple_devices, cpu, cpu_power_metrics, prochot,
                        freq_metrics, c0_res, lclk_dpm_level, pwr_svi_telemtry_rails,
                        io_bandwidth, xgmi_bandwidth, enable_apb, disable_apb,
                        set_pow_limit, set_xgmi_link_width, set_lclk_dpm_level,
                        set_soc_boost_limit, metrics_ver, metrics_table, socket_energy,
                        set_pwr_eff_mode, ddr_bandwidth, cpu_temp, dimm_temp_range_rate,
                        dimm_pow_conumption, dimm_thermal_sensor, set_gmi3_link_width,
                        set_pcie_lnk_rate, set_df_pstate_range)

    if len(self.core_handles) and _is_targeted(cores) and not (gpu_options or cpu_options):
        self.logger.clear_multiple_devices_ouput()
        self.metric_core(args, multiple_devices, core, boost_limit,
                         curr_active_freq_core_limit, set_core_boost_limit,
                         core_energy)

    if not (len(self.cpu_handles) or len(self.device_handles) or len(self.core_handles)):
        logging.error("No CPU and GPU devices present")
        sys.exit(-1)
def process(self, args, multiple_devices=False, watching_output=False,
gpu=None, general=None, engine=None, pid=None, name=None,
watch=None, watch_time=None, iterations=None):
+279 -25
Просмотреть файл
@@ -116,6 +116,97 @@ class AMDSMIHelpers():
return self._is_windows
def get_cpu_choices(self):
    """Return the selectable CPU sockets and a help string listing them.

    The dictionary maps the socket index (as a string) to
    {"Device Handle": handle}; when at least one socket is found it also
    carries an "all" entry whose value is the string "all". The help string
    has one "ID: <n>" line per socket followed by the "all" line.

    params:
        None
    return:
        (dict, str) : (cpu_choices, cpu_choices_str)
    """
    cpu_choices = {}
    cpu_choices_str = ""
    cpu_handles = []

    try:
        # amdsmi_get_cpusocket_handles() returns the cpu socket handles stored for cpu_id
        cpu_handles = amdsmi_interface.amdsmi_get_cpusocket_handles()
    except amdsmi_interface.AmdSmiLibraryException as e:
        if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                          amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
            logging.info('Unable to get device choices, driver not initialized (amdhsmp not found in modules)')
        else:
            raise e

    if len(cpu_handles) == 0:
        logging.info('Unable to find any devices, check if driver is initialized (amdhsmp not found in modules)')
    else:
        # Width used to align the "all" line in cpu_choices_str
        max_padding = int(math.log10(len(cpu_handles))) + 1

        for cpu_id, device_handle in enumerate(cpu_handles):
            cpu_choices[str(cpu_id)] = {
                "Device Handle": device_handle
            }
            cpu_choices_str += f"ID: {cpu_id}\n"

        # Add the all option to the cpu_choices; kept in this branch because
        # max_padding only exists when handles were found.
        cpu_choices["all"] = "all"
        cpu_choices_str += f" all{' ' * max_padding}| Selects all devices\n"

    return (cpu_choices, cpu_choices_str)
def get_core_choices(self):
    """Return the selectable CPU cores and a help string describing them.

    The dictionary maps the core index (as a string) to
    {"Device Handle": handle}; when at least one core is found it also
    carries an "all" entry whose value is the string "all". The help string
    has a single "ID: 0 - <last index>" range line followed by the "all" line.

    params:
        None
    return:
        (dict, str) : (core_choices, core_choices_str)
    """
    core_choices = {}
    core_choices_str = ""
    core_handles = []

    try:
        # amdsmi_get_cpucore_handles() returns the core handles stored for core_id
        core_handles = amdsmi_interface.amdsmi_get_cpucore_handles()
    except amdsmi_interface.AmdSmiLibraryException as e:
        if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                          amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
            logging.info('Unable to get device choices, driver not initialized (amdhsmp not found in modules)')
        else:
            raise e

    if len(core_handles) == 0:
        logging.info('Unable to find any devices, check if driver is initialized (amdhsmp not found in modules)')
    else:
        # Width used to align the "all" line in core_choices_str
        max_padding = int(math.log10(len(core_handles))) + 1

        for core_id, device_handle in enumerate(core_handles):
            core_choices[str(core_id)] = {
                "Device Handle": device_handle
            }

        # One range line for the whole set instead of one line per core.
        core_choices_str += f"ID: 0 - {len(core_handles) - 1}\n"

        # Add the all option to the core_choices; kept in this branch because
        # max_padding only exists when handles were found.
        core_choices["all"] = "all"
        core_choices_str += f" all{' ' * max_padding}| Selects all devices\n"

    return (core_choices, core_choices_str)
def get_output_format(self):
"""Returns the output format read from sys.argv
Returns:
@@ -142,6 +233,7 @@ class AMDSMIHelpers():
"""
gpu_choices = {}
gpu_choices_str = ""
device_handles = []
try:
# amdsmi_get_processor_handles returns the device_handles storted for gpu_id
@@ -149,36 +241,34 @@ class AMDSMIHelpers():
except amdsmi_interface.AmdSmiLibraryException as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
logging.error('Unable to get device choices, driver not initialized (amdgpu not found in modules)')
sys.exit(-1)
logging.info('Unable to get device choices, driver not initialized (amdgpu not found in modules)')
else:
raise e
if len(device_handles) == 0:
logging.error('Unable to find any devices, check if driver is initialized (amdgpu not found in modules)')
sys.exit(-1)
logging.info('Unable to find any devices, check if driver is initialized (amdgpu not found in modules)')
else:
# Handle spacing for the gpu_choices_str
max_padding = int(math.log10(len(device_handles))) + 1
# Handle spacing for the gpu_choices_str
max_padding = int(math.log10(len(device_handles))) + 1
for gpu_id, device_handle in enumerate(device_handles):
bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(device_handle)
uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(device_handle)
gpu_choices[str(gpu_id)] = {
"BDF": bdf,
"UUID": uuid,
"Device Handle": device_handle,
}
for gpu_id, device_handle in enumerate(device_handles):
bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(device_handle)
uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(device_handle)
gpu_choices[str(gpu_id)] = {
"BDF": bdf,
"UUID": uuid,
"Device Handle": device_handle,
}
if gpu_id == 0:
id_padding = max_padding
else:
id_padding = max_padding - int(math.log10(gpu_id))
gpu_choices_str += f"ID: {gpu_id}{' ' * id_padding}| BDF: {bdf} | UUID: {uuid}\n"
if gpu_id == 0:
id_padding = max_padding
else:
id_padding = max_padding - int(math.log10(gpu_id))
gpu_choices_str += f"ID: {gpu_id}{' ' * id_padding}| BDF: {bdf} | UUID: {uuid}\n"
# Add the all option to the gpu_choices
gpu_choices["all"] = "all"
gpu_choices_str += f" all{' ' * max_padding}| Selects all devices\n"
# Add the all option to the gpu_choices
gpu_choices["all"] = "all"
gpu_choices_str += f" all{' ' * max_padding}| Selects all devices\n"
return (gpu_choices, gpu_choices_str)
@@ -234,11 +324,89 @@ class AMDSMIHelpers():
return True, selected_device_handles
def handle_gpus(self, args, logger, subcommand):
def get_device_handles_from_cpu_selections(self, cpu_selections: List[str], cpu_choices=None):
    """Convert provided cpu_selections to device_handles

    Args:
        cpu_selections (list[str] | str): Selected CPU ID(s), or 'all'
            ex: ID:0
        cpu_choices (dict{cpu_choices}): Dictionary of the possible cpu_choices;
            fetched from get_cpu_choices() when None

    Returns:
        (True, list[device_handles]): Returns a list of all the cpu_selections converted to
            amdsmi device_handles
        (False, str): Return False, and the first input that failed to be converted
    """
    # Normalise a bare string first so the 'all' check below is a list
    # membership test and not a substring test.
    if isinstance(cpu_selections, str):
        cpu_selections = [cpu_selections]

    if 'all' in cpu_selections:
        return (True, amdsmi_interface.amdsmi_get_cpusocket_handles())

    if cpu_choices is None:
        cpu_choices = self.get_cpu_choices()[0]

    selected_device_handles = []
    for cpu_selection in cpu_selections:
        cpu_info = cpu_choices.get(cpu_selection)
        # Only dict entries describe a device; the "all" entry is a plain
        # string and must never be indexed for a handle.
        if not isinstance(cpu_info, dict):
            logging.debug(f"AMDSMIHelpers.get_device_handles_from_cpu_selections - Unable to convert {cpu_selection}")
            return False, cpu_selection
        selected_device_handles.append(cpu_info['Device Handle'])

    return True, selected_device_handles
def get_device_handles_from_core_selections(self, core_selections: List[str], core_choices=None):
    """Convert provided core_selections to device_handles

    Args:
        core_selections (list[str] | str): Selected CORE ID(s), or 'all'
            ex: ID:0
        core_choices (dict{core_choices}): Dictionary of the possible core_choices;
            fetched from get_core_choices() when None

    Returns:
        (True, list[device_handles]): Returns a list of all the core_selections converted to
            amdsmi device_handles
        (False, str): Return False, and the first input that failed to be converted
    """
    # Normalise a bare string first so the 'all' check below is a list
    # membership test and not a substring test.
    if isinstance(core_selections, str):
        core_selections = [core_selections]

    if 'all' in core_selections:
        return (True, amdsmi_interface.amdsmi_get_cpucore_handles())

    if core_choices is None:
        core_choices = self.get_core_choices()[0]

    selected_device_handles = []
    for core_selection in core_selections:
        core_info = core_choices.get(core_selection)
        # Validity is decided per selection; a match for an earlier selection
        # must not make a later invalid one pass. The "all" entry is a plain
        # string and is never a device record.
        if not isinstance(core_info, dict):
            logging.debug(f"AMDSMIHelpers.get_device_handles_from_core_selections - Unable to convert {core_selection}")
            return False, core_selection
        selected_device_handles.append(core_info['Device Handle'])

    return True, selected_device_handles
def handle_gpus(self, args,logger, subcommand):
"""This function will run execute the subcommands based on the number
of gpus passed in via args.
params:
args - argparser args to pass to subcommand
current_platform_args (list) - GPU supported platform arguments
current_platform_values (list) - GPU supported values for the arguments
logger (AMDSMILogger) - Logger to print out output
subcommand (AMDSMICommands) - Function that can handle multiple gpus
@@ -260,11 +428,72 @@ class AMDSMIHelpers():
args.gpu = args.gpu[0]
return False, args.gpu
else:
raise IndexError("args.gpu should not be an empty list")
logging.debug("args.gpu has an empty list")
else:
return False, args.gpu
def handle_cpus(self, args, logger, subcommand):
    """This function will execute the subcommand based on the number
    of cpus passed in via args.

    params:
        args - argparser args to pass to subcommand
        logger (AMDSMILogger) - Logger to print out output
        subcommand (AMDSMICommands) - Function that can handle multiple cpus
    return:
        tuple(bool, device_handle) :
            bool - True if executed subcommand for multiple devices
            device_handle - the single device handle when exactly one was
                selected (args.cpu is unwrapped to it), args.cpu itself when
                it is not a list, or None when args.cpu is an empty list
            (handled_multiple_cpus, device_handle)
    """
    if not isinstance(args.cpu, list):
        return False, args.cpu

    if len(args.cpu) > 1:
        for device_handle in args.cpu:
            # Handle multiple_devices to print all output at once
            subcommand(args, multiple_devices=True, cpu=device_handle)
        logger.print_output(multiple_device_enabled=True)
        return True, args.cpu

    if len(args.cpu) == 1:
        args.cpu = args.cpu[0]
        return False, args.cpu

    # Callers unpack a 2-tuple, so the empty case must return one as well.
    logging.debug("args.cpu has empty list")
    return False, None
def handle_cores(self, args, logger, subcommand):
    """This function will execute the subcommand based on the number
    of cores passed in via args.

    params:
        args - argparser args to pass to subcommand
        logger (AMDSMILogger) - Logger to print out output
        subcommand (AMDSMICommands) - Function that can handle multiple cores
    return:
        tuple(bool, device_handle) :
            bool - True if executed subcommand for multiple devices
            device_handle - the single device handle when exactly one was
                selected (args.core is unwrapped to it), args.core itself when
                it is not a list, or None when args.core is an empty list
            (handled_multiple_cores, device_handle)
    """
    if not isinstance(args.core, list):
        return False, args.core

    if len(args.core) > 1:
        for device_handle in args.core:
            # Handle multiple_devices to print all output at once
            subcommand(args, multiple_devices=True, core=device_handle)
        logger.print_output(multiple_device_enabled=True)
        return True, args.core

    if len(args.core) == 1:
        args.core = args.core[0]
        return False, args.core

    # Callers unpack a 2-tuple, so the empty case must return one as well.
    logging.debug("args.core has empty list")
    return False, None
def handle_watch(self, args, subcommand, logger):
"""This function will run the subcommand multiple times based
on the passed watch, watch_time, and iterations passed in.
@@ -326,6 +555,31 @@ class AMDSMIHelpers():
"Unable to find gpu ID from device_handle")
def get_cpu_id_from_device_handle(self, input_device_handle):
    """Return the position of input_device_handle among the cpu socket handles.

    The handles returned by amdsmi_interface.amdsmi_get_cpusocket_handles()
    are scanned in order and compared by their .value attribute; the index of
    the first match is returned. AmdSmiParameterException is raised when no
    handle matches.
    """
    socket_handles = amdsmi_interface.amdsmi_get_cpusocket_handles()
    match = next(
        (position for position, candidate in enumerate(socket_handles)
         if input_device_handle.value == candidate.value),
        None,
    )
    if match is not None:
        return match
    raise amdsmi_exception.AmdSmiParameterException(input_device_handle,
                                                    amdsmi_interface.amdsmi_wrapper.amdsmi_processor_handle,
                                                    "Unable to find cpu ID from device_handle")
def get_core_id_from_device_handle(self, input_device_handle):
    """Return the position of input_device_handle among the cpu core handles.

    The handles returned by amdsmi_interface.amdsmi_get_cpucore_handles()
    are scanned in order and compared by their .value attribute; the index of
    the first match is returned. AmdSmiParameterException is raised when no
    handle matches.
    """
    core_handles = amdsmi_interface.amdsmi_get_cpucore_handles()
    match = next(
        (position for position, candidate in enumerate(core_handles)
         if input_device_handle.value == candidate.value),
        None,
    )
    if match is not None:
        return match
    raise amdsmi_exception.AmdSmiParameterException(input_device_handle,
                                                    amdsmi_interface.amdsmi_wrapper.amdsmi_processor_handle,
                                                    "Unable to find core ID from device_handle")
def get_amd_gpu_bdfs(self):
"""Return a list of GPU BDFs visibile to amdsmi
+42 -4
Просмотреть файл
@@ -53,17 +53,41 @@ def check_amdgpu_driver():
return False
def check_amdhsmp_driver():
    """ Returns True when /sys/module/amd_hsmp/initstate exists and reads "live" """
    initstate = Path("/sys/module/amd_hsmp/initstate")
    if not initstate.exists():
        return False
    if initstate.read_text(encoding="ascii").strip() != "live":
        return False
    return True
def init_amdsmi(flag=amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS):
""" Initializes AMDSMI
Raises:
err: AmdSmiLibraryException if not successful
"""
gpu_flag = False;
cpu_flag = False;
# Check if both the amdgpu and amdhsmp driver is up and handle error gracefully
if check_amdgpu_driver() and check_amdhsmp_driver():
# init AMD APUS
try:
amdsmi_interface.amdsmi_init(amdsmi_interface.AmdSmiInitFlags.INIT_AMD_APUS)
except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
logging.error("Drivers not loaded (amdgpu and hsmp drivers not found in modules)")
sys.exit(-1)
else:
raise e
# # Check if amdgpu driver is up & Handle error gracefully
if check_amdgpu_driver():
elif check_amdgpu_driver():
# Only init AMD GPUs for now, waiting for future support for AMD CPUs
try:
amdsmi_interface.amdsmi_init(flag)
amdsmi_interface.amdsmi_init(amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS)
except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
@@ -72,9 +96,23 @@ def init_amdsmi(flag=amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS):
else:
raise e
logging.debug("AMDSMI initialized successfully, but initstate was not live")
elif check_amdhsmp_driver():
# Only init AMD CPUs
try:
amdsmi_interface.amdsmi_init(amdsmi_interface.AmdSmiInitFlags.INIT_AMD_CPUS)
cpu_flag = True
except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e:
if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
logging.error("Driver not loaded (hsmp not found in modules)")
sys.exit(-1)
else:
raise e
else:
logging.error("Driver not found (amdgpu not found in modules)")
sys.exit(-1)
pass
logging.debug("AMDSMI initialized successfully")
def shut_down_amdsmi():
+71
Просмотреть файл
@@ -72,6 +72,8 @@ class AMDSMILogger():
def is_human_readable_format(self):
    """Return True when self.format equals LoggerFormat.human_readable's value."""
    if self.format == self.LoggerFormat.human_readable.value:
        return True
    return False
def clear_multiple_devices_ouput(self):
    """Empty self.multiple_device_output in place."""
    accumulated = self.multiple_device_output
    accumulated.clear()
def _capitalize_keys(self, input_dict):
output_dict = {}
@@ -216,6 +218,75 @@ class AMDSMILogger():
self._store_output_amdsmi(gpu_id=gpu_id, argument=argument, data=data)
def store_cpu_output(self, device_handle, argument, data):
    """ Resolve the cpu id for a device handle and store data under it
        params:
            device_handle - device handle object to the target device output
            argument (str) - key to store data
            data (dict | list) - Data store against argument
        return:
            Nothing
    """
    resolved_id = self.helpers.get_cpu_id_from_device_handle(device_handle)
    self._store_cpu_output_amdsmi(
        cpu_id=resolved_id,
        argument=argument,
        data=data,
    )
def store_core_output(self, device_handle, argument, data):
    """ Resolve the core id for a device handle and store data under it
        params:
            device_handle - device handle object to the target device output
            argument (str) - key to store data
            data (dict | list) - Data store against argument
        return:
            Nothing
    """
    resolved_id = self.helpers.get_core_id_from_device_handle(device_handle)
    self._store_core_output_amdsmi(
        core_id=resolved_id,
        argument=argument,
        data=data,
    )
def _store_core_output_amdsmi(self, core_id, argument, data):
if argument == 'timestamp': # Make sure timestamp is the first element in the output
self.output['timestamp'] = int(time.time())
if self.is_json_format() or self.is_human_readable_format():
self.output['core'] = int(core_id)
if argument == 'values' and isinstance(data, dict):
self.output.update(data)
else:
self.output[argument] = data
elif self.is_csv_format():
self.output['core'] = int(core_id)
if argument == 'values' or isinstance(data, dict):
flat_dict = self.flatten_dict(data)
self.output.update(flat_dict)
else:
self.output[argument] = data
else:
raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")
def _store_cpu_output_amdsmi(self, cpu_id, argument, data):
if argument == 'timestamp': # Make sure timestamp is the first element in the output
self.output['timestamp'] = int(time.time())
if self.is_json_format() or self.is_human_readable_format():
self.output['cpu'] = int(cpu_id)
if argument == 'values' and isinstance(data, dict):
self.output.update(data)
else:
self.output[argument] = data
elif self.is_csv_format():
self.output['cpu'] = int(cpu_id)
if argument == 'values' or isinstance(data, dict):
flat_dict = self.flatten_dict(data)
self.output.update(flat_dict)
else:
self.output[argument] = data
else:
raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported")
def _store_output_amdsmi(self, gpu_id, argument, data):
if argument == 'timestamp': # Make sure timestamp is the first element in the output
self.output['timestamp'] = int(time.time())
+140 -1
Просмотреть файл
@@ -72,6 +72,8 @@ class AMDSMIParser(argparse.ArgumentParser):
# Helper variables
self.helpers = AMDSMIHelpers()
self.gpu_choices, self.gpu_choices_str = self.helpers.get_gpu_choices()
self.cpu_choices, self.cpu_choices_str = self.helpers.get_cpu_choices()
self.core_choices, self.core_choices_str = self.helpers.get_core_choices()
self.vf_choices = ['3', '2', '1']
version_string = f"Version: {__version__}"
@@ -233,6 +235,56 @@ class AMDSMIParser(argparse.ArgumentParser):
return _GPUSelectAction
def _cpu_select(self, cpu_choices):
""" Custom argparse action to return the device handle(s) for the cpu(s) selected
This will set the destination (args.cpu) to a list of 1 or more device handles
If 1 or more device handles are not found then raise an ArgumentError for the first invalid cpu seen
"""
amdsmi_helpers = self.helpers
class _CPUSelectAction(argparse.Action):
ouputformat=self.helpers.get_output_format()
# Checks the values
def __call__(self, parser, args, values, option_string=None):
if "all" in cpu_choices:
del cpu_choices["all"]
status, selected_device_handles = amdsmi_helpers.get_device_handles_from_cpu_selections(cpu_selections=values,
cpu_choices=cpu_choices)
if status:
setattr(args, self.dest, selected_device_handles)
else:
if selected_device_handles == '':
raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--cpu", _CPUSelectAction.ouputformat)
else:
raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles,
_CPUSelectAction.ouputformat)
return _CPUSelectAction
def _core_select(self, core_choices):
""" Custom argparse action to return the device handle(s) for the core(s) selected
This will set the destination (args.core) to a list of 1 or more device handles
If 1 or more device handles are not found then raise an ArgumentError for the first invalid core seen
"""
amdsmi_helpers = self.helpers
class _CoreSelectAction(argparse.Action):
ouputformat=self.helpers.get_output_format()
# Checks the values
def __call__(self, parser, args, values, option_string=None):
if "all" in core_choices:
del core_choices["all"]
status, selected_device_handles = amdsmi_helpers.get_device_handles_from_core_selections(core_selections=values,
core_choices=core_choices)
if status:
setattr(args, self.dest, selected_device_handles)
else:
if selected_device_handles == '':
raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--core", _CoreSelectAction.ouputformat)
else:
raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles,
_CoreSelectAction.ouputformat)
return _CoreSelectAction
def _add_command_modifiers(self, subcommand_parser):
json_help = "Displays output in JSON format (human readable by default)."
csv_help = "Displays output in CSV format (human readable by default)."
@@ -274,11 +326,18 @@ class AMDSMIParser(argparse.ArgumentParser):
gpu_help = f"Select a GPU ID, BDF, or UUID from the possible choices:\n{self.gpu_choices_str}"
vf_help = "Gets general information about the specified VF (timeslice, fb info, …).\
\nAvailable only on virtualization OSs"
cpu_help = f"Select a CPU ID from the possible choices:\n{self.cpu_choices_str}"
core_help = f"Select a Core ID from the possible choices:\n{self.core_choices_str}"
# Mutually Exclusive Args within the subparser
device_args = subcommand_parser.add_mutually_exclusive_group(required=required)
device_args.add_argument('-g', '--gpu', action=self._gpu_select(self.gpu_choices),
nargs='+', help=gpu_help)
device_args.add_argument('-U', '--cpu', action=self._cpu_select(self.cpu_choices),
nargs='+', help=cpu_help)
device_args.add_argument('-O', '--core', action=self._core_select(self.core_choices),
nargs='+', help=core_help)
if self.helpers.is_hypervisor():
device_args.add_argument('-v', '--vf', action='store', nargs='+',
@@ -345,11 +404,16 @@ class AMDSMIParser(argparse.ArgumentParser):
fb_help = "Displays Frame Buffer information"
num_vf_help = "Displays number of supported and enabled VFs"
# Options arguments help text for cpu
smu_help = "All SMU FW information"
interface_help = "Displays hsmp interface version"
# Create static subparser
static_parser = subparsers.add_parser('static', help=static_help, description=static_subcommand_help)
static_parser._optionals.title = static_optionals_title
static_parser.formatter_class=lambda prog: AMDSMISubparserHelpFormatter(prog)
static_parser.set_defaults(func=func)
cpu_group = static_parser.add_argument_group("CPU Option<s>")
# Add Universal Arguments
self._add_command_modifiers(static_parser)
@@ -363,7 +427,8 @@ class AMDSMIParser(argparse.ArgumentParser):
static_parser.add_argument('-v', '--vram', action='store_true', required=False, help=vram_help)
static_parser.add_argument('-c', '--cache', action='store_true', required=False, help=cache_help)
static_parser.add_argument('-B', '--board', action='store_true', required=False, help=board_help)
cpu_group.add_argument('-s', '--smu', action='store_true', required=False, help=smu_help)
cpu_group.add_argument('-i', '--interface_ver', action='store_true', required=False, help=interface_help)
# Options to display on Hypervisors and Baremetal
if self.helpers.is_hypervisor() or self.helpers.is_baremetal():
static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help)
@@ -475,11 +540,55 @@ class AMDSMIParser(argparse.ArgumentParser):
fb_usage_help = "Displays total and used Frame Buffer usage information"
xgmi_help = "Table of current XGMI metrics information"
# Help text for cpu options
cpu_power_metrics_help = "Cpu power metrics"
cpu_proc_help = "Displays prochot status"
cpu_freq_help = "Displays currentFclkMemclk frequencies and cclk frequency limit"
cpu_c0_res_help = "Displays C0 residency"
cpu_lclk_dpm_help = "Displays lclk dpm level range. Requires socket ID and nbio id as inputs"
cpu_pwr_svi_telemtry_rails_help = "Displays svi based telemetry for all rails"
cpu_io_bandwidth_help = "Displays current IO bandwidth for the selected CPU.\
\n input parameters are bandwidth type(1) and link ID encodings\
\n i.e. P2, P3, G0 - G7"
cpu_xgmi_bandwidth_help = "Displays current XGMI bandwidth for the selected CPU\
\n input parameters are bandwidth type(1,2,4) and link ID encodings\
\n i.e. P2, P3, G0 - G7"
cpu_enable_apb_help = "Enables the DF p-state performance boost algorithm"
# Parenthesised so the two literals concatenate into a single help string;
# a bare second literal on its own line is a no-op expression statement and
# the input-parameter hint would never reach the --cpu_disable_apb help text.
cpu_disable_apb_help = ("Disables the DF p-state performance boost algorithm. "
                        "Input parameter is DFPstate (0 -3 )")
set_cpu_pow_limit_help = "Set power limit for the given socket. Input parameter is \
power limit value."
set_cpu_xgmi_link_width_help = "Set max and Min linkwidth. Input parameters are \
min and max link width values"
set_cpu_lclk_dpm_level_help = "Sets the max and min dpm level on a given NBIO. Inpur parameters are \
die_index, min dpm, max dpm."
core_boost_limit_help = "Get booslimit for the selected cores"
core_curr_active_freq_core_limit_help = "Get Current CCLK limit set per Core"
set_soc_boost_limit_help = "Sets the boost limit for the given socket. Input parameter is \
socket limit value"
set_core_boost_limit_help = "Sets the boost limit for the given core. Input parameter is \
core limit value"
cpu_metrics_ver_help = "Displays metrics table version"
cpu_metrics_table_help = "Displays metric table"
core_energy_help = "Displays core energy for the selected core"
socket_energy_help = "Displays socket energy for the selected socket"
set_cpu_pwr_eff_mode_help = "Sets the power efficency mode policy. Input parameter is mode."
cpu_ddr_bandwidth_help = "Displays per socket max ddr bw, current utilized bw and current utilized ddr bw in percentage"
cpu_temp_help = "Displays cpu socket temperature"
cpu_dimm_temp_range_rate_help = "Displays dimm temperature range and refresh rate"
cpu_dimm_pow_conumption_help = "Displays dimm power conumption"
cpu_dimm_thermal_sensor_help = "Displays dimm thermal sensor"
set_cpu_gmi3_link_width_help = "Sets max and min gmi3 link width range"
set_cpu_pcie_lnk_rate_help = "Sets pcie link rate"
set_cpu_df_pstate_range_help = "Sets max and min df-pstates"
# Create metric subparser
metric_parser = subparsers.add_parser('metric', help=metric_help, description=metric_subcommand_help)
metric_parser._optionals.title = metric_optionals_title
metric_parser.formatter_class=lambda prog: AMDSMISubparserHelpFormatter(prog)
metric_parser.set_defaults(func=func)
cpu_group = metric_parser.add_argument_group("CPU Option<s>")
set_group = metric_parser.add_argument_group("Set Options<s>")
# Add Universal Arguments
self._add_command_modifiers(metric_parser)
@@ -519,6 +628,36 @@ class AMDSMIParser(argparse.ArgumentParser):
metric_parser.add_argument('-f', '--fb_usage', action='store_true', required=False, help=fb_usage_help)
metric_parser.add_argument('-m', '--xgmi', action='store_true', required=False, help=xgmi_help)
cpu_group.add_argument('--cpu_power_metrics', action='store_true', required=False, help=cpu_power_metrics_help)
cpu_group.add_argument('--cpu_prochot', action='store_true', required=False, help=cpu_proc_help)
cpu_group.add_argument('--cpu_freq_metrics', action='store_true', required=False, help=cpu_freq_help)
cpu_group.add_argument('--cpu_c0_res', action='store_true', required=False, help=cpu_c0_res_help)
cpu_group.add_argument('--cpu_lclk_dpm_level', action='append', required=False, type=int, nargs=1, metavar=("NBIOID"), help=cpu_lclk_dpm_help)
cpu_group.add_argument('--cpu_pwr_svi_telemtry_rails', action='store_true', required=False, help=cpu_pwr_svi_telemtry_rails_help)
cpu_group.add_argument('--cpu_io_bandwidth', action='append', required=False, nargs=2, metavar=("IO_BW","LINKID_NAME"), help=cpu_io_bandwidth_help)
cpu_group.add_argument('--cpu_xgmi_bandwidth', action='append', required=False, nargs=2, metavar=("XGMI_BW","LINKID_NAME"), help=cpu_xgmi_bandwidth_help)
cpu_group.add_argument('--cpu_enable_apb', action='store_true', required=False, help=cpu_enable_apb_help)
cpu_group.add_argument('--cpu_disable_apb', action='append', required=False, type=int, nargs=1, metavar=("DF_PSTATE"), help=cpu_disable_apb_help)
set_group.add_argument('--set_cpu_pow_limit', action='append', required=False, type=int, nargs=1, metavar=("POW_LIMIT"),help=set_cpu_pow_limit_help)
set_group.add_argument('--set_cpu_xgmi_link_width', action='append', required=False, type=int, nargs=2, metavar=("MIN_WIDTH", "MAX_WIDTH"), help=set_cpu_xgmi_link_width_help)
set_group.add_argument('--set_cpu_lclk_dpm_level', action='append', required=False, type=int, nargs=3, metavar=("NBIOID", "MIN_DPM", "MAX_DPM"),help=set_cpu_lclk_dpm_level_help)
cpu_group.add_argument('--core_boost_limit', action='store_true', required=False, help=core_boost_limit_help)
cpu_group.add_argument('--core_curr_active_freq_core_limit', action='store_true', required=False, help=core_curr_active_freq_core_limit_help)
set_group.add_argument('--set_soc_boost_limit', action='append', required=False, type=int, nargs=1, metavar=("BOOST_LIMIT"), help=set_soc_boost_limit_help)
set_group.add_argument('--set_core_boost_limit', action='append', required=False, type=int, nargs=1, metavar=("BOOST_LIMIT"), help=set_core_boost_limit_help)
cpu_group.add_argument('--cpu_metrics_ver', action='store_true', required=False, help=cpu_metrics_ver_help)
cpu_group.add_argument('--cpu_metrics_table', action='store_true', required=False, help=cpu_metrics_table_help)
cpu_group.add_argument('--core_energy', action='store_true', required=False, help=core_energy_help)
cpu_group.add_argument('--socket_energy', action='store_true', required=False, help=socket_energy_help)
set_group.add_argument('--set_cpu_pwr_eff_mode', action='append', required=False, type=int, nargs=1, metavar=("MODE"), help=set_cpu_pwr_eff_mode_help)
cpu_group.add_argument('--cpu_ddr_bandwidth', action='store_true', required=False, help=cpu_ddr_bandwidth_help)
cpu_group.add_argument('--cpu_temp', action='store_true', required=False, help=cpu_temp_help)
cpu_group.add_argument('--cpu_dimm_temp_range_rate', action='append', required=False, type=int, nargs=1, metavar=("DIMM_ADDR"), help=cpu_dimm_temp_range_rate_help)
cpu_group.add_argument('--cpu_dimm_pow_conumption', action='append', required=False, type=int, nargs=1, metavar=("DIMM_ADDR"), help=cpu_dimm_pow_conumption_help)
cpu_group.add_argument('--cpu_dimm_thermal_sensor', action='append', required=False, type=int, nargs=1, metavar=("DIMM_ADDR"), help=cpu_dimm_thermal_sensor_help)
set_group.add_argument('--set_cpu_gmi3_link_width', action='append', required=False, type=int, nargs=2, metavar=("MIN_LW", "MAX_LW"), help=set_cpu_gmi3_link_width_help)
set_group.add_argument('--set_cpu_pcie_lnk_rate', action='append', required=False, type=int, nargs=1, metavar=("LINK_RATE"), help=set_cpu_pcie_lnk_rate_help)
set_group.add_argument('--set_cpu_df_pstate_range', action='append', required=False, type=int, nargs=2, metavar=("MAX_PSTATE", "MIN_PSTATE"), help=set_cpu_df_pstate_range_help)
def _add_process_parser(self, subparsers, func):
if self.helpers.is_hypervisor():