diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index aa2ea7adeb..a38adcfde4 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -42,20 +42,45 @@ class AMDSMICommands(): def __init__(self, format='human_readable', destination='stdout') -> None: self.helpers = AMDSMIHelpers() self.logger = AMDSMILogger(format=format, destination=destination) + self.device_handles = [] + self.cpu_handles = [] + self.core_handles = [] try: self.device_handles = amdsmi_interface.amdsmi_get_processor_handles() except amdsmi_exception.AmdSmiLibraryException as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): logging.error('Unable to get devices, driver not initialized (amdgpu not found in modules)') - sys.exit(-1) else: raise e if len(self.device_handles) == 0: - logging.error('Unable to detect any devices, check if driver is initialized (amdgpu not found in modules)') - sys.exit(-1) + logging.info('Unable to detect any devices, check if driver is initialized (amdgpu not found in modules)') + # Fetch CPU handles + try: + self.cpu_handles = amdsmi_interface.amdsmi_get_cpusocket_handles() + except amdsmi_exception.AmdSmiLibraryException as e: + if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, + amdsmi_interface.amdsmi_wrapper.AMDSMI_NO_DRV): + + logging.info('Unable to get CPU devices, hsmp driver not loaded') + else: + raise e + + # core handles + try: + self.core_handles = amdsmi_interface.amdsmi_get_cpucore_handles() + except amdsmi_exception.AmdSmiLibraryException as e: + if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, + amdsmi_interface.amdsmi_wrapper.AMDSMI_NO_DRV): + logging.info('Unable to get CORE devices, hsmp driver not loaded') + else: + raise e + + if (len(self.device_handles) == 0 and len(self.cpu_handles) == 0 and len(self.core_handles) == 0): + logging.error('Unable to detect 
def get_static_cpu(self, args, multiple_devices=False, cpu=None):
    """Collect and report static information for a CPU socket.

    ``self.helpers.handle_cpus`` receives this method as a callback.
    NOTE(review): it presumably re-invokes this method once per socket when
    ``args.cpu`` names several sockets -- confirm against AMDSMIHelpers.

    Args:
        args (Namespace): Parsed CLI args. ``cpu``, ``smu`` and
            ``interface_ver`` are read and may be overwritten.
        multiple_devices (bool, optional): When True the result is stored for a
            combined multi-device report instead of being printed. Defaults to False.
        cpu (device_handle, optional): Value override for args.cpu. Defaults to None.

    Returns:
        None: Output is stored/printed through ``self.logger``.
    """
    if cpu:
        args.cpu = cpu

    # No explicit CPU option selected: report every CPU static section.
    if not (args.smu or args.interface_ver):
        args.smu = True
        args.interface_ver = True

    # Without any CPU handle there is nothing to resolve. Returning here also
    # guarantees cpu_id is bound before the failure paths below reference it.
    if not self.cpu_handles:
        logging.debug("No CPU handles available, skipping CPU static information")
        return

    handled_multiple_cpus, device_handle = self.helpers.handle_cpus(args,
                                                                    self.logger,
                                                                    self.get_static_cpu)
    if handled_multiple_cpus:
        return  # This function is recursive
    args.cpu = device_handle

    # CPU id is only used for log messages.
    cpu_id = self.helpers.get_cpu_id_from_device_handle(args.cpu)
    logging.debug(f"Static Arg information for CPU {cpu_id} on {self.helpers.os_info()}")

    static_dict = {}

    if args.smu:
        try:
            smu = amdsmi_interface.amdsmi_get_cpu_smu_fw_version(args.cpu)
            fw_version = ":".join(str(smu[field]) for field in ("smu_fw_major_ver_num",
                                                                "smu_fw_minor_ver_num",
                                                                "smu_fw_debug_ver_num"))
            static_dict["smu"] = {"FW_VERSION": fw_version}
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["smu"] = "N/A"
            logging.debug("Failed to get SMU FW for cpu %s | %s", cpu_id, e.get_error_info())

    if args.interface_ver:
        static_dict["interface_version"] = {}
        try:
            intf_ver = amdsmi_interface.amdsmi_get_cpu_hsmp_proto_ver(args.cpu)
            static_dict["interface_version"]["proto version"] = intf_ver
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["interface_version"]["proto version"] = "N/A"
            logging.debug("Failed to get proto version for cpu %s | %s", cpu_id, e.get_error_info())

    self.logger.store_cpu_output(args.cpu, 'values', static_dict)
    if multiple_devices:
        self.logger.store_multiple_device_output()
        return  # Skip printing when there are multiple devices
    self.logger.print_output(multiple_device_enabled=False)
def static(self, args, multiple_devices=False, gpu=None, asic=None,
           bus=None, vbios=None, limit=None, driver=None, ras=None,
           board=None, numa=None, vram=None, cache=None, partition=None,
           dfc_ucode=None, fb_info=None, num_vf=None, cpu=None,
           interface_ver=None):
    """Dispatch the static report to the CPU and/or GPU implementation.

    CPU information is reported when CPU handles exist and either CPUs were
    explicitly selected or nothing was selected at all; the same rule applies
    to GPUs. The selection is sampled before ``args.gpu`` / ``args.cpu`` are
    defaulted to the full handle lists.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
        gpu (device_handle, optional): Value override for args.gpu. Defaults to None.
        asic, bus, vbios, limit, driver, ras, board, numa, vram, cache,
        partition, dfc_ucode, fb_info, num_vf (bool, optional): Overrides
            forwarded unchanged to ``get_static_gpu``. Default to None.
        cpu (cpu_handle, optional): Value override for args.cpu. Defaults to None.
        interface_ver (bool, optional): Value override for args.interface_ver. Defaults to None.

    Raises:
        SystemExit: Exit code -1 when neither CPU nor GPU handles are present.

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    # Set args.* to passed in arguments
    if gpu:
        args.gpu = gpu
    if cpu:
        args.cpu = cpu
    if interface_ver:
        args.interface_ver = interface_ver

    # Sample the explicit selection before the defaults below overwrite it.
    gpus_selected = bool(args.gpu)
    cpus_selected = bool(args.cpu)
    nothing_selected = not gpus_selected and not cpus_selected

    # Handle no GPU / no CPU passed
    if args.gpu is None:
        args.gpu = self.device_handles
    if args.cpu is None:
        args.cpu = self.cpu_handles

    if self.cpu_handles and (nothing_selected or cpus_selected):
        # multiple_devices must be passed explicitly; passing only ``cpu``
        # would land it in the multiple_devices positional slot.
        self.get_static_cpu(args, multiple_devices, cpu)
    elif not self.cpu_handles:
        # Only claim absence when there really are no CPU handles.
        logging.info("No CPU devices present")

    if self.device_handles and (nothing_selected or gpus_selected):
        self.logger.clear_multiple_devices_ouput()
        self.get_static_gpu(args, multiple_devices, gpu, asic,
                            bus, vbios, limit, driver, ras,
                            board, numa, vram, cache, partition,
                            dfc_ucode, fb_info, num_vf)
    elif not self.device_handles:
        logging.info("No GPU devices present")

    if not self.cpu_handles and not self.device_handles:
        logging.error("No CPU and GPU devices present")
        sys.exit(-1)
def metric_cpu(self, args, multiple_devices=False, cpu=None, power_metrics=None, prochot=None,
               freq_metrics=None, c0_res=None, lclk_dpm_level=None, pwr_svi_telemtry_rails=None,
               io_bandwidth=None, xgmi_bandwidth=None, enable_apb=None, disable_apb=None,
               set_pow_limit=None, set_xgmi_link_width=None, set_lclk_dpm_level=None,
               set_soc_boost_limit=None, metrics_ver=None, metrics_table=None, socket_energy=None,
               set_pwr_eff_mode=None, ddr_bandwidth=None, cpu_temp=None, dimm_temp_range_rate=None,
               dimm_pow_conumption=None, dimm_thermal_sensor=None, set_gmi3_link_width=None,
               set_pcie_lnk_rate=None, set_df_pstate_range=None):
    """Collect (and, for the set_* options, apply) metric settings for a CPU socket.

    Every keyword after ``cpu`` is a value override for the matching
    ``args`` attribute. Options that take a value are read as ``value[0][0]``
    (and ``[0][1]``, ``[0][2]`` where needed).
    NOTE(review): this presumably mirrors argparse ``nargs`` + ``append`` --
    confirm against the parser.

    When no option is selected, only the read-only queries that need no
    argument are enabled. A failing library call is recorded as "N/A" and
    logged at debug level; it never aborts the remaining sections.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
        cpu (cpu_handle, optional): Value override for args.cpu. Defaults to None.

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    # Keyword override -> args attribute that the rest of this method reads.
    overrides = {
        "cpu": cpu,
        "cpu_power_metrics": power_metrics,
        "cpu_prochot": prochot,
        "cpu_freq_metrics": freq_metrics,
        "cpu_c0_res": c0_res,
        "cpu_lclk_dpm_level": lclk_dpm_level,
        "cpu_pwr_svi_telemtry_rails": pwr_svi_telemtry_rails,
        "cpu_io_bandwidth": io_bandwidth,
        "cpu_xgmi_bandwidth": xgmi_bandwidth,
        "cpu_enable_apb": enable_apb,
        "cpu_disable_apb": disable_apb,
        "set_cpu_pow_limit": set_pow_limit,
        "set_cpu_xgmi_link_width": set_xgmi_link_width,
        "set_cpu_lclk_dpm_level": set_lclk_dpm_level,
        "set_soc_boost_limit": set_soc_boost_limit,
        "cpu_metrics_ver": metrics_ver,
        "cpu_metrics_table": metrics_table,
        "socket_energy": socket_energy,
        "set_cpu_pwr_eff_mode": set_pwr_eff_mode,
        "cpu_ddr_bandwidth": ddr_bandwidth,
        "cpu_temp": cpu_temp,
        "cpu_dimm_temp_range_rate": dimm_temp_range_rate,
        "cpu_dimm_pow_conumption": dimm_pow_conumption,
        "cpu_dimm_thermal_sensor": dimm_thermal_sensor,
        "set_cpu_gmi3_link_width": set_gmi3_link_width,
        "set_cpu_pcie_lnk_rate": set_pcie_lnk_rate,
        "set_cpu_df_pstate_range": set_df_pstate_range,
    }
    for attr, value in overrides.items():
        if value:
            setattr(args, attr, value)

    # Read-only queries without arguments: enabled when nothing is selected.
    default_options = ("cpu_power_metrics", "cpu_prochot", "cpu_freq_metrics", "cpu_c0_res",
                       "cpu_pwr_svi_telemtry_rails", "cpu_metrics_ver", "cpu_metrics_table",
                       "socket_energy", "cpu_ddr_bandwidth", "cpu_temp")
    # Options that need a value or change state: never enabled implicitly.
    explicit_options = ("cpu_lclk_dpm_level", "cpu_io_bandwidth", "cpu_xgmi_bandwidth",
                        "cpu_enable_apb", "cpu_disable_apb", "set_cpu_pow_limit",
                        "set_cpu_xgmi_link_width", "set_cpu_lclk_dpm_level",
                        "set_soc_boost_limit", "set_cpu_pwr_eff_mode",
                        "cpu_dimm_temp_range_rate", "cpu_dimm_pow_conumption",
                        "cpu_dimm_thermal_sensor", "set_cpu_gmi3_link_width",
                        "set_cpu_pcie_lnk_rate", "set_cpu_df_pstate_range")

    # Handle No CPU passed
    if args.cpu is None:
        args.cpu = self.cpu_handles

    if not any(getattr(args, name) for name in default_options + explicit_options):
        for name in default_options:
            setattr(args, name, True)

    # Without any CPU handle there is nothing to resolve. Returning here also
    # guarantees cpu_id is bound before the failure paths below reference it.
    if not self.cpu_handles:
        logging.debug("No CPU handles available, skipping CPU metrics")
        return

    handled_multiple_cpus, device_handle = self.helpers.handle_cpus(args,
                                                                    self.logger,
                                                                    self.metric_cpu)
    if handled_multiple_cpus:
        return  # This function is recursive
    args.cpu = device_handle

    # CPU id is only used for log messages.
    cpu_id = self.helpers.get_cpu_id_from_device_handle(args.cpu)
    logging.debug(f"Metric Arg information for CPU {cpu_id} on {self.helpers.os_info()}")

    iface = amdsmi_interface
    output = {}

    def record(section, key, action, failure, success=None):
        """Run ``action``; store its result (or ``success``) at output[section][key], "N/A" on library error."""
        entry = output.setdefault(section, {})
        try:
            result = action()
            entry[key] = result if success is None else success
        except amdsmi_exception.AmdSmiLibraryException as e:
            entry[key] = "N/A"
            logging.debug("Failed to %s for cpu %s | %s", failure, cpu_id, e.get_error_info())

    set_ok = "Set Operation successful"

    if args.cpu_power_metrics:
        record("power_metrics", "socket power",
               lambda: iface.amdsmi_get_cpu_socket_power(args.cpu),
               "get socket power")
        record("power_metrics", "socket power limit",
               lambda: iface.amdsmi_get_cpu_socket_power_cap(args.cpu),
               "get socket power limit")
        record("power_metrics", "socket max power limit",
               lambda: iface.amdsmi_get_cpu_socket_power_cap_max(args.cpu),
               "get max socket power limit")

    if args.cpu_prochot:
        record("prochot", "prochot_status",
               lambda: iface.amdsmi_get_cpu_prochot_status(args.cpu),
               "get prochot status")

    if args.cpu_freq_metrics:
        record("freq_metrics", "fclkmemclk",
               lambda: iface.amdsmi_get_cpu_fclk_mclk(args.cpu),
               "get current fclkmemclk freq")
        record("freq_metrics", "cclkfreqlimit",
               lambda: iface.amdsmi_get_cpu_cclk_limit(args.cpu),
               "get current cclk freq")
        record("freq_metrics", "soc_current_active_freq_limit",
               lambda: iface.amdsmi_get_cpu_socket_current_active_freq_limit(args.cpu),
               "get socket current freq limit")
        record("freq_metrics", "soc_freq_range",
               lambda: iface.amdsmi_get_cpu_socket_freq_range(args.cpu),
               "get socket freq range")

    if args.cpu_c0_res:
        record("c0_residency", "residency",
               lambda: iface.amdsmi_get_cpu_socket_c0_residency(args.cpu),
               "get C0 residency")

    if args.cpu_lclk_dpm_level:
        record("socket_dpm", "dpml_level_range",
               lambda: iface.amdsmi_get_cpu_socket_lclk_dpm_level(args.cpu,
                                                                  args.cpu_lclk_dpm_level[0][0]),
               "get socket dpm level range")

    if args.cpu_pwr_svi_telemtry_rails:
        record("svi_telemetry_all_rails", "power",
               lambda: iface.amdsmi_get_cpu_pwr_svi_telemetry_all_rails(args.cpu),
               "get svi telemetry all rails")

    if args.cpu_io_bandwidth:
        record("io_bandwidth", "band_width",
               lambda: iface.amdsmi_get_cpu_current_io_bandwidth(args.cpu,
                                                                 int(args.cpu_io_bandwidth[0][0]),
                                                                 args.cpu_io_bandwidth[0][1]),
               "get io bandwidth")

    if args.cpu_xgmi_bandwidth:
        record("xgmi_bandwidth", "band_width",
               lambda: iface.amdsmi_get_cpu_current_xgmi_bw(args.cpu,
                                                            int(args.cpu_xgmi_bandwidth[0][0]),
                                                            args.cpu_xgmi_bandwidth[0][1]),
               "get xgmi bandwidth")

    if args.cpu_enable_apb:
        record("apbenable", "state",
               lambda: iface.amdsmi_cpu_apb_enable(args.cpu),
               "enable APB",
               success="Enabled DF - Pstate performance boost algorithm")

    if args.cpu_disable_apb:
        record("apbdisable", "state",
               lambda: iface.amdsmi_cpu_apb_disable(args.cpu, args.cpu_disable_apb[0][0]),
               "disable APB",
               success="Disabled DF - Pstate performance boost algorithm")

    if args.set_cpu_pow_limit:
        record("set_pow_limit", "Response",
               lambda: iface.amdsmi_set_cpu_socket_power_cap(args.cpu, args.set_cpu_pow_limit[0][0]),
               "set power limit", success=set_ok)

    if args.set_cpu_xgmi_link_width:
        record("set_xgmi_link_width", "Response",
               lambda: iface.amdsmi_set_cpu_xgmi_width(args.cpu,
                                                       args.set_cpu_xgmi_link_width[0][0],
                                                       args.set_cpu_xgmi_link_width[0][1]),
               "set xgmi link width", success=set_ok)

    if args.set_cpu_lclk_dpm_level:
        record("set_lclk_dpm_level", "Response",
               lambda: iface.amdsmi_set_cpu_socket_lclk_dpm_level(args.cpu,
                                                                  args.set_cpu_lclk_dpm_level[0][0],
                                                                  args.set_cpu_lclk_dpm_level[0][1],
                                                                  args.set_cpu_lclk_dpm_level[0][2]),
               "set lclk dpm level", success=set_ok)

    if args.set_soc_boost_limit:
        record("set_soc_boost_limit", "Response",
               lambda: iface.amdsmi_set_cpu_socket_boostlimit(args.cpu, args.set_soc_boost_limit[0][0]),
               "set socket boost limit", success=set_ok)

    if args.cpu_metrics_ver:
        record("metric_version", "version",
               lambda: iface.amdsmi_get_metrics_table_version(args.cpu),
               "get metrics table version")

    if args.cpu_metrics_table:
        # NOTE: amdsmi_get_metrics_table is deliberately not called; per the
        # original author it needs a fix in the library API first.
        output["metrics_table"] = {"response": "N/A"}

    if args.socket_energy:
        record("socket_energy", "response",
               lambda: iface.amdsmi_get_cpu_socket_energy(args.cpu),
               "get socket energy")

    if args.set_cpu_pwr_eff_mode:
        record("set_pwr_eff_mode", "Response",
               lambda: iface.amdsmi_set_cpu_pwr_efficiency_mode(args.cpu,
                                                                args.set_cpu_pwr_eff_mode[0][0]),
               "set power efficiency mode", success=set_ok)

    if args.cpu_ddr_bandwidth:
        record("ddr_bandwidth", "response",
               lambda: iface.amdsmi_get_cpu_ddr_bw(args.cpu),
               "get ddr bandwdith")

    if args.cpu_temp:
        record("cpu_temp", "response",
               lambda: iface.amdsmi_get_cpu_socket_temperature(args.cpu),
               "get cpu temperature")

    if args.cpu_dimm_temp_range_rate:
        record("dimm_temp_range_rate", "response",
               lambda: iface.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(
                   args.cpu, args.cpu_dimm_temp_range_rate[0][0]),
               "get dimm temperature range and refresh rate")

    if args.cpu_dimm_pow_conumption:
        record("dimm_pow_conumption", "response",
               lambda: iface.amdsmi_get_cpu_dimm_power_consumption(
                   args.cpu, args.cpu_dimm_pow_conumption[0][0]),
               "get dimm power consumption")

    if args.cpu_dimm_thermal_sensor:
        record("dimm_thermal_sensor", "response",
               lambda: iface.amdsmi_get_cpu_dimm_thermal_sensor(
                   args.cpu, args.cpu_dimm_thermal_sensor[0][0]),
               "get dimm thermal sensor")

    if args.set_cpu_gmi3_link_width:
        record("set_gmi3_link_width", "response",
               lambda: iface.amdsmi_set_cpu_gmi3_link_width_range(args.cpu,
                                                                  args.set_cpu_gmi3_link_width[0][0],
                                                                  args.set_cpu_gmi3_link_width[0][1]),
               "set gmi3 link width", success=set_ok)

    if args.set_cpu_pcie_lnk_rate:
        # The library call's return value is reported as the previous mode.
        record("set_pcie_lnk_rate", "prev_mode",
               lambda: iface.amdsmi_set_cpu_pcie_link_rate(args.cpu, args.set_cpu_pcie_lnk_rate[0][0]),
               "set pcie link rate")

    if args.set_cpu_df_pstate_range:
        record("set_df_pstate_range", "response",
               lambda: iface.amdsmi_set_cpu_df_pstate_range(args.cpu,
                                                            args.set_cpu_df_pstate_range[0][0],
                                                            args.set_cpu_df_pstate_range[0][1]),
               "set df pstate range", success=set_ok)

    self.logger.store_cpu_output(args.cpu, 'values', output)
    if multiple_devices:
        self.logger.store_multiple_device_output()
        return  # Skip printing when there are multiple devices
    self.logger.print_output(multiple_device_enabled=False)
def metric_core(self, args, multiple_devices=False, core=None, boost_limit=None,
                curr_active_freq_core_limit=None, set_core_boost_limit=None, core_energy=None):
    """Collect the requested metrics for one CPU core and hand them to the logger.

    Args:
        args (Namespace): Namespace containing the parsed CLI args
        multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
        core (device_handle, optional): device_handle for target core. Defaults to None.
        boost_limit (bool, optional): Value override for args.core_boost_limit. Defaults to None.
        curr_active_freq_core_limit (bool, optional): Value override for
            args.core_curr_active_freq_core_limit. Defaults to None.
        set_core_boost_limit (list, optional): Value override for args.set_core_boost_limit;
            element [0][0] is the limit that gets written. Defaults to None.
        core_energy (bool, optional): Value override for args.core_energy. Defaults to None.

    Returns:
        None: Print output via AMDSMILogger to destination
    """
    # Apply keyword overrides on top of the parsed CLI namespace.
    if core:
        args.core = core
    if boost_limit:
        args.core_boost_limit = boost_limit
    if curr_active_freq_core_limit:
        args.core_curr_active_freq_core_limit = curr_active_freq_core_limit
    if set_core_boost_limit:
        # Previously assigned `boost_limit` here, which dropped the requested set value.
        args.set_core_boost_limit = set_core_boost_limit
    if core_energy:
        args.core_energy = core_energy

    # Store core args that are applicable to the current platform
    curr_platform_core_args = ["core_boost_limit", "core_curr_active_freq_core_limit",
                               "set_core_boost_limit", "core_energy"]
    curr_platform_core_values = [args.core_boost_limit, args.core_curr_active_freq_core_limit,
                                 args.set_core_boost_limit, args.core_energy]

    # Handle no core passed: fall back to every known core
    if args.core is None:
        args.core = self.core_handles

    # Nothing selected explicitly: enable every read-only metric, never the setter.
    if not any(curr_platform_core_values):
        for arg in curr_platform_core_args:
            if arg != "set_core_boost_limit":
                setattr(args, arg, True)

    # Without core handles there is nothing to resolve or query.
    if not self.core_handles:
        logging.debug("No core handles available, skipping core metrics")
        return

    handled_multiple_cores, device_handle = self.helpers.handle_cores(args,
                                                                      self.logger,
                                                                      self.metric_core)
    if handled_multiple_cores:
        return  # This function is recursive
    args.core = device_handle

    # get core id for logging
    core_id = self.helpers.get_core_id_from_device_handle(args.core)
    logging.debug(f"Static Arg information for Core {core_id} on {self.helpers.os_info()}")

    static_dict = {}
    if args.core_boost_limit:
        static_dict["boost_limit"] = {}
        try:
            boost_limit = amdsmi_interface.amdsmi_get_cpu_core_boostlimit(args.core)
            static_dict["boost_limit"]["value"] = boost_limit
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["boost_limit"]["value"] = "N/A"
            logging.debug("Failed to get core boost limit for core %s | %s", core_id, e.get_error_info())

    if args.core_curr_active_freq_core_limit:
        static_dict["curr_active_freq_core_limit"] = {}
        try:
            freq = amdsmi_interface.amdsmi_get_cpu_core_current_freq_limit(args.core)
            static_dict["curr_active_freq_core_limit"]["value"] = freq
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["curr_active_freq_core_limit"]["value"] = "N/A"
            logging.debug("Failed to get current active frequency core for core %s | %s", core_id, e.get_error_info())

    if args.set_core_boost_limit:
        static_dict["set_core_boost_limit"] = {}
        try:
            amdsmi_interface.amdsmi_set_cpu_core_boostlimit(args.core, args.set_core_boost_limit[0][0])
            static_dict["set_core_boost_limit"]["Response"] = "Set Operation successful"
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["set_core_boost_limit"]["Response"] = "N/A"
            logging.debug("Failed to set core boost limit for cpu %s | %s", core_id, e.get_error_info())

    if args.core_energy:
        static_dict["core_energy"] = {}
        try:
            energy = amdsmi_interface.amdsmi_get_cpu_core_energy(args.core)
            static_dict["core_energy"]["value"] = energy
        except amdsmi_exception.AmdSmiLibraryException as e:
            static_dict["core_energy"]["value"] = "N/A"
            logging.debug("Failed to get core energy for core %s | %s", core_id, e.get_error_info())

    multiple_devices_csv_override = False
    self.logger.store_core_output(args.core, 'values', static_dict)
    if multiple_devices:
        self.logger.store_multiple_device_output()
        return  # Skip printing when there are multiple devices
    self.logger.print_output(multiple_device_enabled=multiple_devices_csv_override)
Defaults to None. + clock (bool, optional): Value override for args.clock. Defaults to None. + temperature (bool, optional): Value override for args.temperature. Defaults to None. + ecc (bool, optional): Value override for args.ecc. Defaults to None. + ecc_block (bool, optional): Value override for args.ecc. Defaults to None. + pcie (bool, optional): Value override for args.pcie. Defaults to None. + fan (bool, optional): Value override for args.fan. Defaults to None. + voltage_curve (bool, optional): Value override for args.voltage_curve. Defaults to None. + overdrive (bool, optional): Value override for args.overdrive. Defaults to None. + perf_level (bool, optional): Value override for args.perf_level. Defaults to None. + xgmi_err (bool, optional): Value override for args.xgmi_err. Defaults to None. + energy (bool, optional): Value override for args.energy. Defaults to None. + mem_usage (bool, optional): Value override for args.mem_usage. Defaults to None. + schedule (bool, optional): Value override for args.schedule. Defaults to None. + guard (bool, optional): Value override for args.guard. Defaults to None. + guest_data (bool, optional): Value override for args.guest_data. Defaults to None. + fb_usage (bool, optional): Value override for args.fb_usage. Defaults to None. + xgmi (bool, optional): Value override for args.xgmi. Defaults to None. + cpu_power_metrics (bool, optional): Value override for args.cpu_power_metrics. Defaults to None + prochot (bool, optional): Value override for args.prochot. Defaults to None. + freq_metrics (bool, optional): Value override for args.freq_metrics. Defaults to None. + c0_res (bool, optional): Value override for args.c0_res. Defaults to None + lclk_dpm_level (list, optional): Value override for args.lclk_dpm_level. Defaults to None + pwr_svi_telemtry_rails (list, optional): value override for args.pwr_svi_telemtry_rails. Defaults to None + io_bandwidth (list, optional): value override for args.io_bandwidth. 
Defaults to None + xgmi_bandwidth (list, optional): value override for args.xgmi_bandwidth. Defaults to None + enable_apb (bool, optional): Value override for args.enable_apb. Defaults to None + disable_apb (bool, optional): Value override for args.disable_apb. Defaults to None + set_pow_limit (bool, optional): Value override for args.cpu_set_pow_limit. Defaults to None + set_xgmi_link_width (list, optional): Value override for args.set_cpu_xgmi_link_width. Defaults to None + set_lclk_dpm_level (bool, optional): Value override for args.set_cpu_lclk_dpm_level. Defaults to None + boost_limit (bool, optional): Value override for args.boost_limit. Defaults to None + set_soc_boost_limit (list, optional): Value override for args.set_soc_boost_limit. Defaults to None + metrics_ver (bool, optional): Value override for args.cpu_metrics_ver. Defaults to None + metrics_table (bool, optional): Value override for args.cpu_metrics_table. Defaults to None + socket_energy (bool, optional): Value override for args.socket_energy. Defaults to None + set_pwr_eff_mode (list, optional): Value override for args.set_cpu_pwr_eff_mode. Defaults to None + ddr_bandwidth (bool, optional): Value override for args.ddr_bandwidth. Defaults to None + cpu_temp (bool, optional): Value override for args.cpu_temp. Defaults to None + dimm_temp_range_rate (bool, optional): Value override for args.cpu_dimm_temp_range_rate. Defaults to None + dimm_pow_conumption (bool, optional): Value override for args.cpu_dimm_pow_conumption. Defaults to None + dimm_thermal_sensor (bool, optional): Value override for args.cpu_dimm_thermal_sensor. Defaults to None + set_gmi3_link_width (list, optional): Value override for args.set_cpu_gmi3_link_width. Defaults to None + set_pcie_lnk_rate (list, optional): Value override for args.set_cpu_pcie_lnk_rate. Defaults to None + set_df_pstate_range (list, optional): Value override for args.set_cpu_df_pstate_range. 
Defaults to None + + Raises: + IndexError: Index error if gpu list is empty + + Returns: + None: Print output via AMDSMILogger to destination + """ + gpus = args.gpu + cpus= args.cpu + cores = args.core + gpu_options = any([args.gpu, args.usage,args.watch, args.watch_time, args.iterations, + args.power, args.clock, args.temperature, args.ecc, args.ecc_block, + args.pcie, args.fan, args.voltage_curve, args.overdrive, args.perf_level, + args.xgmi_err, args.energy, args.mem_usage]) + cpu_options = any([args.cpu, args.cpu_power_metrics, args.cpu_prochot, + args.cpu_freq_metrics, args.cpu_c0_res, args.cpu_lclk_dpm_level, + args.cpu_pwr_svi_telemtry_rails, args.cpu_io_bandwidth, args.cpu_xgmi_bandwidth, + args.cpu_enable_apb, args.cpu_disable_apb, args.set_cpu_pow_limit, + args.set_cpu_xgmi_link_width, args.set_cpu_lclk_dpm_level, + args.set_soc_boost_limit,args.cpu_metrics_ver, args.cpu_metrics_table, + args.socket_energy, args.set_cpu_pwr_eff_mode,args.cpu_ddr_bandwidth, + args.cpu_temp, args.cpu_dimm_temp_range_rate, args.cpu_dimm_pow_conumption, + args.cpu_dimm_thermal_sensor, args.set_cpu_gmi3_link_width, + args.set_cpu_pcie_lnk_rate, args.set_cpu_df_pstate_range]) + + core_options = any([args.core_boost_limit, args.core_curr_active_freq_core_limit, + args.set_core_boost_limit, args.core_energy]) + if ((len(self.device_handles) and ((((not gpus) and (not cpus) and (not cores)) or gpus) + and not cpu_options and not core_options))): + self.metric_gpu( args, multiple_devices, watching_output, gpu, + usage, watch, watch_time, iterations, power, + clock, temperature, ecc, ecc_block, pcie, + fan, voltage_curve, overdrive, perf_level, + xgmi_err, energy, mem_usage, schedule, + guard, guest_data, fb_usage, xgmi) + + if ((len(self.cpu_handles) and ((((not gpus) and (not cpus) and (not cores)) or cpus) + and not gpu_options and not core_options))): + self.logger.clear_multiple_devices_ouput() + self.metric_cpu(args, multiple_devices, cpu, cpu_power_metrics, prochot, + 
freq_metrics, c0_res, lclk_dpm_level, pwr_svi_telemtry_rails, + io_bandwidth, xgmi_bandwidth, enable_apb, disable_apb, + set_pow_limit,set_xgmi_link_width, set_lclk_dpm_level, + set_soc_boost_limit, metrics_ver, metrics_table, socket_energy, + set_pwr_eff_mode,ddr_bandwidth, cpu_temp, dimm_temp_range_rate, + dimm_pow_conumption,dimm_thermal_sensor, set_gmi3_link_width, + set_pcie_lnk_rate, set_df_pstate_range) + + + if ((len(self.core_handles) and ((((not gpus) and (not cpus) and (not cores)) or cores) + and not gpu_options and not cpu_options))): + self.logger.clear_multiple_devices_ouput() + self.metric_core(args, multiple_devices, core, boost_limit, + curr_active_freq_core_limit, set_core_boost_limit, + core_energy) + + if (len(self.cpu_handles) == 0 and len(self.device_handles) == 0 and + len(self.core_handles) == 0): + logging.error("No CPU and GPU devices present") + sys.exit(-1) + def process(self, args, multiple_devices=False, watching_output=False, gpu=None, general=None, engine=None, pid=None, name=None, watch=None, watch_time=None, iterations=None): diff --git a/amdsmi_cli/amdsmi_helpers.py b/amdsmi_cli/amdsmi_helpers.py index 9e08bfbc70..91879e721c 100644 --- a/amdsmi_cli/amdsmi_helpers.py +++ b/amdsmi_cli/amdsmi_helpers.py @@ -116,6 +116,97 @@ class AMDSMIHelpers(): return self._is_windows + def get_cpu_choices(self): + """Return dictionary of possible CPU choices and string of the output: + Dictionary will be in format: cpus[ID]: Device Handle) + String output will be in format: + "ID: 0 " + params: + None + return: + (dict, str) : (cpu_choices, cpu_choices_str) + """ + cpu_choices = {} + cpu_choices_str = "" + #import pdb;pdb.set_trace() + try: + cpu_handles = [] + # amdsmi_get_cpusocket_handles() returns the cpu socket handles stored for cpu_id + cpu_handles = amdsmi_interface.amdsmi_get_cpusocket_handles() + except amdsmi_interface.AmdSmiLibraryException as e: + if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, + 
def get_core_choices(self):
    """Return the selectable CPU cores as a lookup dict plus a help string.

    The dictionary maps str(core index) to {"Device Handle": handle}; when at
    least one core is found it also carries the entry "all": "all".
    The string holds one "ID: 0 - <last index>" range line followed by the
    "all" line, and is empty when no core is found.

    params:
        None
    return:
        (dict, str) : (core_choices, core_choices_str)
    """
    core_choices = {}
    core_choices_str = ""
    core_handles = []

    try:
        # amdsmi_get_cpucore_handles() returns the core handles stored for core_id
        core_handles = amdsmi_interface.amdsmi_get_cpucore_handles()
    except amdsmi_interface.AmdSmiLibraryException as e:
        if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                          amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED):
            logging.info('Unable to get device choices, driver not initialized (amdhsmp not found in modules)')
        else:
            raise

    if len(core_handles) == 0:
        logging.info('Unable to find any devices, check if driver is initialized (amdhsmp not found in modules)')
        return (core_choices, core_choices_str)

    # Handle spacing for the core_choices_str
    max_padding = int(math.log10(len(core_handles))) + 1

    for core_id, device_handle in enumerate(core_handles):
        core_choices[str(core_id)] = {
            "Device Handle": device_handle
        }

    # Cores are listed as a single range rather than one help line per core.
    core_choices_str += f"ID: 0 - {len(core_handles) - 1}\n"

    # Add the all option to the core_choices
    core_choices["all"] = "all"
    core_choices_str += f" all{' ' * max_padding}| Selects all devices\n"

    return (core_choices, core_choices_str)
enumerate(device_handles): - bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(device_handle) - uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(device_handle) - gpu_choices[str(gpu_id)] = { - "BDF": bdf, - "UUID": uuid, - "Device Handle": device_handle, - } + if gpu_id == 0: + id_padding = max_padding + else: + id_padding = max_padding - int(math.log10(gpu_id)) + gpu_choices_str += f"ID: {gpu_id}{' ' * id_padding}| BDF: {bdf} | UUID: {uuid}\n" - if gpu_id == 0: - id_padding = max_padding - else: - id_padding = max_padding - int(math.log10(gpu_id)) - gpu_choices_str += f"ID: {gpu_id}{' ' * id_padding}| BDF: {bdf} | UUID: {uuid}\n" - - # Add the all option to the gpu_choices - gpu_choices["all"] = "all" - gpu_choices_str += f" all{' ' * max_padding}| Selects all devices\n" + # Add the all option to the gpu_choices + gpu_choices["all"] = "all" + gpu_choices_str += f" all{' ' * max_padding}| Selects all devices\n" return (gpu_choices, gpu_choices_str) @@ -234,11 +324,89 @@ class AMDSMIHelpers(): return True, selected_device_handles - def handle_gpus(self, args, logger, subcommand): + def get_device_handles_from_cpu_selections(self, cpu_selections: List[str], cpu_choices=None): + """Convert provided cpu_selections to device_handles + + Args: + cpu_selections (list[str]): Selected CPU ID(s): + ex: ID:0 + cpu_choices (dict{cpu_choices}): This is a dictionary of the possible cpu_choices + Returns: + (True, list[device_handles]): Returns a list of all the cpu_selections converted to + amdsmi device_handles + (False, str): Return False, and the first input that failed to be converted + """ + if 'all' in cpu_selections: + return (True, amdsmi_interface.amdsmi_get_cpusocket_handles()) + + if isinstance(cpu_selections, str): + cpu_selections = [cpu_selections] + + if cpu_choices is None: + cpu_choices = self.get_cpu_choices()[0] + + selected_device_handles = [] + for cpu_selection in cpu_selections: + valid_cpu_choice = False + for cpu_id, cpu_info in cpu_choices.items(): + 
def get_device_handles_from_core_selections(self, core_selections: List[str], core_choices=None):
    """Convert provided core_selections to device_handles

    Args:
        core_selections (list[str]): Selected CORE ID(s):
            ex: ID:0
            A single string is treated as a one-element list.
        core_choices (dict{core_choices}): This is a dictionary of the possible core_choices;
            looked up via self.get_core_choices() when omitted.
    Returns:
        (True, list[device_handles]): Returns a list of all the core_selections converted to
            amdsmi device_handles
        (False, str): Return False, and the first input that failed to be converted
    """
    # Normalise first so the 'all' test below is list membership, not a substring match.
    if isinstance(core_selections, str):
        core_selections = [core_selections]

    if 'all' in core_selections:
        return (True, amdsmi_interface.amdsmi_get_cpucore_handles())

    if core_choices is None:
        core_choices = self.get_core_choices()[0]

    selected_device_handles = []
    for core_selection in core_selections:
        core_info = core_choices.get(core_selection)
        # Only dict entries carry a handle; the "all" placeholder maps to a plain string.
        if not isinstance(core_info, dict):
            logging.debug(f"AMDSMIHelpers.get_device_handles_from_core_selections - Unable to convert {core_selection}")
            return False, core_selection
        selected_device_handles.append(core_info['Device Handle'])
    return True, selected_device_handles
params: args - argparser args to pass to subcommand + current_platform_args (list) - GPU supported platform arguments + current_platform_values (list) - GPU supported values for the arguments logger (AMDSMILogger) - Logger to print out output subcommand (AMDSMICommands) - Function that can handle multiple gpus @@ -260,11 +428,72 @@ class AMDSMIHelpers(): args.gpu = args.gpu[0] return False, args.gpu else: - raise IndexError("args.gpu should not be an empty list") + logging.debug("args.gpu has an empty list") else: return False, args.gpu + def handle_cpus(self, args, logger, subcommand): + """This function will run execute the subcommands based on the number + of cpus passed in via args. + params: + args - argparser args to pass to subcommand + logger (AMDSMILogger) - Logger to print out output + subcommand (AMDSMICommands) - Function that can handle multiple gpus + + return: + tuple(bool, device_handle) : + bool - True if executed subcommand for multiple devices + device_handle - Return the device_handle if the list of devices is a length of 1 + (handled_multiple_gpus, device_handle) + + """ + if isinstance(args.cpu, list): + if len(args.cpu) > 1: + for device_handle in args.cpu: + # Handle multiple_devices to print all output at once + subcommand(args, multiple_devices=True, cpu=device_handle) + logger.print_output(multiple_device_enabled=True) + return True, args.cpu + elif len(args.cpu) == 1: + args.cpu = args.cpu[0] + return False, args.cpu + else: + logging.debug("args.cpu has empty list") + else: + return False, args.cpu + + def handle_cores(self, args, logger, subcommand): + """This function will run execute the subcommands based on the number + of cores passed in via args. 
+ params: + args - argparser args to pass to subcommand + logger (AMDSMILogger) - Logger to print out output + subcommand (AMDSMICommands) - Function that can handle multiple gpus + + return: + tuple(bool, device_handle) : + bool - True if executed subcommand for multiple devices + device_handle - Return the device_handle if the list of devices is a length of 1 + (handled_multiple_gpus, device_handle) + + """ + if isinstance(args.core, list): + if len(args.core) > 1: + for device_handle in args.core: + # Handle multiple_devices to print all output at once + subcommand(args, multiple_devices=True, core=device_handle) + logger.print_output(multiple_device_enabled=True) + return True, args.core + elif len(args.core) == 1: + args.core = args.core[0] + return False, args.core + else: + logging.debug("args.core has empty list") + else: + return False, args.core + + def handle_watch(self, args, subcommand, logger): """This function will run the subcommand multiple times based on the passed watch, watch_time, and iterations passed in. @@ -326,6 +555,31 @@ class AMDSMIHelpers(): "Unable to find gpu ID from device_handle") + def get_cpu_id_from_device_handle(self, input_device_handle): + """Get the cpu index from the device_handle. + amdsmi_interface.amdsmi_get_cpusocket_handles() returns the list of device_handles in order of cpu_index + """ + device_handles = amdsmi_interface.amdsmi_get_cpusocket_handles() + for cpu_index, device_handle in enumerate(device_handles): + if input_device_handle.value == device_handle.value: + return cpu_index + raise amdsmi_exception.AmdSmiParameterException(input_device_handle, + amdsmi_interface.amdsmi_wrapper.amdsmi_processor_handle, + "Unable to find cpu ID from device_handle") + + def get_core_id_from_device_handle(self, input_device_handle): + """Get the core index from the device_handle. 
def check_amdhsmp_driver():
    """ Returns true if amd hsmp is found in the list of initialized modules

    Reads /sys/module/amd_hsmp/initstate and reports True only when its
    stripped content is "live". A missing or unreadable file yields False.
    """
    amd_cpu_status_file = Path("/sys/module/amd_hsmp/initstate")
    try:
        # Read directly instead of exists()+read: the file can vanish in between.
        return amd_cpu_status_file.read_text(encoding="ascii").strip() == "live"
    except OSError:
        return False
check_amdgpu_driver(): # Only init AMD GPUs for now, waiting for future support for AMD CPUs try: - amdsmi_interface.amdsmi_init(flag) + amdsmi_interface.amdsmi_init(amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS) except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e: if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): @@ -72,9 +96,23 @@ def init_amdsmi(flag=amdsmi_interface.AmdSmiInitFlags.INIT_AMD_GPUS): else: raise e logging.debug("AMDSMI initialized successfully, but initstate was not live") + + elif check_amdhsmp_driver(): + # Only init AMD CPUs + try: + amdsmi_interface.amdsmi_init(amdsmi_interface.AmdSmiInitFlags.INIT_AMD_CPUS) + cpu_flag = True + except (amdsmi_interface.AmdSmiLibraryException, amdsmi_interface.AmdSmiParameterException) as e: + if e.err_code in (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT, + amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED): + logging.error("Driver not loaded (hsmp not found in modules)") + sys.exit(-1) + else: + raise e else: - logging.error("Driver not found (amdgpu not found in modules)") - sys.exit(-1) + pass + + logging.debug("AMDSMI initialized successfully") def shut_down_amdsmi(): diff --git a/amdsmi_cli/amdsmi_logger.py b/amdsmi_cli/amdsmi_logger.py index fb2c2dbecf..52286918b8 100644 --- a/amdsmi_cli/amdsmi_logger.py +++ b/amdsmi_cli/amdsmi_logger.py @@ -72,6 +72,8 @@ class AMDSMILogger(): def is_human_readable_format(self): return self.format == self.LoggerFormat.human_readable.value + def clear_multiple_devices_ouput(self): + self.multiple_device_output.clear() def _capitalize_keys(self, input_dict): output_dict = {} @@ -216,6 +218,75 @@ class AMDSMILogger(): self._store_output_amdsmi(gpu_id=gpu_id, argument=argument, data=data) + def store_cpu_output(self, device_handle, argument, data): + """ Convert device handle to cpu id and store output + params: + 
device_handle - device handle object to the target device output + argument (str) - key to store data + data (dict | list) - Data store against argument + return: + Nothing + """ + cpu_id = self.helpers.get_cpu_id_from_device_handle(device_handle) + self._store_cpu_output_amdsmi(cpu_id=cpu_id, argument=argument, data=data) + + + def store_core_output(self, device_handle, argument, data): + """ Convert device handle to core id and store output + params: + device_handle - device handle object to the target device output + argument (str) - key to store data + data (dict | list) - Data store against argument + return: + Nothing + """ + core_id = self.helpers.get_core_id_from_device_handle(device_handle) + self._store_core_output_amdsmi(core_id=core_id, argument=argument, data=data) + + def _store_core_output_amdsmi(self, core_id, argument, data): + if argument == 'timestamp': # Make sure timestamp is the first element in the output + self.output['timestamp'] = int(time.time()) + + if self.is_json_format() or self.is_human_readable_format(): + self.output['core'] = int(core_id) + if argument == 'values' and isinstance(data, dict): + self.output.update(data) + else: + self.output[argument] = data + elif self.is_csv_format(): + self.output['core'] = int(core_id) + + if argument == 'values' or isinstance(data, dict): + flat_dict = self.flatten_dict(data) + self.output.update(flat_dict) + else: + self.output[argument] = data + else: + raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported") + + + def _store_cpu_output_amdsmi(self, cpu_id, argument, data): + if argument == 'timestamp': # Make sure timestamp is the first element in the output + self.output['timestamp'] = int(time.time()) + + if self.is_json_format() or self.is_human_readable_format(): + self.output['cpu'] = int(cpu_id) + if argument == 'values' and isinstance(data, dict): + self.output.update(data) + else: + self.output[argument] = data + elif 
self.is_csv_format(): + self.output['cpu'] = int(cpu_id) + + if argument == 'values' or isinstance(data, dict): + flat_dict = self.flatten_dict(data) + self.output.update(flat_dict) + else: + self.output[argument] = data + else: + raise amdsmi_cli_exceptions(self, "Invalid output format given, only json, csv, and human_readable supported") + + def _store_output_amdsmi(self, gpu_id, argument, data): if argument == 'timestamp': # Make sure timestamp is the first element in the output self.output['timestamp'] = int(time.time()) diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index 4828854cd6..3a1496bc15 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -72,6 +72,8 @@ class AMDSMIParser(argparse.ArgumentParser): # Helper variables self.helpers = AMDSMIHelpers() self.gpu_choices, self.gpu_choices_str = self.helpers.get_gpu_choices() + self.cpu_choices, self.cpu_choices_str = self.helpers.get_cpu_choices() + self.core_choices, self.core_choices_str = self.helpers.get_core_choices() self.vf_choices = ['3', '2', '1'] version_string = f"Version: {__version__}" @@ -233,6 +235,56 @@ class AMDSMIParser(argparse.ArgumentParser): return _GPUSelectAction + def _cpu_select(self, cpu_choices): + """ Custom argparse action to return the device handle(s) for the cpu(s) selected + This will set the destination (args.cpu) to a list of 1 or more device handles + If 1 or more device handles are not found then raise an ArgumentError for the first invalid cpu seen + """ + amdsmi_helpers = self.helpers + class _CPUSelectAction(argparse.Action): + ouputformat=self.helpers.get_output_format() + # Checks the values + def __call__(self, parser, args, values, option_string=None): + if "all" in cpu_choices: + del cpu_choices["all"] + status, selected_device_handles = amdsmi_helpers.get_device_handles_from_cpu_selections(cpu_selections=values, + cpu_choices=cpu_choices) + if status: + setattr(args, self.dest, selected_device_handles) + else: + if 
selected_device_handles == '': + raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--cpu", _CPUSelectAction.ouputformat) + else: + raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, + _CPUSelectAction.ouputformat) + return _CPUSelectAction + + + def _core_select(self, core_choices): + """ Custom argparse action to return the device handle(s) for the core(s) selected + This will set the destination (args.core) to a list of 1 or more device handles + If 1 or more device handles are not found then raise an ArgumentError for the first invalid core seen + """ + amdsmi_helpers = self.helpers + class _CoreSelectAction(argparse.Action): + ouputformat=self.helpers.get_output_format() + # Checks the values + def __call__(self, parser, args, values, option_string=None): + if "all" in core_choices: + del core_choices["all"] + status, selected_device_handles = amdsmi_helpers.get_device_handles_from_core_selections(core_selections=values, + core_choices=core_choices) + if status: + setattr(args, self.dest, selected_device_handles) + else: + if selected_device_handles == '': + raise amdsmi_cli_exceptions.AmdSmiMissingParameterValueException("--core", _CoreSelectAction.ouputformat) + else: + raise amdsmi_cli_exceptions.AmdSmiDeviceNotFoundException(selected_device_handles, + _CoreSelectAction.ouputformat) + return _CoreSelectAction + + def _add_command_modifiers(self, subcommand_parser): json_help = "Displays output in JSON format (human readable by default)." csv_help = "Displays output in CSV format (human readable by default)." 
@@ -274,11 +326,18 @@ class AMDSMIParser(argparse.ArgumentParser): gpu_help = f"Select a GPU ID, BDF, or UUID from the possible choices:\n{self.gpu_choices_str}" vf_help = "Gets general information about the specified VF (timeslice, fb info, …).\ \nAvailable only on virtualization OSs" + cpu_help = f"Select a CPU ID from the possible choices:\n{self.cpu_choices_str}" + core_help = f"Select a Core ID from the possible choices:\n{self.core_choices_str}" + # Mutually Exclusive Args within the subparser device_args = subcommand_parser.add_mutually_exclusive_group(required=required) device_args.add_argument('-g', '--gpu', action=self._gpu_select(self.gpu_choices), nargs='+', help=gpu_help) + device_args.add_argument('-U', '--cpu', action=self._cpu_select(self.cpu_choices), + nargs='+', help=cpu_help) + device_args.add_argument('-O', '--core', action=self._core_select(self.core_choices), + nargs='+', help=core_help) if self.helpers.is_hypervisor(): device_args.add_argument('-v', '--vf', action='store', nargs='+', @@ -345,11 +404,16 @@ class AMDSMIParser(argparse.ArgumentParser): fb_help = "Displays Frame Buffer information" num_vf_help = "Displays number of supported and enabled VFs" + # Options arguments help text for cpu + smu_help = "All SMU FW information" + interface_help = "Displays hsmp interface version" + # Create static subparser static_parser = subparsers.add_parser('static', help=static_help, description=static_subcommand_help) static_parser._optionals.title = static_optionals_title static_parser.formatter_class=lambda prog: AMDSMISubparserHelpFormatter(prog) static_parser.set_defaults(func=func) + cpu_group = static_parser.add_argument_group("CPU Option") # Add Universal Arguments self._add_command_modifiers(static_parser) @@ -363,7 +427,8 @@ class AMDSMIParser(argparse.ArgumentParser): static_parser.add_argument('-v', '--vram', action='store_true', required=False, help=vram_help) static_parser.add_argument('-c', '--cache', action='store_true', 
required=False, help=cache_help) static_parser.add_argument('-B', '--board', action='store_true', required=False, help=board_help) - + cpu_group.add_argument('-s', '--smu', action='store_true', required=False, help=smu_help) + cpu_group.add_argument('-i', '--interface_ver', action='store_true', required=False, help=interface_help) # Options to display on Hypervisors and Baremetal if self.helpers.is_hypervisor() or self.helpers.is_baremetal(): static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help) @@ -475,11 +540,55 @@ class AMDSMIParser(argparse.ArgumentParser): fb_usage_help = "Displays total and used Frame Buffer usage information" xgmi_help = "Table of current XGMI metrics information" + # Help text for cpu options + cpu_power_metrics_help = "Cpu power metrics" + cpu_proc_help = "Displays prochot status" + cpu_freq_help = "Displays currentFclkMemclk frequencies and cclk frequency limit" + cpu_c0_res_help = "Displays C0 residency" + cpu_lclk_dpm_help = "Displays lclk dpm level range. Requires socket ID and nbio id as inputs" + cpu_pwr_svi_telemtry_rails_help = "Displays svi based telemetry for all rails" + cpu_io_bandwidth_help = "Displays current IO bandwidth for the selected CPU.\ + \n input parameters are bandwidth type(1) and link ID encodings\ + \n i.e. P2, P3, G0 - G7" + cpu_xgmi_bandwidth_help = "Displays current XGMI bandwidth for the selected CPU\ + \n input parameters are bandwidth type(1,2,4) and link ID encodings\ + \n i.e. P2, P3, G0 - G7" + cpu_enable_apb_help = "Enables the DF p-state performance boost algorithm" + cpu_disable_apb_help = "Disables the DF p-state performance boost alogorithm." + "Input parameter is DFPstate (0 -3 )" + set_cpu_pow_limit_help = "Set power limit for the given socket. Input parameter is \ +power limit value." + set_cpu_xgmi_link_width_help = "Set max and Min linkwidth. 
Input parameters are \ +min and max link width values" + set_cpu_lclk_dpm_level_help = "Sets the max and min dpm level on a given NBIO. Inpur parameters are \ +die_index, min dpm, max dpm." + core_boost_limit_help = "Get booslimit for the selected cores" + core_curr_active_freq_core_limit_help = "Get Current CCLK limit set per Core" + set_soc_boost_limit_help = "Sets the boost limit for the given socket. Input parameter is \ +socket limit value" + set_core_boost_limit_help = "Sets the boost limit for the given core. Input parameter is \ +core limit value" + cpu_metrics_ver_help = "Displays metrics table version" + cpu_metrics_table_help = "Displays metric table" + core_energy_help = "Displays core energy for the selected core" + socket_energy_help = "Displays socket energy for the selected socket" + set_cpu_pwr_eff_mode_help = "Sets the power efficency mode policy. Input parameter is mode." + cpu_ddr_bandwidth_help = "Displays per socket max ddr bw, current utilized bw and current utilized ddr bw in percentage" + cpu_temp_help = "Displays cpu socket temperature" + cpu_dimm_temp_range_rate_help = "Displays dimm temperature range and refresh rate" + cpu_dimm_pow_conumption_help = "Displays dimm power conumption" + cpu_dimm_thermal_sensor_help = "Displays dimm thermal sensor" + set_cpu_gmi3_link_width_help = "Sets max and min gmi3 link width range" + set_cpu_pcie_lnk_rate_help = "Sets pcie link rate" + set_cpu_df_pstate_range_help = "Sets max and min df-pstates" + # Create metric subparser metric_parser = subparsers.add_parser('metric', help=metric_help, description=metric_subcommand_help) metric_parser._optionals.title = metric_optionals_title metric_parser.formatter_class=lambda prog: AMDSMISubparserHelpFormatter(prog) metric_parser.set_defaults(func=func) + cpu_group = metric_parser.add_argument_group("CPU Option") + set_group = metric_parser.add_argument_group("Set Options") # Add Universal Arguments self._add_command_modifiers(metric_parser) @@ -519,6 +628,36 @@ 
class AMDSMIParser(argparse.ArgumentParser): metric_parser.add_argument('-f', '--fb_usage', action='store_true', required=False, help=fb_usage_help) metric_parser.add_argument('-m', '--xgmi', action='store_true', required=False, help=xgmi_help) + cpu_group.add_argument('--cpu_power_metrics', action='store_true', required=False, help=cpu_power_metrics_help) + cpu_group.add_argument('--cpu_prochot', action='store_true', required=False, help=cpu_proc_help) + cpu_group.add_argument('--cpu_freq_metrics', action='store_true', required=False, help=cpu_freq_help) + cpu_group.add_argument('--cpu_c0_res', action='store_true', required=False, help=cpu_c0_res_help) + cpu_group.add_argument('--cpu_lclk_dpm_level', action='append', required=False, type=int, nargs=1, metavar=("NBIOID"), help=cpu_lclk_dpm_help) + cpu_group.add_argument('--cpu_pwr_svi_telemtry_rails', action='store_true', required=False, help=cpu_pwr_svi_telemtry_rails_help) + cpu_group.add_argument('--cpu_io_bandwidth', action='append', required=False, nargs=2, metavar=("IO_BW","LINKID_NAME"), help=cpu_io_bandwidth_help) + cpu_group.add_argument('--cpu_xgmi_bandwidth', action='append', required=False, nargs=2, metavar=("XGMI_BW","LINKID_NAME"), help=cpu_xgmi_bandwidth_help) + cpu_group.add_argument('--cpu_enable_apb', action='store_true', required=False, help=cpu_enable_apb_help) + cpu_group.add_argument('--cpu_disable_apb', action='append', required=False, type=int, nargs=1, metavar=("DF_PSTATE"), help=cpu_disable_apb_help) + set_group.add_argument('--set_cpu_pow_limit', action='append', required=False, type=int, nargs=1, metavar=("POW_LIMIT"),help=set_cpu_pow_limit_help) + set_group.add_argument('--set_cpu_xgmi_link_width', action='append', required=False, type=int, nargs=2, metavar=("MIN_WIDTH", "MAX_WIDTH"), help=set_cpu_xgmi_link_width_help) + set_group.add_argument('--set_cpu_lclk_dpm_level', action='append', required=False, type=int, nargs=3, metavar=("NBIOID", "MIN_DPM", 
"MAX_DPM"),help=set_cpu_lclk_dpm_level_help) + cpu_group.add_argument('--core_boost_limit', action='store_true', required=False, help=core_boost_limit_help) + cpu_group.add_argument('--core_curr_active_freq_core_limit', action='store_true', required=False, help=core_curr_active_freq_core_limit_help) + set_group.add_argument('--set_soc_boost_limit', action='append', required=False, type=int, nargs=1, metavar=("BOOST_LIMIT"), help=set_soc_boost_limit_help) + set_group.add_argument('--set_core_boost_limit', action='append', required=False, type=int, nargs=1, metavar=("BOOST_LIMIT"), help=set_core_boost_limit_help) + cpu_group.add_argument('--cpu_metrics_ver', action='store_true', required=False, help=cpu_metrics_ver_help) + cpu_group.add_argument('--cpu_metrics_table', action='store_true', required=False, help=cpu_metrics_table_help) + cpu_group.add_argument('--core_energy', action='store_true', required=False, help=core_energy_help) + cpu_group.add_argument('--socket_energy', action='store_true', required=False, help=socket_energy_help) + set_group.add_argument('--set_cpu_pwr_eff_mode', action='append', required=False, type=int, nargs=1, metavar=("MODE"), help=set_cpu_pwr_eff_mode_help) + cpu_group.add_argument('--cpu_ddr_bandwidth', action='store_true', required=False, help=cpu_ddr_bandwidth_help) + cpu_group.add_argument('--cpu_temp', action='store_true', required=False, help=cpu_temp_help) + cpu_group.add_argument('--cpu_dimm_temp_range_rate', action='append', required=False, type=int, nargs=1, metavar=("DIMM_ADDR"), help=cpu_dimm_temp_range_rate_help) + cpu_group.add_argument('--cpu_dimm_pow_conumption', action='append', required=False, type=int, nargs=1, metavar=("DIMM_ADDR"), help=cpu_dimm_pow_conumption_help) + cpu_group.add_argument('--cpu_dimm_thermal_sensor', action='append', required=False, type=int, nargs=1, metavar=("DIMM_ADDR"), help=cpu_dimm_thermal_sensor_help) + set_group.add_argument('--set_cpu_gmi3_link_width', action='append', required=False, 
type=int, nargs=2, metavar=("MIN_LW", "MAX_LW"), help=set_cpu_gmi3_link_width_help) + set_group.add_argument('--set_cpu_pcie_lnk_rate', action='append', required=False, type=int, nargs=1, metavar=("LINK_RATE"), help=set_cpu_pcie_lnk_rate_help) + set_group.add_argument('--set_cpu_df_pstate_range', action='append', required=False, type=int, nargs=2, metavar=("MAX_PSTATE", "MIN_PSTATE"), help=set_cpu_df_pstate_range_help) def _add_process_parser(self, subparsers, func): if self.helpers.is_hypervisor():