amdsmi_cli: Add checks for no gpu devices, cpu and core devices

- Add checks for the case where no GPU, CPU, or core devices are present
  - Update units for core energy and CPU energy

Change-Id: Ieea43f1bb7fc303ebbbdf72f1ab22644a28df25c


[ROCm/amdsmi commit: 18d8087711]
Этот коммит содержится в:
khashaik
2024-01-04 08:20:19 -05:00
коммит произвёл Galantsev, Dmitrii
родитель c500be9b35
Коммит a66efce2da
2 изменённых файлов: 65 добавлений и 16 удалений
+63 -14
Просмотреть файл
@@ -736,6 +736,10 @@ class AMDSMICommands():
else:
logging.info("No CPU devices present")
if (cpu_options and (len(self.cpu_handles) == 0)):
logging.error("No CPU devices present")
sys.exit(-1)
if (len(self.device_handles) and ((((not gpus) and (not cpus)) or gpus)
and not cpu_options)):
self.logger.clear_multiple_devices_ouput()
@@ -746,6 +750,10 @@ class AMDSMICommands():
else:
logging.info("No GPU devices present")
if (gpu_options and (len(self.device_handles) == 0)):
logging.error("No GPU devices present")
sys.exit(-1)
if (len(self.cpu_handles) == 0 and len(self.device_handles) == 0):
logging.error("No CPU and GPU devices present")
sys.exit(-1)
@@ -1652,7 +1660,32 @@ class AMDSMICommands():
args (Namespace): Namespace containing the parsed CLI args
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
cpu (cpu_handle, optional): device_handle for target device. Defaults to None.
cpu_power_metrics (bool, optional): Value override for args.cpu_power_metrics. Defaults to None
prochot (bool, optional): Value override for args.prochot. Defaults to None.
freq_metrics (bool, optional): Value override for args.freq_metrics. Defaults to None.
c0_res (bool, optional): Value override for args.c0_res. Defaults to None
lclk_dpm_level (list, optional): Value override for args.lclk_dpm_level. Defaults to None
pwr_svi_telemtry_rails (list, optional): value override for args.pwr_svi_telemtry_rails. Defaults to None
io_bandwidth (list, optional): value override for args.io_bandwidth. Defaults to None
xgmi_bandwidth (list, optional): value override for args.xgmi_bandwidth. Defaults to None
enable_apb (bool, optional): Value override for args.enable_apb. Defaults to None
disable_apb (bool, optional): Value override for args.disable_apb. Defaults to None
set_pow_limit (int, optional): Value override for args.cpu_set_pow_limit. Defaults to None
set_xgmi_link_width (list, optional): Value override for args.set_cpu_xgmi_link_width. Defaults to None
set_lclk_dpm_level (list, optional): Value override for args.set_cpu_lclk_dpm_level. Defaults to None
set_soc_boost_limit (list, optional): Value override for args.set_soc_boost_limit. Defaults to None
metrics_ver (bool, optional): Value override for args.cpu_metrics_ver. Defaults to None
metrics_table (bool, optional): Value override for args.cpu_metrics_table. Defaults to None
socket_energy (bool, optional): Value override for args.socket_energy. Defaults to None
set_pwr_eff_mode (list, optional): Value override for args.set_cpu_pwr_eff_mode. Defaults to None
ddr_bandwidth (bool, optional): Value override for args.ddr_bandwidth. Defaults to None
cpu_temp (bool, optional): Value override for args.cpu_temp. Defaults to None
dimm_temp_range_rate (list, optional): Dimm address.Value override for args.cpu_dimm_temp_range_rate. Defaults to None
dimm_pow_conumption (list, optional): Dimm address. Value override for args.cpu_dimm_pow_conumption. Defaults to None
dimm_thermal_sensor (list, optional): Dimm address. Value override for args.cpu_dimm_thermal_sensor. Defaults to None
set_gmi3_link_width (list, optional): Min and Max link widths. Value override for args.set_cpu_gmi3_link_width. Defaults to None
set_pcie_lnk_rate (list, optional): Link rate.Value override for args.set_cpu_pcie_lnk_rate. Defaults to None
set_df_pstate_range (list, optional): Max and Min pstates.Value override for args.set_cpu_df_pstate_range. Defaults to None
Returns:
None: Print output via AMDSMILogger to destination
"""
@@ -1936,9 +1969,6 @@ class AMDSMICommands():
if (args.cpu_metrics_table):
static_dict["metrics_table"] = {}
static_dict["metrics_table"]["response"] = "N/A"
# Note:- amdsmi_get_metrics_table has been disabled as there is fix needed in the library API and will be
# in next version
try:
metrics_table = amdsmi_interface.amdsmi_get_metrics_table(args.cpu)
static_dict["metrics_table"]["response"] = metrics_table
@@ -2054,7 +2084,10 @@ class AMDSMICommands():
args (Namespace): Namespace containing the parsed CLI args
multiple_devices (bool, optional): True if checking for multiple devices. Defaults to False.
core (device_handle, optional): device_handle for target device. Defaults to None.
boost_limit (bool, optional): Value override for args.boostlimit. Defaults to None
curr_active_freq_core_limit (bool, optional): Value override for args.boostlimit. Defaults to None
set_core_boost_limit(list, optional): boost limit value.Value override for args.set_core_boost_limit. Defaults to None
core_energy (bool, optional): Value override for args.core_energy. Defaults to None
Returns:
None: Print output via AMDSMILogger to destination
"""
@@ -2187,6 +2220,7 @@ class AMDSMICommands():
guest_data (bool, optional): Value override for args.guest_data. Defaults to None.
fb_usage (bool, optional): Value override for args.fb_usage. Defaults to None.
xgmi (bool, optional): Value override for args.xgmi. Defaults to None.
cpu (device_handle, optional): cpu index. Defaults to None
cpu_power_metrics (bool, optional): Value override for args.cpu_power_metrics. Defaults to None
prochot (bool, optional): Value override for args.prochot. Defaults to None.
freq_metrics (bool, optional): Value override for args.freq_metrics. Defaults to None.
@@ -2197,10 +2231,9 @@ class AMDSMICommands():
xgmi_bandwidth (list, optional): value override for args.xgmi_bandwidth. Defaults to None
enable_apb (bool, optional): Value override for args.enable_apb. Defaults to None
disable_apb (bool, optional): Value override for args.disable_apb. Defaults to None
set_pow_limit (bool, optional): Value override for args.cpu_set_pow_limit. Defaults to None
set_pow_limit (int, optional): Value override for args.cpu_set_pow_limit. Defaults to None
set_xgmi_link_width (list, optional): Value override for args.set_cpu_xgmi_link_width. Defaults to None
set_lclk_dpm_level (bool, optional): Value override for args.set_cpu_lclk_dpm_level. Defaults to None
boost_limit (bool, optional): Value override for args.boost_limit. Defaults to None
set_lclk_dpm_level (list, optional): Value override for args.set_cpu_lclk_dpm_level. Defaults to None
set_soc_boost_limit (list, optional): Value override for args.set_soc_boost_limit. Defaults to None
metrics_ver (bool, optional): Value override for args.cpu_metrics_ver. Defaults to None
metrics_table (bool, optional): Value override for args.cpu_metrics_table. Defaults to None
@@ -2208,12 +2241,17 @@ class AMDSMICommands():
set_pwr_eff_mode (list, optional): Value override for args.set_cpu_pwr_eff_mode. Defaults to None
ddr_bandwidth (bool, optional): Value override for args.ddr_bandwidth. Defaults to None
cpu_temp (bool, optional): Value override for args.cpu_temp. Defaults to None
dimm_temp_range_rate (bool, optional): Value override for args.cpu_dimm_temp_range_rate. Defaults to None
dimm_pow_conumption (bool, optional): Value override for args.cpu_dimm_pow_conumption. Defaults to None
dimm_thermal_sensor (bool, optional): Value override for args.cpu_dimm_thermal_sensor. Defaults to None
set_gmi3_link_width (list, optional): Value override for args.set_cpu_gmi3_link_width. Defaults to None
set_pcie_lnk_rate (list, optional): Value override for args.set_cpu_pcie_lnk_rate. Defaults to None
set_df_pstate_range (list, optional): Value override for args.set_cpu_df_pstate_range. Defaults to None
dimm_temp_range_rate (list, optional): Dimm address.Value override for args.cpu_dimm_temp_range_rate. Defaults to None
dimm_pow_conumption (list, optional): Dimm address. Value override for args.cpu_dimm_pow_conumption. Defaults to None
dimm_thermal_sensor (list, optional): Dimm address. Value override for args.cpu_dimm_thermal_sensor. Defaults to None
set_gmi3_link_width (list, optional): Min and Max link widths. Value override for args.set_cpu_gmi3_link_width. Defaults to None
set_pcie_lnk_rate (list, optional): Link rate.Value override for args.set_cpu_pcie_lnk_rate. Defaults to None
set_df_pstate_range (list, optional): Max and Min pstates.Value override for args.set_cpu_df_pstate_range. Defaults to None
core (int, optional): core index. Value override for args.core.Defaults to None
boost_limit (bool, optional): Value override for args.boostlimit. Defaults to None
curr_active_freq_core_limit (bool, optional): Value override for args.boostlimit. Defaults to None
set_core_boost_limit(list, optional): boost limit value.Value override for args.set_core_boost_limit. Defaults to None
Raises:
IndexError: Index error if gpu list is empty
@@ -2250,6 +2288,10 @@ class AMDSMICommands():
xgmi_err, energy, mem_usage, schedule,
guard, guest_data, fb_usage, xgmi)
if (gpu_options and (len(self.device_handles) == 0)):
logging.error("No GPU devices present")
sys.exit(-1)
if ((len(self.cpu_handles) and ((((not gpus) and (not cpus) and (not cores)) or cpus)
and not gpu_options and not core_options))):
self.logger.clear_multiple_devices_ouput()
@@ -2262,6 +2304,9 @@ class AMDSMICommands():
dimm_pow_conumption,dimm_thermal_sensor, set_gmi3_link_width,
set_pcie_lnk_rate, set_df_pstate_range)
if (cpu_options and (len(self.cpu_handles) == 0)):
logging.error("No CPU devices present")
sys.exit(-1)
if ((len(self.core_handles) and ((((not gpus) and (not cpus) and (not cores)) or cores)
and not gpu_options and not cpu_options))):
@@ -2270,6 +2315,10 @@ class AMDSMICommands():
curr_active_freq_core_limit, set_core_boost_limit,
core_energy)
if (core_options and (len(self.cpu_handles) == 0)):
logging.error("No Core devices present")
sys.exit(-1)
if (len(self.cpu_handles) == 0 and len(self.device_handles) == 0 and
len(self.core_handles) == 0):
logging.error("No CPU and GPU devices present")
+2 -2
Просмотреть файл
@@ -743,7 +743,7 @@ def amdsmi_get_cpu_core_energy(
)
)
return f"{penergy.value} J"
return f"{float(penergy.value * pow(10, -6))} J"
def amdsmi_get_cpu_socket_energy(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle
@@ -760,7 +760,7 @@ def amdsmi_get_cpu_socket_energy(
)
)
return f"{penergy.value} J"
return f"{float(penergy.value * pow(10, -6))} J"
def amdsmi_get_cpu_prochot_status(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle