[SWDEV-495305] Fix AttributeError: 'Namespace' object has no attribute 'compute_partition'

Changes:
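   - [CLI] Compute & memory partition resets were removed earlier, but
     the reset argument check still referenced args.compute_partition
     and args.memory_partition, which raised the AttributeError. This
     fix drops those references and changes the reset commands back to
     the correct spacing (indentation).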

Change-Id: I707ff197baf7a32bfb7ef20f2b26a63acd13f08a
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
This commit is contained in:
Charis Poag
2024-10-31 11:51:07 -05:00
rodzic 2678e1f3f7
commit 7fc4b853d4
2 zmienionych plików z 46 dodań i 44 usunięć
+2
Wyświetl plik
@@ -591,6 +591,8 @@ GPU: 0
### Resolved issues
- **Fixed `amd-smi reset` commands showing an AttributeError**.
- **Improved Offline install process & lowered dependency for PyYAML**.
- **Fixed CPX not showing total number of logical GPUs**.
+44 -44
Wyświetl plik
@@ -4270,7 +4270,7 @@ class AMDSMICommands():
# Error if no subcommand args are passed
if self.helpers.is_baremetal():
if not any([args.gpureset, args.clocks, args.fans, args.profile, args.xgmierr, \
args.perf_determinism, args.compute_partition, args.memory_partition, \
args.perf_determinism, \
args.power_cap, args.clean_local_data]):
command = " ".join(sys.argv[1:])
raise AmdSmiRequiredCommandException(command, self.logger.format)
@@ -4337,8 +4337,8 @@ class AMDSMICommands():
logging.debug("Failed to reset fans on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.store_output(args.gpu, 'reset_fans', result)
if args.profile:
reset_profile_results = {'power_profile' : '',
'performance_level': ''}
reset_profile_results = {'power_profile' : 'N/A',
'performance_level': 'N/A'}
try:
power_profile_mask = amdsmi_interface.AmdSmiPowerProfilePresetMasks.BOOTUP_DEFAULT
amdsmi_interface.amdsmi_set_gpu_power_profile(args.gpu, 0, power_profile_mask)
@@ -4349,48 +4349,48 @@ class AMDSMICommands():
reset_profile_results['power_profile'] = "N/A"
logging.debug("Failed to reset power profile on gpu %s | %s", gpu_id, e.get_error_info())
try:
level_auto = amdsmi_interface.AmdSmiDevPerfLevel.AUTO
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, level_auto)
reset_profile_results['performance_level'] = 'Successfully reset Performance Level'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
reset_profile_results['performance_level'] = "N/A"
logging.debug("Failed to reset perf level on gpu %s | %s", gpu_id, e.get_error_info())
try:
level_auto = amdsmi_interface.AmdSmiDevPerfLevel.AUTO
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, level_auto)
reset_profile_results['performance_level'] = 'Successfully reset Performance Level'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
reset_profile_results['performance_level'] = "N/A"
logging.debug("Failed to reset perf level on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.store_output(args.gpu, 'reset_profile', reset_profile_results)
if args.xgmierr:
try:
amdsmi_interface.amdsmi_reset_gpu_xgmi_error(args.gpu)
result = 'Successfully reset XGMI Error count'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
result = "N/A"
logging.debug("Failed to reset xgmi error count on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.store_output(args.gpu, 'reset_xgmi_err', result)
if args.perf_determinism:
try:
level_auto = amdsmi_interface.AmdSmiDevPerfLevel.AUTO
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, level_auto)
result = 'Successfully disabled performance determinism'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
result = "N/A"
logging.debug("Failed to set perf level on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.store_output(args.gpu, 'reset_perf_determinism', result)
if args.power_cap:
try:
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
logging.debug(f"Power cap info for gpu {gpu_id} | {power_cap_info}")
default_power_cap_in_w = power_cap_info["default_power_cap"]
default_power_cap_in_w = self.helpers.convert_SI_unit(default_power_cap_in_w, AMDSMIHelpers.SI_Unit.MICRO)
current_power_cap_in_w = power_cap_info["power_cap"]
current_power_cap_in_w = self.helpers.convert_SI_unit(current_power_cap_in_w, AMDSMIHelpers.SI_Unit.MICRO)
except amdsmi_exception.AmdSmiLibraryException as e:
raise ValueError(f"Unable to get power cap info from {gpu_id}") from e
self.logger.store_output(args.gpu, 'reset_profile', reset_profile_results)
if args.xgmierr:
try:
amdsmi_interface.amdsmi_reset_gpu_xgmi_error(args.gpu)
result = 'Successfully reset XGMI Error count'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
result = "N/A"
logging.debug("Failed to reset xgmi error count on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.store_output(args.gpu, 'reset_xgmi_err', result)
if args.perf_determinism:
try:
level_auto = amdsmi_interface.AmdSmiDevPerfLevel.AUTO
amdsmi_interface.amdsmi_set_gpu_perf_level(args.gpu, level_auto)
result = 'Successfully disabled performance determinism'
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
result = "N/A"
logging.debug("Failed to set perf level on gpu %s | %s", gpu_id, e.get_error_info())
self.logger.store_output(args.gpu, 'reset_perf_determinism', result)
if args.power_cap:
try:
power_cap_info = amdsmi_interface.amdsmi_get_power_cap_info(args.gpu)
logging.debug(f"Power cap info for gpu {gpu_id} | {power_cap_info}")
default_power_cap_in_w = power_cap_info["default_power_cap"]
default_power_cap_in_w = self.helpers.convert_SI_unit(default_power_cap_in_w, AMDSMIHelpers.SI_Unit.MICRO)
current_power_cap_in_w = power_cap_info["power_cap"]
current_power_cap_in_w = self.helpers.convert_SI_unit(current_power_cap_in_w, AMDSMIHelpers.SI_Unit.MICRO)
except amdsmi_exception.AmdSmiLibraryException as e:
raise ValueError(f"Unable to get power cap info from {gpu_id}") from e
if current_power_cap_in_w == default_power_cap_in_w:
self.logger.store_output(args.gpu, 'powercap', f"Power cap is already set to {default_power_cap_in_w}")