[SWDEV-529266] [MI308][AMDSMI][RAS CPER] CPER dump not working on CPX mode (#319)
* Do not raise an exception when CPER status is not found; keep iterating to the next GPU
* Do not raise an exception when CPER status is not found; keep iterating to the next GPU
* use partition id and skip if non-zero
* reverting unneeded change
* Do not raise an exception when CPER status is not found; keep iterating to the next GPU
* use partition id and skip if non-zero
---------
Co-authored-by: Oosman Saeed <oossaeed@amd.com>
[ROCm/amdsmi commit: 9c297639f3]
This commit is contained in:
کامیت شده توسط
Maisam Arif
والد
7201649fbf
کامیت
2391516ced
@@ -6381,6 +6381,20 @@ class AMDSMICommands():
|
||||
if args.follow and not getattr(self, "_cper_follow_prompted", False):
|
||||
print("Press CTRL + C to stop.")
|
||||
self._cper_follow_prompted = True
|
||||
|
||||
partition_id = -1
|
||||
try:
|
||||
kfd_info = amdsmi_interface.amdsmi_get_gpu_kfd_info(args.gpu)
|
||||
kfd_id = kfd_info['kfd_id']
|
||||
node_id = kfd_info['node_id']
|
||||
partition_id = kfd_info['current_partition_id']
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
logging.debug("Failed to get kfd info for gpu %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
if partition_id != 0:
|
||||
logging.debug(f"Skipping gpu {gpu_id} on non zero partition {partition_id}")
|
||||
return
|
||||
|
||||
if args.folder and args.gpu:
|
||||
print(f"Dumping CPER file header entries for GPU {gpu_id} in folder {args.folder}")
|
||||
elif args.folder:
|
||||
|
||||
مرجع در شماره جدید
Block a user