diff --git a/projects/amdsmi/CHANGELOG.md b/projects/amdsmi/CHANGELOG.md index 3e2254638b..74e35c842d 100644 --- a/projects/amdsmi/CHANGELOG.md +++ b/projects/amdsmi/CHANGELOG.md @@ -398,6 +398,9 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ### Resolved Issues +- **Fixed CPER component not being redirected to output file issue when using `amd-smi ras --cper --folder --file --follow`**. + - Utlized the AMDSMILogger to redirect to output file when --file option is used + - **Fixed a CPER record count mismatch issue when using the `amd-smi ras --cper --file-limit`**. - Fixed deletion calculation to use files_to_delete = len(folder_files) - file_limit for exact file count management diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py index 04767456eb..386cf0a37d 100755 --- a/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py @@ -1460,15 +1460,34 @@ class AMDSMIHelpers(): return "unknown" return "UNKNOWN" - def display_cper_files_generated(self, entries, device_handle, folder): - # One‐time initialization: print warning & header only once + def display_cper_files_generated(self, entries, device_handle, folder, logger=None): + """ + Display CPER summary lines. If a logger is provided and its destination is + not stdout, append the output to that file instead of printing to stdout. + """ + use_file = ( + logger is not None + and logger.is_human_readable_format() + and logger.destination != 'stdout' + ) + + # One‐time initialization: warning & header only once if not getattr(self, "_cper_display_initialized", False): # Warning if no folder was specified elsewhere if not getattr(self, "_cper_warning_printed", False): - print(f"WARNING: No CPER files will be dumped unless --folder= is specified and cper entries exist.") - self._cper_warning_printed = True + warning = ( + "WARNING: No CPER files will be dumped unless " + "--folder= is specified and cper entries exist." + ) + if use_file: + with logger.destination.open('a', encoding="utf-8") as output_file: + output_file.write(warning + '\n') + else: + print(warning) + self._cper_warning_printed = True - self._print_header(folder) + # Print or log the header + self._print_header(folder, logger if use_file else None) self._cper_display_initialized = True # Loop through all entries in the dictionary. @@ -1476,27 +1495,48 @@ class AMDSMIHelpers(): # Assume 'entry' is a dictionary with keys: "error_severity" and "notify_type". timestamp = entry.get("timestamp", "unknown") gpu_id = self.get_gpu_id_from_device_handle(device_handle) - prefix = self._severity_as_string(entry.get("error_severity", "Unknown"), - entry.get("notify_type", "Unknown"), - False) + prefix = self._severity_as_string( + entry.get("error_severity", "Unknown"), + entry.get("notify_type", "Unknown"), + False + ) output = f"{timestamp:<20} {gpu_id:<7} {prefix:<20}" + if folder: - prefix = self._severity_as_string(entry.get("error_severity", "Unknown"), - entry.get("notify_type", "Unknown"), - True) - cper_data_file = f"{prefix}_{self.get_cper_count() + 1}.cper" + prefix_for_filename = self._severity_as_string( + entry.get("error_severity", "Unknown"), + entry.get("notify_type", "Unknown"), + True + ) + cper_data_file = f"{prefix_for_filename}_{self.get_cper_count() + 1}.cper" afids = self.pvtDumpAfids(cper_data_file) afids_str = ' '.join(map(str, afids)) output += f" {cper_data_file:<17} {afids_str}" - print(output) + if use_file: + with logger.destination.open('a', encoding="utf-8") as output_file: + output_file.write(output + '\n') + else: + print(output) + self.increment_cper_count() - def _print_header(self, folder): - print(f"{'timestamp':<20} {'gpu_id':<7} {'severity':<20}", end="") + def _print_header(self, folder, logger=None): + header = f"{'timestamp':<20} {'gpu_id':<7} {'severity':<20}" if folder: - print(f" {'file_name':<17} {'list of afids'}", end="") - print("") + header += f" {'file_name':<17} {'list of afids'}" + + use_file = ( + logger is not None + and logger.is_human_readable_format() + and logger.destination != 'stdout' + ) + + if use_file: + with logger.destination.open('a', encoding="utf-8") as output_file: + output_file.write(header + '\n') + else: + print(header) def dump_cper_entries(self, folder, entries, cper_data, device_handle, file_limit=None): """ @@ -1747,6 +1787,15 @@ class AMDSMIHelpers(): buffer_size = 1048576 + # Decide where to send human-readable output + dest = getattr(logger, "destination", "stdout") if logger is not None else "stdout" + log_to_file = dest != 'stdout' + if log_to_file: + # destination is usually a Path; fall back to Path(string) if needed + log_path = dest if isinstance(dest, Path) else Path(dest) + else: + log_path = None + gpu_id = self.get_gpu_id_from_device_handle(device_handle) if args.follow and not getattr(self, "_cper_follow_prompted", False): print("Press CTRL + C to stop.") @@ -1780,18 +1829,75 @@ class AMDSMIHelpers(): else: logging.debug(f"Cannot retrieve CPER entries: {e}") break + args.cursor[gpu_idx] = new_cursor if len(entries) == 0: break - if args.folder: - self.dump_cper_entries(args.folder, entries, cper_data, device_handle, args.file_limit) + + # When a file destination is set, temporarily redirect stdout + # so that helper print() calls go into that file. + if log_to_file and log_path is not None: + orig_stdout = sys.stdout + try: + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + except Exception: + pass + with log_path.open('a', encoding='utf-8') as f: + sys.stdout = f + if args.folder: + self.dump_cper_entries( + args.folder, entries, cper_data, device_handle, args.file_limit + ) + else: + self.display_cper_files_generated( + entries, device_handle, args.folder + ) + finally: + sys.stdout = orig_stdout else: - self.display_cper_files_generated(entries, device_handle, args.folder) + if args.folder: + self.dump_cper_entries( + args.folder, entries, cper_data, device_handle, args.file_limit + ) + else: + self.display_cper_files_generated( + entries, device_handle, args.folder + ) + if num_entries == 0 and not args.follow: - if args.folder: - self.dump_cper_entries(args.folder, entries, cper_data, device_handle, args.file_limit) + # If nothing was found, still emit the warning/header logic + # using the same redirection logic. + if log_to_file and log_path is not None: + orig_stdout = sys.stdout + try: + try: + log_path.parent.mkdir(parents=True, exist_ok=True) + except Exception: + pass + with log_path.open('a', encoding='utf-8') as f: + sys.stdout = f + if args.folder: + self.dump_cper_entries( + args.folder, entries, cper_data, device_handle, args.file_limit + ) + else: + self.display_cper_files_generated( + entries, device_handle, args.folder + ) + finally: + sys.stdout = orig_stdout else: - self.display_cper_files_generated(entries, device_handle, args.folder) + if args.folder: + self.dump_cper_entries( + args.folder, entries, cper_data, device_handle, args.file_limit + ) + else: + self.display_cper_files_generated( + entries, device_handle, args.folder + ) + + def get_bitmask_ranges(self, bitmask_dict): ranges = {}