diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 833600367b..23e92a677b 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -198,10 +198,20 @@ class AMDSMICommands(): # Handle No GPU passed if args.gpu == None: args.gpu = self.device_handles - - if not self.group_check_printed: - self.helpers.check_required_groups() - self.group_check_printed = True + + # Perform one-time group check. If it fails, record that fact + # but do NOT abort—just mark that UUID should be "N/A" later. + _group_check_done = False + _group_in_groups = False + if not _group_check_done: + try: + self.helpers.check_required_groups() + _group_in_groups = True + except Exception as e: + _group_in_groups = False + # print the helper's error message exactly once: + print(f"{e}") + _group_check_done = True # Handle multiple GPUs handled_multiple_gpus, device_handle = self.helpers.handle_gpus(args, self.logger, self.list) @@ -213,14 +223,20 @@ class AMDSMICommands(): # Get gpu_id for logging gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu) - try: - bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - bdf = e.get_error_info() - try: - uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(args.gpu) - except amdsmi_exception.AmdSmiLibraryException as e: - uuid = e.get_error_info() + # Only fetch data if group check passed; otherwise force "N/A" + if _group_in_groups: + try: + bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(args.gpu) + except amdsmi_exception.AmdSmiLibraryException as e: + bdf = "N/A" + try: + uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(args.gpu) + except amdsmi_exception.AmdSmiLibraryException: + uuid = "N/A" + else: + # user not in render/video → everything is N/A + bdf = "N/A" + uuid = "N/A" try: kfd_info = amdsmi_interface.amdsmi_get_gpu_kfd_info(args.gpu) @@ -246,23 +262,33 @@ class AMDSMICommands(): if args.e: try: enumeration_info = amdsmi_interface.amdsmi_get_gpu_enumeration_info(args.gpu) - except: - enumeration_info = {"drm_render": "N/A", - "drm_card": "N/A", - "hip_id": "N/A", - "hip_uuid": "N/A", - "hsa_id": "N/A"} + except amdsmi_exception.AmdSmiLibraryException: + enumeration_info = { + "drm_render": "N/A", + "drm_card": "N/A", + "hsa_id": "N/A", + "hip_id": "N/A", + "hip_uuid": "N/A", + } + + # __Override__ hip_uuid if the group check failed + if not _group_in_groups: + enumeration_info["hip_uuid"] = "N/A" + # now store all the fields exactly once: if enumeration_info['drm_render'] == "N/A": self.logger.store_output(args.gpu, 'render', enumeration_info['drm_render']) else: - self.logger.store_output(args.gpu, 'render', f"renderD{enumeration_info['drm_render']}") + self.logger.store_output(args.gpu, 'render', + f"renderD{enumeration_info['drm_render']}") if enumeration_info['drm_card'] == "N/A": self.logger.store_output(args.gpu, 'card', enumeration_info['drm_card']) else: - self.logger.store_output(args.gpu, 'card', f"card{enumeration_info['drm_card']}") + self.logger.store_output(args.gpu, 'card', + f"card{enumeration_info['drm_card']}") self.logger.store_output(args.gpu, 'hsa_id', enumeration_info['hsa_id']) self.logger.store_output(args.gpu, 'hip_id', enumeration_info['hip_id']) self.logger.store_output(args.gpu, 'hip_uuid', enumeration_info['hip_uuid']) + if multiple_devices: self.logger.store_multiple_device_output() diff --git a/amdsmi_cli/amdsmi_helpers.py b/amdsmi_cli/amdsmi_helpers.py index 39495b4c3b..af6fc4ad6f 100755 --- a/amdsmi_cli/amdsmi_helpers.py +++ b/amdsmi_cli/amdsmi_helpers.py @@ -1070,8 +1070,7 @@ class AMDSMIHelpers(): try: user_groups = {grp.getgrgid(gid).gr_name for gid in os.getgroups()} except Exception as e: - logging.warning("Unable to determine group memberships: %s", e) - return + raise RuntimeError(f"Unable to determine group memberships: {e}") missing_groups = required_groups - user_groups if missing_groups: @@ -1079,8 +1078,7 @@ class AMDSMIHelpers(): "WARNING: User is missing the following required groups: %s. " "Please add user to these groups." ) % ", ".join(sorted(missing_groups)) - print(msg) - logging.warning(msg) + raise RuntimeError(msg) def _severity_as_string(self, error_severity, notify_type, for_filename): if error_severity == "non_fatal_uncorrected": @@ -1345,7 +1343,7 @@ class AMDSMIHelpers(): severity_mask |= (1 << 0) elif sev in ("nonfatal-corrected", "corrected"): # Set bit corresponding to AMDSMI_CPER_SEV_NON_FATAL_CORRECTED (which is 2) - severity_mask |= (1 << 2) + severity_mask |= (1 << 2) buffer_size = 1048576