[SWDEV-532904] CLI lists unusable UUID without sudo (#510)
Signed-off-by: AL Musaffar, Yazen <Yazen.ALMusaffar@amd.com>
[ROCm/amdsmi commit: 01a6158c85]
This commit is contained in:
committed by
GitHub
parent
395a42cafa
commit
64bf2d6ae9
@@ -198,10 +198,20 @@ class AMDSMICommands():
|
||||
# Handle No GPU passed
|
||||
if args.gpu == None:
|
||||
args.gpu = self.device_handles
|
||||
|
||||
if not self.group_check_printed:
|
||||
self.helpers.check_required_groups()
|
||||
self.group_check_printed = True
|
||||
|
||||
# Perform one-time group check. If it fails, record that fact
|
||||
# but do NOT abort—just mark that UUID should be "N/A" later.
|
||||
_group_check_done = False
|
||||
_group_in_groups = False
|
||||
if not _group_check_done:
|
||||
try:
|
||||
self.helpers.check_required_groups()
|
||||
_group_in_groups = True
|
||||
except Exception as e:
|
||||
_group_in_groups = False
|
||||
# print the helper's error message exactly once:
|
||||
print(f"{e}")
|
||||
_group_check_done = True
|
||||
|
||||
# Handle multiple GPUs
|
||||
handled_multiple_gpus, device_handle = self.helpers.handle_gpus(args, self.logger, self.list)
|
||||
@@ -213,14 +223,20 @@ class AMDSMICommands():
|
||||
# Get gpu_id for logging
|
||||
gpu_id = self.helpers.get_gpu_id_from_device_handle(args.gpu)
|
||||
|
||||
try:
|
||||
bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
bdf = e.get_error_info()
|
||||
try:
|
||||
uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
uuid = e.get_error_info()
|
||||
# Only fetch data if group check passed; otherwise force "N/A"
|
||||
if _group_in_groups:
|
||||
try:
|
||||
bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
bdf = "N/A"
|
||||
try:
|
||||
uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(args.gpu)
|
||||
except amdsmi_exception.AmdSmiLibraryException:
|
||||
uuid = "N/A"
|
||||
else:
|
||||
# user not in render/video → everything is N/A
|
||||
bdf = "N/A"
|
||||
uuid = "N/A"
|
||||
|
||||
try:
|
||||
kfd_info = amdsmi_interface.amdsmi_get_gpu_kfd_info(args.gpu)
|
||||
@@ -246,23 +262,33 @@ class AMDSMICommands():
|
||||
if args.e:
|
||||
try:
|
||||
enumeration_info = amdsmi_interface.amdsmi_get_gpu_enumeration_info(args.gpu)
|
||||
except:
|
||||
enumeration_info = {"drm_render": "N/A",
|
||||
"drm_card": "N/A",
|
||||
"hip_id": "N/A",
|
||||
"hip_uuid": "N/A",
|
||||
"hsa_id": "N/A"}
|
||||
except amdsmi_exception.AmdSmiLibraryException:
|
||||
enumeration_info = {
|
||||
"drm_render": "N/A",
|
||||
"drm_card": "N/A",
|
||||
"hsa_id": "N/A",
|
||||
"hip_id": "N/A",
|
||||
"hip_uuid": "N/A",
|
||||
}
|
||||
|
||||
# __Override__ hip_uuid if the group check failed
|
||||
if not _group_in_groups:
|
||||
enumeration_info["hip_uuid"] = "N/A"
|
||||
# now store all the fields exactly once:
|
||||
if enumeration_info['drm_render'] == "N/A":
|
||||
self.logger.store_output(args.gpu, 'render', enumeration_info['drm_render'])
|
||||
else:
|
||||
self.logger.store_output(args.gpu, 'render', f"renderD{enumeration_info['drm_render']}")
|
||||
self.logger.store_output(args.gpu, 'render',
|
||||
f"renderD{enumeration_info['drm_render']}")
|
||||
if enumeration_info['drm_card'] == "N/A":
|
||||
self.logger.store_output(args.gpu, 'card', enumeration_info['drm_card'])
|
||||
else:
|
||||
self.logger.store_output(args.gpu, 'card', f"card{enumeration_info['drm_card']}")
|
||||
self.logger.store_output(args.gpu, 'card',
|
||||
f"card{enumeration_info['drm_card']}")
|
||||
self.logger.store_output(args.gpu, 'hsa_id', enumeration_info['hsa_id'])
|
||||
self.logger.store_output(args.gpu, 'hip_id', enumeration_info['hip_id'])
|
||||
self.logger.store_output(args.gpu, 'hip_uuid', enumeration_info['hip_uuid'])
|
||||
|
||||
|
||||
if multiple_devices:
|
||||
self.logger.store_multiple_device_output()
|
||||
|
||||
@@ -1070,8 +1070,7 @@ class AMDSMIHelpers():
|
||||
try:
|
||||
user_groups = {grp.getgrgid(gid).gr_name for gid in os.getgroups()}
|
||||
except Exception as e:
|
||||
logging.warning("Unable to determine group memberships: %s", e)
|
||||
return
|
||||
raise RuntimeError(f"Unable to determine group memberships: {e}")
|
||||
|
||||
missing_groups = required_groups - user_groups
|
||||
if missing_groups:
|
||||
@@ -1079,8 +1078,7 @@ class AMDSMIHelpers():
|
||||
"WARNING: User is missing the following required groups: %s. "
|
||||
"Please add user to these groups."
|
||||
) % ", ".join(sorted(missing_groups))
|
||||
print(msg)
|
||||
logging.warning(msg)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
def _severity_as_string(self, error_severity, notify_type, for_filename):
|
||||
if error_severity == "non_fatal_uncorrected":
|
||||
@@ -1345,7 +1343,7 @@ class AMDSMIHelpers():
|
||||
severity_mask |= (1 << 0)
|
||||
elif sev in ("nonfatal-corrected", "corrected"):
|
||||
# Set bit corresponding to AMDSMI_CPER_SEV_NON_FATAL_CORRECTED (which is 2)
|
||||
severity_mask |= (1 << 2)
|
||||
severity_mask |= (1 << 2)
|
||||
|
||||
buffer_size = 1048576
|
||||
|
||||
|
||||
Reference in new issue
Block a user