[SWDEV-562295] Fix Dmesg errors when using CLI (#822)

* Changes:
  - Replaced opening files to check permissions
    with a read-access check only.

Do not try to open all paths; doing so may cause driver issues.
Checking read access is sufficient to verify permissions.

Reason: on GPUs that support partitioning (memory/compute),
logical devices are not valid until they are configured.
See `sudo amd-smi set -h` or the applicable APIs
to configure them on supported hardware.

Example error dmesg output:
[965358.883112] amdgpu 0000:15:00.0: amdgpu: renderD153 partition 1 not valid!
[965358.883283] amdgpu 0000:15:00.0: amdgpu: renderD154 partition 2 not valid!
[965358.883438] amdgpu 0000:15:00.0: amdgpu: renderD155 partition 3 not valid!
[965358.883594] amdgpu 0000:15:00.0: amdgpu: renderD156 partition 4 not valid!
[965358.883749] amdgpu 0000:15:00.0: amdgpu: renderD157 partition 5 not valid!
[965358.883904] amdgpu 0000:15:00.0: amdgpu: renderD158 partition 6 not valid!
[965358.884060] amdgpu 0000:15:00.0: amdgpu: renderD159 partition 7 not valid!

---------

Signed-off-by: Charis Poag <Charis.Poag@amd.com>

[ROCm/amdsmi commit: d73726698b]
Этот коммит содержится в:
Poag, Charis
2025-11-06 10:24:14 -06:00
коммит произвёл GitHub
родитель 181659ea1f
Коммит ced0642b4b
+55 -5
Просмотреть файл
@@ -32,6 +32,8 @@ import time
import glob
import errno
import pwd
import stat
from typing import Tuple, Optional, Union
from enum import Enum
from pathlib import Path
@@ -1168,14 +1170,46 @@ class AMDSMIHelpers():
except Exception as e:
return {"error": str(e)}
def _try_open(self, path: str):
def _has_read_access(self, path: str) -> Tuple[bool, Optional[int], Optional[str]]:
"""
Check whether the current user (by effective UID/GID and supplementary
groups) can read the given path, based on its mode bits, without opening it.
Returns (ok: bool, errno_or_None, message_or_None).
"""
try:
fd = os.open(path, os.O_RDONLY) # Only read access is needed for permission check
os.close(fd)
return True, None, None
st = os.stat(path)
except OSError as e:
return False, e.errno, e.strerror
# root can always read
if os.geteuid() == 0:
return True, None, None
mode = st.st_mode
uid = st.st_uid
gid = st.st_gid
euid = os.geteuid()
egid = os.getegid()
groups = os.getgroups()
# owner
if euid == uid:
if mode & stat.S_IRUSR:
return True, None, None
return False, errno.EACCES, "Permission denied (owner)"
# group
if gid == egid or gid in groups:
if mode & stat.S_IRGRP:
return True, None, None
return False, errno.EACCES, "Permission denied (group)"
# other
if mode & stat.S_IROTH:
return True, None, None
return False, errno.EACCES, "Permission denied (other)"
def check_required_groups(self, check_render=True, check_video=True):
"""
Check if the current user can access kfd and dri
@@ -1210,7 +1244,23 @@ class AMDSMIHelpers():
denied = []
for path in paths_to_check:
ok, err, msg = self._try_open(path)
# Do not try to open all paths; doing so may cause driver issues.
# Checking read access is sufficient to verify permissions.
#
# Reason: on GPUs that support partitioning (memory/compute),
# logical devices are not valid until they are configured.
# See `sudo amd-smi set -h` or the applicable APIs
# to configure them on supported hardware.
#
# Example error dmesg output:
# [965358.883112] amdgpu 0000:15:00.0: amdgpu: renderD153 partition 1 not valid!
# [965358.883283] amdgpu 0000:15:00.0: amdgpu: renderD154 partition 2 not valid!
# [965358.883438] amdgpu 0000:15:00.0: amdgpu: renderD155 partition 3 not valid!
# [965358.883594] amdgpu 0000:15:00.0: amdgpu: renderD156 partition 4 not valid!
# [965358.883749] amdgpu 0000:15:00.0: amdgpu: renderD157 partition 5 not valid!
# [965358.883904] amdgpu 0000:15:00.0: amdgpu: renderD158 partition 6 not valid!
# [965358.884060] amdgpu 0000:15:00.0: amdgpu: renderD159 partition 7 not valid!
ok, err, msg = self._has_read_access(path)
if ok:
continue
# if permission denied or operation not permitted