From 61ed9e13f454ccbad206ff5ca15e39048726da38 Mon Sep 17 00:00:00 2001 From: adapryor Date: Thu, 31 Oct 2024 12:27:04 -0500 Subject: [PATCH 1/2] [SWDEV-412505] Handle mclk permission errors as not supported Signed-off-by: adapryor Change-Id: I25c9af42ed62697f87c70ecaeb153abe53401091 --- include/rocm_smi/rocm_smi_utils.h | 1 + src/rocm_smi.cc | 17 ++++++++++++++--- src/rocm_smi_utils.cc | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/include/rocm_smi/rocm_smi_utils.h b/include/rocm_smi/rocm_smi_utils.h index abf5e98831..ab27d27c14 100755 --- a/include/rocm_smi/rocm_smi_utils.h +++ b/include/rocm_smi/rocm_smi_utils.h @@ -82,6 +82,7 @@ int SameFile(const std::string fileA, const std::string fileB); bool FileExists(char const *filename); std::vector globFilesExist(const std::string& filePattern); int isRegularFile(std::string fname, bool *is_reg); +int isReadOnlyForAll(const std::string& fname, bool *is_read_only); int ReadSysfsStr(std::string path, std::string *retStr); int WriteSysfsStr(std::string path, std::string val); bool IsInteger(const std::string & n_str); diff --git a/src/rocm_smi.cc b/src/rocm_smi.cc index 274fa3c113..da2f7a9702 100755 --- a/src/rocm_smi.cc +++ b/src/rocm_smi.cc @@ -1873,7 +1873,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, return ret; } - int ret_i; + rsmi_status_t status; amd::smi::DevInfoTypes dev_type; const auto & clk_type_it = kClkTypeMap.find(clk_type); @@ -1883,8 +1883,19 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind, return RSMI_STATUS_INVALID_ARGS; } - ret_i = dev->writeDevInfo(dev_type, freq_enable_str); - return amd::smi::ErrnoToRsmiStatus(ret_i); + status = amd::smi::ErrnoToRsmiStatus(dev->writeDevInfo(dev_type, freq_enable_str)); + + // If an operation is not supported, the dev file, ie /sys/class/drm/card1/device/pp_dpm_pcie + // will have read-only perms, and the OS will deny access, before the request hits the driver level + if (status == RSMI_STATUS_PERMISSION){ + bool read_only = 
false; + int perms = amd::smi::isReadOnlyForAll(dev->path(), &read_only); + if(read_only){ + return RSMI_STATUS_NOT_SUPPORTED; + } + } + + return status; CATCH } diff --git a/src/rocm_smi_utils.cc b/src/rocm_smi_utils.cc index 8437a27f31..bc3f0d3aaa 100755 --- a/src/rocm_smi_utils.cc +++ b/src/rocm_smi_utils.cc @@ -172,6 +172,24 @@ int isRegularFile(std::string fname, bool *is_reg) { return 0; }
+// Reports through *is_read_only whether fname is readable by at least one of
+// user/group/other while being writable by none of them.
+// Returns 0 on success, EINVAL if is_read_only is null, else stat()'s errno.
+int isReadOnlyForAll(const std::string& fname, bool *is_read_only) {
+  if (is_read_only == nullptr) {
+    return EINVAL;  // caller error: reject before touching the filesystem
+  }
+  struct stat file_stat;
+  if (stat(fname.c_str(), &file_stat) != 0) {
+    return errno;
+  }
+
+  const bool any_read = (file_stat.st_mode & (S_IRUSR | S_IRGRP | S_IROTH)) != 0;
+  const bool any_write = (file_stat.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0;
+  *is_read_only = any_read && !any_write;
+  return 0;
+}
+
 int WriteSysfsStr(std::string path, std::string val) { // On success, zero is returned. On error, -1 is returned, and // errno is set to indicate the error. From 35c1d00f5a603988974f7148cf0dfd7c28c06f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jorge=20L=C3=B3pez?= Date: Mon, 5 Aug 2024 13:28:29 +0200 Subject: [PATCH 2/2] Updates driverInitialized() to support amdgpu built as module as well as kernel built-in.
Fixes ROCm/rocm_smi_lib#102 and is an updated version of ROCm/rocm_smi_lib#104 Change-Id: Icb3abe820bc67035b822358a1c04bd09a7c22b6b Signed-off-by: Galantsev, Dmitrii Reviewed-by: Galantsev, Dmitrii --- python_smi_tools/rocm_smi.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/python_smi_tools/rocm_smi.py b/python_smi_tools/rocm_smi.py index d0eec6a503..c222d480c5 100755 --- a/python_smi_tools/rocm_smi.py +++ b/python_smi_tools/rocm_smi.py @@ -89,14 +89,17 @@ validClockNames.sort() def driverInitialized(): """ Returns true if amdgpu is found in the list of initialized modules """ - driverInitialized = '' - try: - driverInitialized = str(subprocess.check_output("cat /sys/module/amdgpu/initstate |grep live", shell=True)) - except subprocess.CalledProcessError: - pass - if len(driverInitialized) > 0: - return True - return False
+    # No amdgpu entry under /sys/module: report the driver as not initialized
+    if not os.path.exists("/sys/module/amdgpu"):
+        return False
+    initstatePath = "/sys/module/amdgpu/initstate"
+    if not os.path.exists(initstatePath):
+        # amdgpu is built into the kernel
+        return True
+    # amdgpu is a loadable module; it counts as initialized only when
+    # its initstate contains 'live'
+    with open(initstatePath) as initstate:
+        return 'live' in initstate.read()
 def formatJson(device, log):