From efb37d89bc42a318b40f90e7810d9b148d858754 Mon Sep 17 00:00:00 2001 From: "Poag, Charis" Date: Wed, 16 Apr 2025 16:03:42 -0500 Subject: [PATCH] [SWDEV-522992] Make libdrm / libdrm_amdgpu load dynamically (#43) Changes: - Now load libdrm/libdrm_amdgpu dynamically Change-Id: I49fb1f3540b3235a25370f7cfcfb9778db34c2a5 Signed-off-by: Charis Poag [ROCm/rocm_smi_lib commit: ce405476cabf66a884a351cb2e3253bd5c29e06b] --- projects/rocm-smi-lib/CMakeLists.txt | 14 ++- .../rocm-smi-lib/docs/install/install.rst | 2 +- .../include/rocm_smi/amdgpu_drm.h | 13 +-- .../rocm-smi-lib/include/rocm_smi/rocm_smi.h | 2 + .../include/rocm_smi/rocm_smi_lib_loader.h | 86 +++++++++++++++++++ projects/rocm-smi-lib/oam/CMakeLists.txt | 2 +- .../python_smi_tools/rsmiBindings.py | 4 + projects/rocm-smi-lib/rocm_smi/CMakeLists.txt | 2 +- projects/rocm-smi-lib/src/rocm_smi.cc | 52 ++++++++++- .../rocm-smi-lib/src/rocm_smi_lib_loader.cc | 74 ++++++++++++++++ 10 files changed, 238 insertions(+), 13 deletions(-) create mode 100644 projects/rocm-smi-lib/include/rocm_smi/rocm_smi_lib_loader.h create mode 100644 projects/rocm-smi-lib/src/rocm_smi_lib_loader.cc diff --git a/projects/rocm-smi-lib/CMakeLists.txt b/projects/rocm-smi-lib/CMakeLists.txt index 129a5ce859..a35d407e45 100755 --- a/projects/rocm-smi-lib/CMakeLists.txt +++ b/projects/rocm-smi-lib/CMakeLists.txt @@ -1,6 +1,9 @@ # # Minimum version of cmake and C++ required # +message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") +message(" CMake ROCm SMI (Library) [root] ") +message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") cmake_minimum_required(VERSION 3.14) set(ROCM_SMI_LIBS_TARGET "rocm_smi_libraries") @@ -38,8 +41,6 @@ set(SHARE_INSTALL_PREFIX # provide git to utilities find_program (GIT NAMES git) -pkg_check_modules(DRM REQUIRED libdrm) -pkg_check_modules(AMDGPU_DRM REQUIRED libdrm_amdgpu) ## Setup the package version based on git tags. 
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver") @@ -165,6 +166,7 @@ set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_gpu_metrics.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_logger.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_properties.cc") +set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_lib_loader.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${SHR_MUTEX_DIR}/shared_mutex.cc") set(CMN_INC_LIST "${COMMON_INC_DIR}/rocm_smi_device.h") @@ -181,6 +183,7 @@ set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_gpu_metrics.h") set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi.h") set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_logger.h") set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_properties.h") +set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_lib_loader.h") set(CMN_INC_LIST ${CMN_INC_LIST} "${SHR_MUTEX_DIR}/shared_mutex.h") ## set components @@ -335,7 +338,8 @@ if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) else() set(CPACK_DEBIAN_PACKAGE_RELEASE "local") endif() -set(CPACK_DEBIAN_PACKAGE_SUGGESTS "sudo, libdrm-dev") +set(CPACK_DEBIAN_PACKAGE_SUGGESTS "sudo, libdrm-dev, libdrm-amdgpu-dev") +set(CPACK_RPM_PACKAGE_SUGGESTS "sudo, libdrm-dev, libdrm-amdgpu-dev") ## Process the Debian install/remove scripts to update the CPACK variables configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY ) @@ -399,3 +403,7 @@ if(NOT BUILD_SHARED_LIBS) cpack_add_component_group("static") cpack_add_component(dev GROUP static) endif() + +message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") +message(" CMake ROCm SMI (Library) [root] END ") +message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") diff --git a/projects/rocm-smi-lib/docs/install/install.rst b/projects/rocm-smi-lib/docs/install/install.rst index 
1e57bae48d..9fd922e72d 100644 --- a/projects/rocm-smi-lib/docs/install/install.rst +++ b/projects/rocm-smi-lib/docs/install/install.rst @@ -37,7 +37,7 @@ To build the ROCm SMI library, the following components are required. The following software versions are what was used in development. Earlier versions are not guaranteed to work: -* CMake (v3.5.0) +* CMake (v3.14.0) * g++ (5.4.0) To build the latest documentation, the following are required: diff --git a/projects/rocm-smi-lib/include/rocm_smi/amdgpu_drm.h b/projects/rocm-smi-lib/include/rocm_smi/amdgpu_drm.h index f16c6dbc37..16e8715a02 100644 --- a/projects/rocm-smi-lib/include/rocm_smi/amdgpu_drm.h +++ b/projects/rocm-smi-lib/include/rocm_smi/amdgpu_drm.h @@ -29,10 +29,15 @@ * Keith Whitwell */ -#ifndef __AMDGPU_DRM_H__ -#define __AMDGPU_DRM_H__ +#ifndef __LIBDRM_AMDGPU__ +#define __LIBDRM_AMDGPU__ +#include +#endif -#include "drm.h" +#ifndef __LIBDRM__ +#define __LIBDRM__ +#include +#endif #if defined(__cplusplus) extern "C" { @@ -1295,5 +1300,3 @@ struct drm_color_ctm_3x4 { #if defined(__cplusplus) } #endif - -#endif \ No newline at end of file diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h index d751037a52..c08c5aca07 100755 --- a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h +++ b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi.h @@ -134,6 +134,8 @@ typedef enum { RSMI_STATUS_AMDGPU_RESTART_ERR, //!< Could not successfully restart //!< the amdgpu driver RSMI_STATUS_DRM_ERROR, //!< Error when call libdrm + RSMI_STATUS_FAIL_LOAD_MODULE, //!< Fail to load lib + RSMI_STATUS_FAIL_LOAD_SYMBOL, //!< Fail to load symbol RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred } rsmi_status_t; diff --git a/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_lib_loader.h b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_lib_loader.h new file mode 100644 index 0000000000..9ff9ba7061 --- /dev/null +++ 
b/projects/rocm-smi-lib/include/rocm_smi/rocm_smi_lib_loader.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#ifndef ROCM_SMI_INCLUDE_ROCM_SMI_LIB_LOADER_H_
+#define ROCM_SMI_INCLUDE_ROCM_SMI_LIB_LOADER_H_
+#include <dlfcn.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <string>
+#include <mutex>  // NOLINT(build/c++11)
+
+#include "rocm_smi/rocm_smi.h"
+
+namespace amd {
+namespace smi {
+// Thin wrapper around dlopen()/dlsym()/dlclose() for one shared library.
+class ROCmSmiLibraryLoader {
+ public:
+  ROCmSmiLibraryLoader();
+
+  // Opens the shared library named by filename.
+  rsmi_status_t load(const char* filename);
+
+  // Resolves func_name and stores its address in *func_handler.
+  template <typename T> rsmi_status_t load_symbol(T* func_handler,
+                                                 const char* func_name);
+
+  rsmi_status_t unload();
+
+  ~ROCmSmiLibraryLoader();
+
+ private:
+  void* libHandler_;
+  std::mutex library_mutex_;
+  bool library_loaded_ = false;
+};
+
+// Returns RSMI_STATUS_FAIL_LOAD_MODULE when no library handle is held,
+// RSMI_STATUS_FAIL_LOAD_SYMBOL on a null argument or when dlsym() fails.
+template <typename T> rsmi_status_t ROCmSmiLibraryLoader::load_symbol(
+    T* func_handler, const char* func_name) {
+  // Lock before reading libHandler_ so a concurrent load()/unload() cannot
+  // change the handle between the check and the dlsym() call.
+  std::lock_guard<std::mutex> guard(library_mutex_);
+  if (!libHandler_) {
+    return RSMI_STATUS_FAIL_LOAD_MODULE;
+  }
+  if (!func_handler || !func_name) {
+    return RSMI_STATUS_FAIL_LOAD_SYMBOL;
+  }
+
+  *reinterpret_cast<void**>(func_handler) = dlsym(libHandler_, func_name);
+  if (*func_handler == nullptr) {
+    const char* error = dlerror();  // may be null; guard before streaming
+    std::cerr << "ROCmSmiLibraryLoader: Fail to load the symbol "
+              << func_name << ": " << (error ? error : "unknown error")
+              << std::endl;
+    return RSMI_STATUS_FAIL_LOAD_SYMBOL;
+  }
+  return RSMI_STATUS_SUCCESS;
+}
+
+}  // namespace smi
+}  // namespace amd
+
+#endif  // ROCM_SMI_INCLUDE_ROCM_SMI_LIB_LOADER_H_
diff --git a/projects/rocm-smi-lib/oam/CMakeLists.txt b/projects/rocm-smi-lib/oam/CMakeLists.txt index 4789bd4ae9..181ee1eb96 100644 --- a/projects/rocm-smi-lib/oam/CMakeLists.txt +++ b/projects/rocm-smi-lib/oam/CMakeLists.txt @@ -72,7 +72,7 @@ target_include_directories(${OAM_EXAMPLE_EXE} PRIVATE ${OAM_INC_LIST}) target_link_libraries(${OAM_EXAMPLE_EXE} ${OAM_TARGET}) add_library(${OAM_TARGET} ${CMN_SRC_LIST} ${OAM_SRC_LIST} ${CMN_INC_LIST} ${OAM_INC_LIST}) -target_link_libraries(${OAM_TARGET} pthread rt dl ${DRM_LIBRARIES} ${AMDGPU_DRM_LIBRARIES}) +target_link_libraries(${OAM_TARGET} PRIVATE pthread rt dl) target_include_directories(${OAM_TARGET} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/include ${COMMON_PROJ_ROOT}/common/shared_mutex) diff --git a/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py b/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py index d19fdad740..0f78820c8f 100644 --- a/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py +++ b/projects/rocm-smi-lib/python_smi_tools/rsmiBindings.py @@ -55,6 +55,8 @@ class rsmi_status_t(c_int): RSMI_STATUS_SETTING_UNAVAILABLE = 0x12 RSMI_STATUS_AMDGPU_RESTART_ERR = 0x13 RSMI_STATUS_DRM_ERROR = 0x14 + RSMI_STATUS_FAIL_LOAD_MODULE = 0x15 + RSMI_STATUS_FAIL_LOAD_SYMBOL = 0x16 RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF @@ -81,6 +83,8 @@ rsmi_status_verbose_err_out = { rsmi_status_t.RSMI_STATUS_SETTING_UNAVAILABLE: 'Requested setting is unavailable for current device', rsmi_status_t.RSMI_STATUS_AMDGPU_RESTART_ERR: 'Could not successfully restart the amdgpu driver', rsmi_status_t.RSMI_STATUS_DRM_ERROR: 'Error when calling libdrm', + rsmi_status_t.RSMI_STATUS_FAIL_LOAD_MODULE: 'Failed to load a library', + rsmi_status_t.RSMI_STATUS_FAIL_LOAD_SYMBOL: 'Failed to load a library symbol', rsmi_status_t.RSMI_STATUS_UNKNOWN_ERROR: 'Unknown error occured' } diff --git a/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt b/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt index 75070a2085..257309b211 100755 --- a/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt +++ b/projects/rocm-smi-lib/rocm_smi/CMakeLists.txt @@ -81,7 +81,7 @@ add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc") target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET}) add_library(${ROCM_SMI_TARGET} ${CMN_SRC_LIST} ${SMI_SRC_LIST} ${CMN_INC_LIST} ${SMI_INC_LIST}) -target_link_libraries(${ROCM_SMI_TARGET} pthread rt dl ${DRM_LIBRARIES} ${AMDGPU_DRM_LIBRARIES}) +target_link_libraries(${ROCM_SMI_TARGET} PRIVATE pthread rt dl) target_include_directories(${ROCM_SMI_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${COMMON_PROJ_ROOT}/common/shared_mutex) diff --git a/projects/rocm-smi-lib/src/rocm_smi.cc 
b/projects/rocm-smi-lib/src/rocm_smi.cc index d77b995284..60d59c20db 100755 --- a/projects/rocm-smi-lib/src/rocm_smi.cc +++ b/projects/rocm-smi-lib/src/rocm_smi.cc @@ -77,6 +77,7 @@ #include "rocm_smi/rocm_smi_io_link.h" #include "rocm_smi/rocm_smi64Config.h" #include "rocm_smi/rocm_smi_logger.h" +#include "rocm_smi/rocm_smi_lib_loader.h" using amd::smi::monitorTypesToString; using amd::smi::getRSMIStatusString; @@ -2392,6 +2393,7 @@ rsmi_status_t rsmi_dev_market_name_get(uint32_t dv_ind, char *market_name, uint3 GET_DEV_FROM_INDX dev->index(); std::string render_file_name; + market_name[0] = '\0'; const std::string regex("renderD([0-9]+)"); const std::string renderD_folder = "/sys/class/drm/card" @@ -2404,19 +2406,63 @@ rsmi_status_t rsmi_dev_market_name_get(uint32_t dv_ind, char *market_name, uint3 if (render_name != "") { gpu_fd = open(drm_path.c_str(), O_RDWR | O_CLOEXEC); } else { - market_name[0] = '\0'; return RSMI_STATUS_NOT_SUPPORTED; } + rsmi_status_t status = RSMI_STATUS_NOT_SUPPORTED; + amd::smi::ROCmSmiLibraryLoader libdrm_amdgpu_; + status = libdrm_amdgpu_.load("libdrm_amdgpu.so"); + if (status != RSMI_STATUS_SUCCESS) { + close(gpu_fd); + libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); + return status; + } + + // Function pointer typedefs + typedef int (*amdgpu_device_initialize_t)(int fd, uint32_t *major_version, + uint32_t *minor_version, + amdgpu_device_handle *device_handle); + typedef int (*amdgpu_device_deinitialize_t)(amdgpu_device_handle device_handle); + typedef const char* (*amdgpu_get_marketing_name_t)(amdgpu_device_handle device_handle); + amdgpu_device_initialize_t amdgpu_device_initialize = nullptr; + amdgpu_device_deinitialize_t amdgpu_device_deinitialize = nullptr; + amdgpu_get_marketing_name_t amdgpu_get_marketing_name = nullptr; + + status = libdrm_amdgpu_.load_symbol( + reinterpret_cast(&amdgpu_device_initialize), + "amdgpu_device_initialize"); + if (status != RSMI_STATUS_SUCCESS) { + close(gpu_fd); + 
libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); + return status; + } + amdgpu_device_handle device_handle = nullptr; uint32_t major_version, minor_version; int ret = amdgpu_device_initialize(gpu_fd, &major_version, &minor_version, &device_handle); if (ret != 0) { - market_name[0] = '\0'; close(gpu_fd); + libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); return RSMI_STATUS_DRM_ERROR; } + status = libdrm_amdgpu_.load_symbol( + reinterpret_cast( + &amdgpu_get_marketing_name), "amdgpu_get_marketing_name"); + if (status != RSMI_STATUS_SUCCESS) { + close(gpu_fd); + libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); + return status; + } + + status = libdrm_amdgpu_.load_symbol(reinterpret_cast( + &amdgpu_device_deinitialize), "amdgpu_device_deinitialize"); + if (status != RSMI_STATUS_SUCCESS) { + close(gpu_fd); + libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); + return status; + } + // Get the marketing name using libdrm's API const char *name = amdgpu_get_marketing_name(device_handle); if (name != nullptr) { @@ -2427,6 +2473,7 @@ rsmi_status_t rsmi_dev_market_name_get(uint32_t dv_ind, char *market_name, uint3 market_name[std::min(len - 1, ln)] = '\0'; amdgpu_device_deinitialize(device_handle); close(gpu_fd); + libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); if (len < (temp_market_name.size() + 1)) { return RSMI_STATUS_INSUFFICIENT_SIZE; } @@ -2434,6 +2481,7 @@ rsmi_status_t rsmi_dev_market_name_get(uint32_t dv_ind, char *market_name, uint3 } amdgpu_device_deinitialize(device_handle); close(gpu_fd); + libdrm_amdgpu_.ROCmSmiLibraryLoader::unload(); return RSMI_STATUS_DRM_ERROR; } diff --git a/projects/rocm-smi-lib/src/rocm_smi_lib_loader.cc b/projects/rocm-smi-lib/src/rocm_smi_lib_loader.cc new file mode 100644 index 0000000000..e9ffb2a59e --- /dev/null +++ b/projects/rocm-smi-lib/src/rocm_smi_lib_loader.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+
+#include <dlfcn.h>
+
+#include "rocm_smi/rocm_smi_lib_loader.h"
+
+namespace amd {
+namespace smi {
+
+ROCmSmiLibraryLoader::ROCmSmiLibraryLoader() : libHandler_(nullptr) {}
+
+// Opens `filename` with dlopen() and keeps the handle for load_symbol().
+// Any handle held from a previous load() is released first.
+// Returns RSMI_STATUS_FAIL_LOAD_MODULE if filename is null or dlopen() fails.
+rsmi_status_t ROCmSmiLibraryLoader::load(const char* filename) {
+  if (filename == nullptr) {
+    return RSMI_STATUS_FAIL_LOAD_MODULE;
+  }
+
+  std::lock_guard<std::mutex> guard(library_mutex_);
+  if (libHandler_) {
+    dlclose(libHandler_);
+    libHandler_ = nullptr;
+  }
+  library_loaded_ = false;
+
+  // dlopen() is reference counted, so this also covers a library that is
+  // already mapped into the process: we still get a handle we own.
+  libHandler_ = dlopen(filename, RTLD_LAZY);
+  if (!libHandler_) {
+    const char* error = dlerror();  // may be null; never stream a null char*
+    std::cerr << "Fail to open " << filename <<": "
+              << (error ? error : "unknown error") << std::endl;
+    return RSMI_STATUS_FAIL_LOAD_MODULE;
+  }
+  library_loaded_ = true;
+  return RSMI_STATUS_SUCCESS;
+}
+
+// Closes the held handle, if any. Always returns RSMI_STATUS_SUCCESS.
+rsmi_status_t ROCmSmiLibraryLoader::unload() {
+  std::lock_guard<std::mutex> guard(library_mutex_);
+  if (libHandler_) {
+    dlclose(libHandler_);
+    libHandler_ = nullptr;
+  }
+  library_loaded_ = false;
+  return RSMI_STATUS_SUCCESS;
+}
+
+ROCmSmiLibraryLoader::~ROCmSmiLibraryLoader() { unload(); }
+
+}  // namespace smi
+}  // namespace amd