From 5ba371f285fe8491b2b67de6886ec9ffcebf46bb Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Thu, 23 Jun 2022 09:02:18 -0400 Subject: [PATCH] Load libdrm at run time Remove the compile time dependency on libdrm. Load it at the run time instead. Add the headers missed from smi-lib Change-Id: Ie1ecf293b51425b6a61c502d11a42809dc099f70 --- CMakeLists.txt | 2 + amd_smi/example/amd_smi_example.cc | 4 +- amd_smi/include/amd_smi.h | 156 +++++++++++++++++----- amd_smi/include/impl/amd_smi_drm.h | 14 +- amd_smi/include/impl/amd_smi_gpu_device.h | 8 +- amd_smi/include/impl/amd_smi_lib_loader.h | 82 ++++++++++++ amd_smi/src/amd_smi.cc | 82 ++++++------ amd_smi/src/amd_smi_drm.cc | 62 +++++++-- amd_smi/src/amd_smi_gpu_device.cc | 12 +- amd_smi/src/amd_smi_lib_loader.cc | 87 ++++++++++++ oam/CMakeLists.txt | 2 +- rocm_smi/CMakeLists.txt | 2 +- 12 files changed, 407 insertions(+), 106 deletions(-) create mode 100644 amd_smi/include/impl/amd_smi_lib_loader.h create mode 100644 amd_smi/src/amd_smi_lib_loader.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 94321839f6..ef94192ec1 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,6 +138,7 @@ set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_gpu_device.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_socket.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_system.cc") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_drm.cc") +set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_lib_loader.cc") set(CMN_INC_LIST "${COMMON_INC_DIR}/rocm_smi_device.h") set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_main.h") @@ -158,6 +159,7 @@ set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_INC_DIR}/impl/amd_smi_gpu_device.h") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_INC_DIR}/impl/amd_smi_socket.h") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_INC_DIR}/impl/amd_smi_system.h") set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_INC_DIR}/impl/amd_smi_drm.h") +set(CMN_SRC_LIST 
${CMN_SRC_LIST} "${AMDSMI_INC_DIR}/impl/amd_smi_lib_loader.h") add_subdirectory("rocm_smi") add_subdirectory("amd_smi") diff --git a/amd_smi/example/amd_smi_example.cc b/amd_smi/example/amd_smi_example.cc index a6fbe04f61..33e97a1256 100644 --- a/amd_smi/example/amd_smi_example.cc +++ b/amd_smi/example/amd_smi_example.cc @@ -77,9 +77,9 @@ int main() { // For each socket, get identifier and devices for (uint32_t i=0; i < socket_count; i++) { - // Get Socket identifier + // Get Socket info char socket_name[128]; - ret = amdsmi_get_socket_identifier(sockets[i], socket_name, 128); + ret = amdsmi_get_socket_info(sockets[i], socket_name, 128); CHK_AMDSMI_RET(ret) std::cout << "Socket " << socket_name << std::endl; diff --git a/amd_smi/include/amd_smi.h b/amd_smi/include/amd_smi.h index aefc27dee7..63f047401f 100644 --- a/amd_smi/include/amd_smi.h +++ b/amd_smi/include/amd_smi.h @@ -124,6 +124,18 @@ typedef enum { AMDSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter //!< exceeded INT32_MAX + AMDSMI_LIB_START = 1000, //64) + /* Voltage (mV) */ + uint16_t voltage_soc; + uint16_t voltage_gfx; + uint16_t voltage_mem; + /* Driver attached timestamp (in ns) */ uint64_t system_clock_counter; // v1 mod. 
(moved from top of struct) @@ -992,7 +1034,7 @@ amdsmi_status_t amdsmi_shut_down(void); amdsmi_status_t amdsmi_get_socket_handles(uint32_t *socket_count, amdsmi_socket_handle* socket_handles[]); -amdsmi_status_t amdsmi_get_socket_identifier( +amdsmi_status_t amdsmi_get_socket_info( amdsmi_socket_handle socket_handle, char *name, size_t len); @@ -1017,6 +1059,18 @@ amdsmi_status_t amdsmi_get_device_type(amdsmi_device_handle device_handle, */ #define SMI_MAX_MM_IP_COUNT 8 enum smi_mm_ip { MM_UVD, MM_VCE, MM_VCN, MM__MAX }; +#define SMI_MAX_STRING_LENGTH 64 + + +typedef struct smi_asic_info { + char market_name[SMI_MAX_STRING_LENGTH]; + uint32_t family; /**< Has zero value */ + uint32_t vendor_id; + uint32_t device_id; + uint32_t rev_id; + uint64_t asic_serial; +} smi_asic_info_t; + struct smi_gpu_caps { struct { @@ -1036,6 +1090,19 @@ struct smi_gpu_caps { uint32_t dma_ip_count; }; +typedef struct amdsmi_power_info { + uint32_t power_cap; + uint32_t dpm_cap; +} amdsmi_power_info_t; + +typedef struct amdsmi_vbios_info { + char name[SMI_MAX_STRING_LENGTH]; + uint32_t vbios_version; + char build_date[SMI_MAX_STRING_LENGTH]; + char part_number[SMI_MAX_STRING_LENGTH]; + char vbios_version_string[SMI_MAX_STRING_LENGTH]; +} amdsmi_vbios_info_t; + enum smi_supported_flags { XGMI_FLAG = 1 << 0, @@ -1048,6 +1115,8 @@ enum smi_supported_flags { MAX_FREQUENCY_TARGET_RANGE_FLAG = 1 << 7, }; +amdsmi_status_t amdsmi_get_vbios_info(amdsmi_device_handle device_handle, amdsmi_vbios_info *info); + /** * \brief Returns the device capabilities as currently configured in * the system @@ -1092,6 +1161,54 @@ amdsmi_status_t amdsmi_get_caps_info(amdsmi_device_handle device_handle, amdsmi_status_t amdsmi_fb_usage_get(amdsmi_device_handle device_handle, uint32_t *fb_total, uint32_t *fb_used); +/** + * \brief Returns the ASIC information for the device. + * + * \param [in] device_handle - device which to query + * + * \param [out] info - Reference to static asic information structure. 
+ * Must be allocated by user. + * + * @retval ::AMDSMI_STATUS_SUCCESS call was successful + */ +amdsmi_status_t amdsmi_get_asic_info(amdsmi_device_handle device_handle, smi_asic_info_t *info); + + +enum smi_clock_domain { + CLOCK_DOMAIN_GFX, + CLOCK_DOMAIN_MEM, + CLOCK_DOMAIN_MM, + CLOCK_DOMAIN_MM1, + CLOCK_DOMAIN_MM2, + CLOCK_DOMAIN__MAX +}; + +typedef struct smi_gpu_clock_measure { + uint32_t cur_clk; + uint32_t avg_clk; + uint32_t max_clk; +} smi_gpu_clock_measure_t; + +/** + * \brief Returns the measurements of the clocks in the GPU + * for the GFX and multimedia engines and Memory. This call + * reports the averages over 1s in MHz. + * + * \param [in] device_handle - device which to query + * + * \param [in] domain - Enum representing the domain to query. It should + * be one of the smi_clock_domain values + * + * \param [out] info - Reference to the gpu clock structure. + * Must be allocated by user. + * + * @retval ::AMDSMI_STATUS_SUCCESS call was successful + */ +amdsmi_status_t amdsmi_get_clock_measure(amdsmi_device_handle device_handle, enum smi_clock_domain domain, + smi_gpu_clock_measure_t *info); + + + /** @} */ // end of drm query /*****************************************************************************/ @@ -1733,7 +1850,7 @@ amdsmi_dev_energy_count_get(amdsmi_device_handle device_handle, uint64_t *power, * @param[in] sensor_ind a 0-based sensor index. Normally, this will be 0. * If a device has more than one sensor, it could be greater than 0. 
* - * @param[inout] cap a pointer to a uint64_t that indicates the power cap, + * @param[inout] cap a pointer to a amdsmi_power_info that indicates the power cap, * in microwatts * If this parameter is nullptr, this function will return * ::AMDSMI_STATUS_INVALID_ARGS if the function is supported with the provided, @@ -1746,7 +1863,7 @@ amdsmi_dev_energy_count_get(amdsmi_device_handle device_handle, uint64_t *power, * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid */ amdsmi_status_t -amdsmi_dev_power_cap_get(amdsmi_device_handle device_handle, uint32_t sensor_ind, uint64_t *cap); +amdsmi_dev_power_cap_get(amdsmi_device_handle device_handle, uint32_t sensor_ind, amdsmi_power_info *cap); /** * @brief Get the default power cap for the device specified by @p device_handle. @@ -2821,35 +2938,6 @@ amdsmi_status_t amdsmi_version_str_get(amdsmi_sw_component_t component, char *ver_str, uint32_t len); -/** - * @brief Get the VBIOS identifer string - * - * @details Given a device ID @p device_handle, and a pointer to a char buffer, - * @p vbios, this function will write the VBIOS string (up to @p len - * characters) for device @p device_handle to @p vbios. The caller must ensure that - * it is safe to write at least @p len characters to @p vbios. - * - * @param[in] device_handle a device handle - * - * @param[inout] vbios A pointer to a buffer of char's to which the VBIOS name - * will be written - * If this parameter is nullptr, this function will return - * ::AMDSMI_STATUS_INVALID_ARGS if the function is supported with the provided, - * arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the - * provided arguments. - * - * @param[in] len The number of char's pointed to by @p vbios which can safely - * be written to by this function. 
- * - * @retval ::AMDSMI_STATUS_SUCCESS call was successful - * @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not - * support this function with the given arguments - * @retval ::AMDSMI_STATUS_INVALID_ARGS the provided arguments are not valid - * - */ -amdsmi_status_t -amdsmi_dev_vbios_version_get(amdsmi_device_handle device_handle, char *vbios, uint32_t len); - /** * @brief Get the firmware versions for a device * diff --git a/amd_smi/include/impl/amd_smi_drm.h b/amd_smi/include/impl/amd_smi_drm.h index c28f7a78af..bcb04818d1 100644 --- a/amd_smi/include/impl/amd_smi_drm.h +++ b/amd_smi/include/impl/amd_smi_drm.h @@ -49,6 +49,7 @@ #include #include // NOLINT #include "amd_smi.h" +#include "impl/amd_smi_lib_loader.h" namespace amd { namespace smi { @@ -58,16 +59,19 @@ class AMDSmiDrm { amdsmi_status_t init(); amdsmi_status_t cleanup(); int get_drm_fd_by_index(uint32_t gpu_index) const; - int amdgpu_query_info(int fd, unsigned info_id, + amdsmi_status_t amdgpu_query_info(int fd, unsigned info_id, unsigned size, void *value); - int amdgpu_query_fw(int fd, unsigned info_id, unsigned fw_type, + amdsmi_status_t amdgpu_query_fw(int fd, unsigned info_id, unsigned fw_type, unsigned size, void *value); - int amdgpu_query_hw_ip(int fd, unsigned info_id, unsigned hw_ip_type, - unsigned size, void *value); - int amdgpu_query_vbios(int fd, void *info); + amdsmi_status_t amdgpu_query_hw_ip(int fd, unsigned info_id, + unsigned hw_ip_type, unsigned size, void *value); + amdsmi_status_t amdgpu_query_vbios(int fd, void *info); private: + using DrmCmdWriteFunc = int (*)(int, unsigned long, void *, unsigned long); std::vector drm_fds_; // drm file descriptor by gpu_index + AMDSmiLibraryLoader lib_loader_; // lazy load libdrm + DrmCmdWriteFunc drm_cmd_write_; // drmCommandWrite std::mutex drm_mutex_; }; diff --git a/amd_smi/include/impl/amd_smi_gpu_device.h b/amd_smi/include/impl/amd_smi_gpu_device.h index 1b7a3e85e3..2ad1ca6b29 100644 --- 
a/amd_smi/include/impl/amd_smi_gpu_device.h +++ b/amd_smi/include/impl/amd_smi_gpu_device.h @@ -57,13 +57,13 @@ class AMDSmiGPUDevice: public AMDSmiDevice { AMDSmiDevice(AMD_GPU), gpu_id_(gpu_id), drm_(drm) {} uint32_t get_gpu_id() const; - int amdgpu_query_info(unsigned info_id, + amdsmi_status_t amdgpu_query_info(unsigned info_id, unsigned size, void *value) const; - int amdgpu_query_hw_ip(unsigned info_id, unsigned hw_ip_type, + amdsmi_status_t amdgpu_query_hw_ip(unsigned info_id, unsigned hw_ip_type, unsigned size, void *value) const; - int amdgpu_query_fw(unsigned info_id, unsigned fw_type, + amdsmi_status_t amdgpu_query_fw(unsigned info_id, unsigned fw_type, unsigned size, void *value) const; - int amdgpu_query_vbios(void *info) const; + amdsmi_status_t amdgpu_query_vbios(void *info) const; private: uint32_t gpu_id_; AMDSmiDrm& drm_; diff --git a/amd_smi/include/impl/amd_smi_lib_loader.h b/amd_smi/include/impl/amd_smi_lib_loader.h new file mode 100644 index 0000000000..a89d5c0fc8 --- /dev/null +++ b/amd_smi/include/impl/amd_smi_lib_loader.h @@ -0,0 +1,82 @@ +/* +Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ +#ifndef AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_ +#define AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_ +#include +#include +#include +#include +#include // NOLINT(build/c++11) +#include "amd_smi.h" + + +namespace amd { +namespace smi { +class AMDSmiLibraryLoader { + public: + AMDSmiLibraryLoader(); + + amdsmi_status_t load(const char* filename); + + template amdsmi_status_t load_symbol(T* func_handler, + const char* func_name); + + + amdsmi_status_t unload(); + + ~AMDSmiLibraryLoader(); + + private: + void* libHandler_; + std::mutex library_mutex_; +}; + +template amdsmi_status_t AMDSmiLibraryLoader::load_symbol( + T* func_handler, + const char* func_name) { + if (!libHandler_) { + return AMDSMI_STATUS_FAIL_LOAD_MODULE; + } + + if (!func_handler || !func_name) { + return AMDSMI_STATUS_FAIL_LOAD_SYMBOL; + } + + std::lock_guard guard(library_mutex_); + + *reinterpret_cast(func_handler) = + dlsym(libHandler_, func_name); + if (*func_handler == nullptr) { + char* error = dlerror(); + std::cerr << "AMDSmiLibraryLoader: Fail to load the symbol " + << func_name << ": " << error << std::endl; + return AMDSMI_STATUS_FAIL_LOAD_SYMBOL; + } + + return AMDSMI_STATUS_SUCCESS; +} + +} // namespace smi +} // namespace amd + + +#endif // AMD_SMI_INCLUDE_IMPL_AMD_SMI_LIB_LOADER_H_ diff --git a/amd_smi/src/amd_smi.cc b/amd_smi/src/amd_smi.cc index 7e96e4990d..ee5c596211 100644 --- a/amd_smi/src/amd_smi.cc +++ b/amd_smi/src/amd_smi.cc @@ -97,8 +97,25 @@ amdsmi_shut_down() { amdsmi_status_t amdsmi_status_string(amdsmi_status_t status, const char **status_string) { - return static_cast( + if (status <= AMDSMI_LIB_START) { + return static_cast( rsmi_status_string(static_cast(status), status_string)); + } + switch (status) { 
+ case AMDSMI_STATUS_FAIL_LOAD_MODULE: + *status_string = "FAIL_LOAD_MODULE: Fail to load module."; + break; + case AMDSMI_STATUS_FAIL_LOAD_SYMBOL: + *status_string = "FAIL_LOAD_SYMOBL: Fail to load symobl."; + break; + case AMDSMI_STATUS_DRM_ERROR: + *status_string = "DRM_ERROR: Fail to run function in libdrm."; + break; + default: + *status_string = "An unknown error occurred"; + return AMDSMI_STATUS_UNKNOWN_ERROR; + } + return AMDSMI_STATUS_SUCCESS; } amdsmi_status_t amdsmi_get_socket_handles(uint32_t *socket_count, @@ -114,7 +131,7 @@ amdsmi_status_t amdsmi_get_socket_handles(uint32_t *socket_count, return AMDSMI_STATUS_SUCCESS; } -amdsmi_status_t amdsmi_get_socket_identifier( +amdsmi_status_t amdsmi_get_socket_info( amdsmi_socket_handle socket_handle, char *name, size_t len) { if (socket_handle == nullptr || name == nullptr) { @@ -202,23 +219,18 @@ amdsmi_status_t amdsmi_fb_usage_get(amdsmi_device_handle device_handle, amd::smi::AMDSmiGPUDevice* gpu_device = static_cast(device_handle); - int ret = 0; struct drm_amdgpu_info_vram_gtt gtt; uint64_t vram_used = 0; - ret = gpu_device->amdgpu_query_info(AMDGPU_INFO_VRAM_GTT, + r = gpu_device->amdgpu_query_info(AMDGPU_INFO_VRAM_GTT, sizeof(struct drm_amdgpu_memory_info), >t); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; *fb_total = static_cast(gtt.vram_size / (1024 * 1024)); - ret = gpu_device->amdgpu_query_info(AMDGPU_INFO_VRAM_USAGE, + r = gpu_device->amdgpu_query_info(AMDGPU_INFO_VRAM_USAGE, sizeof(vram_used), &vram_used); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; *fb_used = static_cast(vram_used / (1024 * 1024)); @@ -247,74 +259,58 @@ amdsmi_status_t amdsmi_get_caps_info(amdsmi_device_handle device_handle, struct drm_amdgpu_info_device device; unsigned count, j; - int ret = gpu_device->amdgpu_query_info(AMDGPU_INFO_DEV_INFO, + r = gpu_device->amdgpu_query_info(AMDGPU_INFO_DEV_INFO, 
sizeof(struct drm_amdgpu_info_device), &device); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; info->gfx.gfxip_cu_count = device.cu_active_number; - ret = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_INFO, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_INFO, AMDGPU_HW_IP_GFX, sizeof(ip), &ip); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; info->gfx.gfxip_major = ip.hw_ip_version_major; info->gfx.gfxip_minor = ip.hw_ip_version_minor; - ret = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, AMDGPU_HW_IP_GFX, sizeof(unsigned), &count); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; info->gfx_ip_count = count; - ret = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, AMDGPU_HW_IP_DMA, sizeof(unsigned), &count); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; info->dma_ip_count = count; count = 0; /* Count multimedia engines */ - ret = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, AMDGPU_HW_IP_UVD, sizeof(struct drm_amdgpu_info_device), &uvd); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; for (j = 0; j < uvd; j++) info->mm.mm_ip_list[count++] = MM_UVD; - ret = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, AMDGPU_HW_IP_UVD_ENC, sizeof(struct drm_amdgpu_info_device), &uvd_enc); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; for (j = 0; j < uvd_enc; j++) info->mm.mm_ip_list[count++] = MM_UVD; - ret = 
gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, AMDGPU_HW_IP_VCE, sizeof(struct drm_amdgpu_info_device), &vce); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; for (j = 0; j < vce; j++) info->mm.mm_ip_list[count++] = MM_VCE; /* VCN is shared DEC/ENC check only ENC */ - ret = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, + r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_COUNT, AMDGPU_HW_IP_VCN_ENC, sizeof(struct drm_amdgpu_info_device), &vcn_enc); - if (ret) { - return AMDSMI_STATUS_INTERNAL_EXCEPTION; - } + if (r != AMDSMI_STATUS_SUCCESS) return r; for (j = 0; j < vcn_enc; j++) info->mm.mm_ip_list[count++] = MM_VCN; diff --git a/amd_smi/src/amd_smi_drm.cc b/amd_smi/src/amd_smi_drm.cc index 9d6fc4290d..b09f975b9d 100644 --- a/amd_smi/src/amd_smi_drm.cc +++ b/amd_smi/src/amd_smi_drm.cc @@ -62,6 +62,30 @@ amdsmi_status_t AMDSmiDrm::init() { struct dirent *dir = nullptr; int fd = -1; + amdsmi_status_t status = lib_loader_.load("libdrm.so"); + if (status != AMDSMI_STATUS_SUCCESS) { + return status; + } + // load symbol from libdrm + drm_cmd_write_ = nullptr; + status = lib_loader_.load_symbol(&drm_cmd_write_, "drmCommandWrite"); + if (status != AMDSMI_STATUS_SUCCESS) { + return status; + } + using drmGetVersionType = drmVersionPtr (*)(int); // drmGetVersion + using drmFreeVersionType = void (*)(drmVersionPtr); // drmFreeVersion + drmGetVersionType drm_get_version = nullptr; + drmFreeVersionType drm_free_version = nullptr; + status = lib_loader_.load_symbol(&drm_get_version, "drmGetVersion"); + if (status != AMDSMI_STATUS_SUCCESS) { + return status; + } + status = lib_loader_.load_symbol(&drm_free_version, "drmFreeVersion"); + if (status != AMDSMI_STATUS_SUCCESS) { + return status; + } + + auto d = dir_ptr(opendir("/dev/dri/"), &closedir); if (d == nullptr) return AMDSMI_STATUS_INIT_ERROR; @@ -74,7 +98,7 @@ amdsmi_status_t 
AMDSmiDrm::init() { fd = open(name.get(), O_RDWR | O_CLOEXEC); if (fd < 0) continue; - auto version = drm_version_ptr(drmGetVersion(fd), &drmFreeVersion); + auto version = drm_version_ptr(drm_get_version(fd), drm_free_version); if (strcmp("amdgpu", version->name) || strstr(name.get(), "render") == nullptr) { close(fd); @@ -92,11 +116,13 @@ amdsmi_status_t AMDSmiDrm::cleanup() { close(drm_fds_[i]); } drm_fds_.clear(); + lib_loader_.unload(); return AMDSMI_STATUS_SUCCESS; } -int AMDSmiDrm::amdgpu_query_info(int fd, unsigned info_id, +amdsmi_status_t AMDSmiDrm::amdgpu_query_info(int fd, unsigned info_id, unsigned size, void *value) { + if (drm_cmd_write_ == nullptr) return AMDSMI_STATUS_NOT_SUPPORTED; std::lock_guard guard(drm_mutex_); struct drm_amdgpu_info request; @@ -104,12 +130,16 @@ int AMDSmiDrm::amdgpu_query_info(int fd, unsigned info_id, request.return_pointer = (uintptr_t)value; request.return_size = size; request.query = info_id; - return drmCommandWrite(fd, DRM_AMDGPU_INFO, + int status = drm_cmd_write_(fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); + if (status == 0) return AMDSMI_STATUS_SUCCESS; + return AMDSMI_STATUS_DRM_ERROR; } -int AMDSmiDrm::amdgpu_query_fw(int fd, unsigned info_id, unsigned fw_type, - unsigned size, void *value) { +amdsmi_status_t AMDSmiDrm::amdgpu_query_fw(int fd, unsigned info_id, + unsigned fw_type, unsigned size, void *value) { + if (drm_cmd_write_ == nullptr) return AMDSMI_STATUS_NOT_SUPPORTED; + std::lock_guard guard(drm_mutex_); struct drm_amdgpu_info request; @@ -118,12 +148,16 @@ int AMDSmiDrm::amdgpu_query_fw(int fd, unsigned info_id, unsigned fw_type, request.return_size = size; request.query = info_id; request.query_fw.fw_type = fw_type; - return drmCommandWrite(fd, DRM_AMDGPU_INFO, &request, + int status = drm_cmd_write_(fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); + if (status == 0) return AMDSMI_STATUS_SUCCESS; + return AMDSMI_STATUS_DRM_ERROR; } -int 
AMDSmiDrm::amdgpu_query_hw_ip(int fd, unsigned info_id, unsigned hw_ip_type, - unsigned size, void *value) { +amdsmi_status_t AMDSmiDrm::amdgpu_query_hw_ip(int fd, unsigned info_id, + unsigned hw_ip_type, unsigned size, void *value) { + if (drm_cmd_write_ == nullptr) return AMDSMI_STATUS_NOT_SUPPORTED; + std::lock_guard guard(drm_mutex_); struct drm_amdgpu_info request; @@ -132,11 +166,15 @@ int AMDSmiDrm::amdgpu_query_hw_ip(int fd, unsigned info_id, unsigned hw_ip_type, request.return_size = size; request.query = info_id; request.query_hw_ip.type = hw_ip_type; - return drmCommandWrite(fd, DRM_AMDGPU_INFO, &request, + int status = drm_cmd_write_(fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); + if (status == 0) return AMDSMI_STATUS_SUCCESS; + return AMDSMI_STATUS_DRM_ERROR; } -int AMDSmiDrm::amdgpu_query_vbios(int fd, void *info) { +amdsmi_status_t AMDSmiDrm::amdgpu_query_vbios(int fd, void *info) { + if (drm_cmd_write_ == nullptr) return AMDSMI_STATUS_NOT_SUPPORTED; + std::lock_guard guard(drm_mutex_); struct drm_amdgpu_info request; @@ -145,8 +183,10 @@ int AMDSmiDrm::amdgpu_query_vbios(int fd, void *info) { request.return_size = sizeof(drm_amdgpu_info_vbios); request.query = AMDGPU_INFO_VBIOS; request.vbios_info.type = AMDGPU_INFO_VBIOS_INFO; - return drmCommandWrite(fd, DRM_AMDGPU_INFO, &request, + int status = drm_cmd_write_(fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); + if (status == 0) return AMDSMI_STATUS_SUCCESS; + return AMDSMI_STATUS_DRM_ERROR; } diff --git a/amd_smi/src/amd_smi_gpu_device.cc b/amd_smi/src/amd_smi_gpu_device.cc index 3bae490d30..868507f997 100644 --- a/amd_smi/src/amd_smi_gpu_device.cc +++ b/amd_smi/src/amd_smi_gpu_device.cc @@ -52,7 +52,7 @@ uint32_t AMDSmiGPUDevice::get_gpu_id() const { return gpu_id_; } -int AMDSmiGPUDevice::amdgpu_query_info(unsigned info_id, +amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_info(unsigned info_id, unsigned size, void *value) const { int fd = 
drm_.get_drm_fd_by_index(gpu_id_); if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED; @@ -60,21 +60,23 @@ int AMDSmiGPUDevice::amdgpu_query_info(unsigned info_id, return drm_.amdgpu_query_info(fd, info_id, size, value); } -int AMDSmiGPUDevice::amdgpu_query_hw_ip(unsigned info_id, +amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_hw_ip(unsigned info_id, unsigned hw_ip_type, unsigned size, void *value) const { int fd = drm_.get_drm_fd_by_index(gpu_id_); if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED; return drm_.amdgpu_query_hw_ip(fd, info_id, hw_ip_type, size, value); } -int AMDSmiGPUDevice::amdgpu_query_fw(unsigned info_id, unsigned fw_type, - unsigned size, void *value) const { + +amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_fw(unsigned info_id, + unsigned fw_type, unsigned size, void *value) const { int fd = drm_.get_drm_fd_by_index(gpu_id_); if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED; return drm_.amdgpu_query_fw(fd, info_id, fw_type, size, value); } -int AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const { + +amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const { int fd = drm_.get_drm_fd_by_index(gpu_id_); if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED; diff --git a/amd_smi/src/amd_smi_lib_loader.cc b/amd_smi/src/amd_smi_lib_loader.cc new file mode 100644 index 0000000000..e77e383103 --- /dev/null +++ b/amd_smi/src/amd_smi_lib_loader.cc @@ -0,0 +1,87 @@ +/* + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2022, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc. 
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ + +#include "impl/amd_smi_lib_loader.h" +#include + +namespace amd { +namespace smi { + +AMDSmiLibraryLoader::AMDSmiLibraryLoader(): libHandler_(nullptr) { +} + +amdsmi_status_t AMDSmiLibraryLoader::load(const char* filename) { + if (filename == nullptr) { + return AMDSMI_STATUS_FAIL_LOAD_MODULE; + } + if (libHandler_) { + unload(); + } + + std::lock_guard guard(library_mutex_); + libHandler_ = dlopen(filename, RTLD_LAZY); + if (!libHandler_) { + char* error = dlerror(); + std::cerr << "Fail to open " << filename <<": " << error + << std::endl; + return AMDSMI_STATUS_FAIL_LOAD_MODULE; + } + + return AMDSMI_STATUS_SUCCESS; +} + +amdsmi_status_t AMDSmiLibraryLoader::unload() { + std::lock_guard guard(library_mutex_); + if (libHandler_) { + dlclose(libHandler_); + libHandler_ = nullptr; + } + return AMDSMI_STATUS_SUCCESS; +} + +AMDSmiLibraryLoader::~AMDSmiLibraryLoader() { + unload(); +} + +} // namespace smi +} // namespace amd diff --git a/oam/CMakeLists.txt b/oam/CMakeLists.txt index 10f9f01b9b..4eb70221ce 100644 --- a/oam/CMakeLists.txt +++ b/oam/CMakeLists.txt @@ -72,7 +72,7 @@ target_include_directories(${OAM_EXAMPLE_EXE} PRIVATE ${OAM_INC_LIST}) target_link_libraries(${OAM_EXAMPLE_EXE} ${OAM_TARGET}) add_library(${OAM_TARGET} ${CMN_SRC_LIST} ${OAM_SRC_LIST} ${CMN_INC_LIST} ${OAM_INC_LIST}) -target_link_libraries(${OAM_TARGET} pthread rt drm) +target_link_libraries(${OAM_TARGET} pthread rt dl) target_include_directories(${OAM_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ${COMMON_PROJ_ROOT}/common/shared_mutex) diff --git a/rocm_smi/CMakeLists.txt b/rocm_smi/CMakeLists.txt index a03aa30c64..c3311ddeaf 100755 --- a/rocm_smi/CMakeLists.txt +++ b/rocm_smi/CMakeLists.txt @@ -76,7 +76,7 @@ add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc") target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET}) add_library(${ROCM_SMI_TARGET} ${CMN_SRC_LIST} ${SMI_SRC_LIST} ${CMN_INC_LIST} ${SMI_INC_LIST}) -target_link_libraries(${ROCM_SMI_TARGET} 
pthread rt drm) +target_link_libraries(${ROCM_SMI_TARGET} pthread rt dl) target_include_directories(${ROCM_SMI_TARGET} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${COMMON_PROJ_ROOT}/common/shared_mutex)