Refactor rsmi to support oam
Change-Id: Idc524e01ba06eb5c8d1682becaf5bf8ced5bffcf
[ROCm/rocm_smi_lib commit: 6594f8f58b]
Tento commit je obsažen v:
@@ -3,6 +3,8 @@
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.5.0)
|
||||
|
||||
set(AMD_SMI_LIBS_TARGET "amd_smi_libraries")
|
||||
|
||||
## Set default module path if not already set
|
||||
if(NOT DEFINED CMAKE_MODULE_PATH)
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")
|
||||
@@ -15,6 +17,18 @@ set(ROCM_SMI_COMPONENT "lib${ROCM_SMI}")
|
||||
set(ROCM_SMI_TARGET "${ROCM_SMI}64")
|
||||
set(ROCM_SMI_LIB_NAME "lib${ROCM_SMI_TARGET}")
|
||||
|
||||
# provide git to utilities
|
||||
find_program (GIT NAMES git)
|
||||
|
||||
## Setup the package version based on git tags.
|
||||
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver")
|
||||
get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
message("Package version: ${PKG_VERSION_STR}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${VERSION_MINOR}")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_PATCH "0")
|
||||
set(${AMD_SMI_LIBS_TARGET}_VERSION_BUILD "0")
|
||||
|
||||
# The following default version values should be updated as appropriate for
|
||||
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
|
||||
# Until ABI stabilizes VERSION_MAJOR will be 0. This should be over-ridden
|
||||
@@ -24,54 +38,18 @@ set(PKG_VERSION_MINOR 0)
|
||||
set(PKG_VERSION_PATCH 0)
|
||||
set(PKG_VERSION_NUM_COMMIT 0)
|
||||
|
||||
################# Determine the library version #########################
|
||||
## Setup the package version based on git tags.
|
||||
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver")
|
||||
set(SO_VERSION_GIT_TAG_PREFIX "rsmi_so_ver")
|
||||
|
||||
# provide git to utilities
|
||||
find_program (GIT NAMES git)
|
||||
|
||||
get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
# VERSION_* variables should be set by get_version_from_tag
|
||||
message("Package version: ${PKG_VERSION_STR}")
|
||||
|
||||
# Debian package specific variables
|
||||
# Set a default value for the package version
|
||||
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
|
||||
# VERSION_* variables should be set by get_version_from_tag
|
||||
if ( ${ROCM_PATCH_VERSION} )
|
||||
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
|
||||
else()
|
||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
|
||||
endif ()
|
||||
set(${ROCM_SMI}_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(${ROCM_SMI}_VERSION_MINOR "${VERSION_MINOR}")
|
||||
set(${ROCM_SMI}_VERSION_PATCH "0")
|
||||
set(${ROCM_SMI}_VERSION_BUILD "0")
|
||||
message("SOVERSION: ${SO_VERSION_STRING}")
|
||||
|
||||
## Define default variable and variables for the optional build target
|
||||
## rocm_smi_lib-dev
|
||||
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
CACHE STRING "Location of rocm_smi source code.")
|
||||
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
|
||||
set(CMAKE_INSTALL_PREFIX "/opt/rocm"
|
||||
CACHE STRING "Default installation directory.")
|
||||
endif ()
|
||||
set(COMMON_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
CACHE STRING "Location source code common root.")
|
||||
set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm"
|
||||
CACHE STRING "Default packaging prefix.")
|
||||
set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
|
||||
|
||||
project(${ROCM_SMI_TARGET})
|
||||
|
||||
# Create a configure file to get version info from within library
|
||||
configure_file(
|
||||
"${PROJECT_SOURCE_DIR}/src/${ROCM_SMI_TARGET}Config.in"
|
||||
"${PROJECT_SOURCE_DIR}/include/rocm_smi/${ROCM_SMI_TARGET}Config.h")
|
||||
|
||||
if (NOT DEFINED CPACK_PACKAGE_VENDOR)
|
||||
set(CPACK_PACKAGE_VENDOR "AMD")
|
||||
endif()
|
||||
@@ -82,14 +60,19 @@ endif()
|
||||
|
||||
if (NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
|
||||
"ROCm System Management Interface library")
|
||||
"AMD System Management libraries")
|
||||
endif()
|
||||
|
||||
if (NOT ROCM_SMI_PACKAGE)
|
||||
set(ROCM_SMI_PACKAGE rocm_smi_lib64)
|
||||
if (NOT AMD_SMI_PACKAGE)
|
||||
set(AMD_SMI_PACKAGE rocm-smi-lib64)
|
||||
endif()
|
||||
|
||||
set(CPACK_PACKAGE_FILE_NAME "${ROCM_SMI_PACKAGE}-${PKG_VERSION_STR}")
|
||||
set(CPACK_PACKAGE_FILE_NAME "${AMD_SMI_PACKAGE}-${PKG_VERSION_STR}")
|
||||
|
||||
project(${AMD_SMI_LIBS_TARGET})
|
||||
|
||||
set(COMMON_PROJ_ROOT ${PROJECT_SOURCE_DIR})
|
||||
|
||||
## Verbose output.
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
|
||||
@@ -128,108 +111,47 @@ else ()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
|
||||
endif ()
|
||||
|
||||
set(SRC_DIR "src")
|
||||
set(INC_DIR "include/rocm_smi")
|
||||
set(COMMON_SRC_DIR "${PROJECT_SOURCE_DIR}/src")
|
||||
set(COMMON_INC_DIR "${PROJECT_SOURCE_DIR}/include/rocm_smi")
|
||||
set(SHR_MUTEX_DIR "${PROJECT_SOURCE_DIR}/third_party/shared_mutex")
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/shared_mutex)
|
||||
set(SMI_SRC_LIST "${SRC_DIR}/rocm_smi_device.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_main.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_monitor.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_counters.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_kfd.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_io_link.cc")
|
||||
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.cc")
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/third_party/shared_mutex)
|
||||
|
||||
set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_monitor.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_counters.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_kfd.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_io_link.h")
|
||||
set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h")
|
||||
set(CMN_SRC_LIST "${COMMON_SRC_DIR}/rocm_smi_device.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_main.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_monitor.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_power_mon.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_utils.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_counters.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_kfd.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_io_link.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${SHR_MUTEX_DIR}/shared_mutex.cc")
|
||||
|
||||
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
|
||||
set(CMN_INC_LIST "${COMMON_INC_DIR}/rocm_smi_device.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_main.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_monitor.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_power_mon.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_utils.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_common.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_exception.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_counters.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_kfd.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_io_link.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${SHR_MUTEX_DIR}/shared_mutex.h")
|
||||
|
||||
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
|
||||
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST})
|
||||
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
|
||||
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property(TARGET ${ROCM_SMI_TARGET} PROPERTY
|
||||
SOVERSION "${VERSION_MAJOR}")
|
||||
set_property(TARGET ${ROCM_SMI_TARGET} PROPERTY
|
||||
VERSION "${SO_VERSION_STRING}")
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL Release)
|
||||
add_custom_command(
|
||||
TARGET ${ROCM_SMI_TARGET}
|
||||
POST_BUILD COMMAND ${CMAKE_STRIP} lib${ROCM_SMI_TARGET}.so)
|
||||
endif ()
|
||||
|
||||
## Add symlinks from top level ROCm lib dir to rocm-smi lib so files
|
||||
add_custom_target ( so-link ALL WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so so-link )
|
||||
add_custom_target ( so-major-link ALL WORKING_DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND}
|
||||
-E create_symlink
|
||||
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR}
|
||||
so-major-link )
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-link DESTINATION lib RENAME
|
||||
${ROCM_SMI_LIB_NAME}.so )
|
||||
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-major-link DESTINATION lib
|
||||
RENAME ${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR} )
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
install(TARGETS ${ROCM_SMI_TARGET}
|
||||
LIBRARY DESTINATION ${ROCM_SMI}/lib COMPONENT ${ROCM_SMI_COMPONENT})
|
||||
install(FILES ${SOURCE_DIR}/include/rocm_smi/rocm_smi.h
|
||||
DESTINATION rocm_smi/include/rocm_smi)
|
||||
install(FILES ${SOURCE_DIR}/include/rocm_smi/kfd_ioctl.h
|
||||
DESTINATION rocm_smi/include/rocm_smi)
|
||||
add_subdirectory("rocm_smi")
|
||||
add_subdirectory("oam")
|
||||
|
||||
# Generate Doxygen documentation
|
||||
find_package(Doxygen)
|
||||
find_package(LATEX COMPONENTS PDFLATEX)
|
||||
if (DOXYGEN_FOUND AND LATEX_FOUND)
|
||||
set (RSMI_MANUAL_NAME "ROCm_SMI_Manual")
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
|
||||
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
|
||||
"${INC_DIR}/rocm_smi.h"
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
|
||||
COMMAND make > /dev/null
|
||||
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/docs/${RSMI_MANUAL_NAME}_new.pdf
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/latex)
|
||||
|
||||
add_custom_target(docs DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf)
|
||||
|
||||
add_dependencies(${ROCM_SMI_TARGET} docs)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
|
||||
DESTINATION ${ROCM_SMI}/docs/${RSMI_MANUAL_NAME}.pdf)
|
||||
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/docs/README.md
|
||||
DESTINATION ${ROCM_SMI}/docs/)
|
||||
else()
|
||||
message("Doxygen or Latex is not found. Will not generate documents.")
|
||||
endif(DOXYGEN_FOUND AND LATEX_FOUND)
|
||||
# install(TARGETS ${ROCM_SMI_TARGET}
|
||||
# LIBRARY DESTINATION ${ROCM_SMI}/lib COMPONENT ${ROCM_SMI_COMPONENT})
|
||||
# install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/rocm_smi.h
|
||||
# DESTINATION rocm_smi/include/rocm_smi)
|
||||
|
||||
## Add the packaging directives for the runtime library.
|
||||
|
||||
@@ -237,7 +159,7 @@ endif(DOXYGEN_FOUND AND LATEX_FOUND)
|
||||
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm")
|
||||
set (CPACK_DEBIAN_PACKAGE_NAME ${ROCM_SMI_PACKAGE})
|
||||
set (CPACK_DEBIAN_PACKAGE_NAME ${AMD_SMI_PACKAGE})
|
||||
set (CPACK_DEBIAN_PACKAGE_VERSION ${PKG_VERSION_STR})
|
||||
|
||||
# RPM package specific variables
|
||||
@@ -245,9 +167,8 @@ set(CPACK_RPM_PRE_INSTALL_SCRIPT_FILE
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post")
|
||||
set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun")
|
||||
set (CPACK_RPM_PACKAGE_NAME ${ROCM_SMI_PACKAGE})
|
||||
set (CPACK_RPM_PACKAGE_NAME ${AMD_SMI_PACKAGE})
|
||||
set (CPACK_RPM_PACKAGE_VERSION ${PKG_VERSION_STR})
|
||||
|
||||
include (CPack)
|
||||
|
||||
|
||||
|
||||
@@ -103,7 +103,7 @@ endfunction()
|
||||
|
||||
function(num_change_since_prev_pkg VERSION_PREFIX)
|
||||
find_program(get_commits NAMES version_util.sh
|
||||
PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules)
|
||||
PATHS ${COMMON_PROJ_ROOT}/cmake_modules)
|
||||
if (get_commits)
|
||||
execute_process( COMMAND ${get_commits} -c ${VERSION_PREFIX}
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
|
||||
@@ -50,6 +50,76 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#define CHECK_DV_IND_RANGE \
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
|
||||
if (dv_ind >= smi.monitor_devices().size()) { \
|
||||
return RSMI_STATUS_INVALID_ARGS; \
|
||||
} \
|
||||
|
||||
#define GET_DEV_FROM_INDX \
|
||||
CHECK_DV_IND_RANGE \
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
|
||||
assert(dev != nullptr);
|
||||
|
||||
|
||||
#define GET_DEV_AND_KFDNODE_FROM_INDX \
|
||||
GET_DEV_FROM_INDX \
|
||||
std::shared_ptr<amd::smi::KFDNode> kfd_node; \
|
||||
if (smi.kfd_node_map().find(dev->kfd_gpu_id()) == \
|
||||
smi.kfd_node_map().end()) { \
|
||||
return RSMI_INITIALIZATION_ERROR; \
|
||||
} \
|
||||
kfd_node = smi.kfd_node_map()[dev->kfd_gpu_id()];
|
||||
|
||||
#define REQUIRE_ROOT_ACCESS \
|
||||
if (amd::smi::RocmSMI::getInstance().euid()) { \
|
||||
return RSMI_STATUS_PERMISSION; \
|
||||
}
|
||||
|
||||
// Takes the per-device mutex returned by amd::smi::GetMutex(dv_ind) through a
// ScopedPthread object named _lock (presumably released when _lock leaves the
// enclosing scope -- confirm against ScopedPthread).
//
// Expects a variable named dv_ind to be in scope at the expansion site.
//
// The lock is taken in blocking mode unless RSMI_INIT_FLAG_RESRV_TEST1 is set
// in the init options. In non-blocking mode the enclosing function returns
// RSMI_STATUS_BUSY when the mutex could not be acquired.
//
// The flag must be tested with a bitwise AND: a logical AND against the
// (non-zero) flag constant is true whenever *any* init option is set, which
// would silently switch every such caller to non-blocking mode.
#define DEVICE_MUTEX \
    amd::smi::pthread_wrap _pw(*amd::smi::GetMutex(dv_ind)); \
    amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance(); \
    bool blocking_ = !(smi_.init_options() & RSMI_INIT_FLAG_RESRV_TEST1); \
    amd::smi::ScopedPthread _lock(_pw, blocking_); \
    if (!blocking_ && _lock.mutex_not_acquired()) { \
      return RSMI_STATUS_BUSY; \
    }
|
||||
|
||||
/* This group of macros is used to facilitate checking of support for rsmi_dev*
|
||||
* "getter" functions. When the return buffer is set to nullptr, the macro will
|
||||
* check the previously gathered device support data to see if the function,
|
||||
* with possible variants (e.g., memory types, firmware types,...) and
|
||||
* subvariants (e.g. monitors/sensors) are supported.
|
||||
*/
|
||||
// This macro assumes dev already available
|
||||
#define CHK_API_SUPPORT_ONLY(RT_PTR, VR, SUB_VR) \
|
||||
if ((RT_PTR) == nullptr) { \
|
||||
try { \
|
||||
if (!dev->DeviceAPISupported(__FUNCTION__, (VR), (SUB_VR))) { \
|
||||
return RSMI_STATUS_NOT_SUPPORTED; \
|
||||
} \
|
||||
return RSMI_STATUS_INVALID_ARGS; \
|
||||
} catch (const amd::smi::rsmi_exception& e) { \
|
||||
debug_print( \
|
||||
"Exception caught when checking if API is supported %s.\n", \
|
||||
e.what()); \
|
||||
return RSMI_STATUS_INVALID_ARGS; \
|
||||
} \
|
||||
}
|
||||
|
||||
#define CHK_SUPPORT(RT_PTR, VR, SUB_VR) \
|
||||
GET_DEV_FROM_INDX \
|
||||
CHK_API_SUPPORT_ONLY((RT_PTR), (VR), (SUB_VR))
|
||||
|
||||
#define CHK_SUPPORT_NAME_ONLY(RT_PTR) \
|
||||
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT) \
|
||||
|
||||
#define CHK_SUPPORT_VAR(RT_PTR, VR) \
|
||||
CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT) \
|
||||
|
||||
#define CHK_SUPPORT_SUBVAR_ONLY(RT_PTR, SUB_VR) \
|
||||
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR)) \
|
||||
|
||||
#define DBG_FILE_ERROR(FN, WR_STR) \
|
||||
if (env_ && env_->debug_output_bitfield & RSMI_DEBUG_SYSFS_FILE_PATHS) { \
|
||||
std::cout << "*****" << __FUNCTION__ << std::endl; \
|
||||
|
||||
@@ -47,6 +47,9 @@
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define debug_print(fmt, ...) \
|
||||
@@ -62,6 +65,8 @@
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
pthread_mutex_t *GetMutex(uint32_t dv_ind);
|
||||
|
||||
int SameFile(const std::string fileA, const std::string fileB);
|
||||
bool FileExists(char const *filename);
|
||||
int isRegularFile(std::string fname, bool *is_reg);
|
||||
@@ -71,6 +76,12 @@ int WriteSysfsStr(std::string path, std::string val);
|
||||
|
||||
bool IsInteger(const std::string & n_str);
|
||||
|
||||
rsmi_status_t handleException();
|
||||
rsmi_status_t
|
||||
GetDevValueVec(amd::smi::DevInfoTypes type,
|
||||
uint32_t dv_ind, std::vector<std::string> *val_vec);
|
||||
rsmi_status_t ErrnoToRsmiStatus(uint32_t err);
|
||||
|
||||
struct pthread_wrap {
|
||||
public:
|
||||
explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}
|
||||
|
||||
Spustitelný soubor
+108
@@ -0,0 +1,108 @@
|
||||
#
|
||||
# Minimum version of cmake required
|
||||
#
|
||||
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
message(" CMake OAM (Library) ")
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
|
||||
## Verbose output.
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
|
||||
# Required Defines first:
|
||||
|
||||
message("")
|
||||
message("Build Configuration:")
|
||||
# message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
|
||||
# message("------------Compiler: " ${CMAKE_CXX_COMPILER})
|
||||
# message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
|
||||
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
|
||||
# message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
|
||||
# message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
|
||||
# message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
|
||||
# message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
|
||||
# message("--------RSMI Inc Dir: " ${OAM_INC_DIR})
|
||||
# message("")
|
||||
|
||||
set(OAM_ROOT "${PROJECT_SOURCE_DIR}/oam")
|
||||
set(OAM_NAME "oam")
|
||||
set(OAM_COMPONENT "lib${OAM_NAME}")
|
||||
set(OAM_TARGET "${OAM_NAME}")
|
||||
|
||||
################# Determine the library version #########################
|
||||
set(SO_VERSION_GIT_TAG_PREFIX "oam_so_ver")
|
||||
|
||||
# VERSION_* variables should be set by get_version_from_tag
|
||||
message("Package version: ${PKG_VERSION_STR}")
|
||||
|
||||
# Debian package specific variables
|
||||
# Set a default value for the package version
|
||||
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
|
||||
# VERSION_* variables should be set by get_version_from_tag
|
||||
if ( ${ROCM_PATCH_VERSION} )
|
||||
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
|
||||
else()
|
||||
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
|
||||
endif ()
|
||||
set(${OAM_NAME}_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(${OAM_NAME}_VERSION_MINOR "${VERSION_MINOR}")
|
||||
set(${OAM_NAME}_VERSION_PATCH "0")
|
||||
set(${OAM_NAME}_VERSION_BUILD "0")
|
||||
message("SOVERSION: ${SO_VERSION_STRING}")
|
||||
|
||||
|
||||
# Create a configure file to get version info from within library
|
||||
configure_file(
|
||||
"${OAM_ROOT}/src/${OAM_TARGET}Config.in"
|
||||
"${OAM_ROOT}/include/oam/${OAM_TARGET}Config.h")
|
||||
|
||||
set(OAM_SRC_DIR "src")
|
||||
set(OAM_INC_DIR "include")
|
||||
set(OAM_DOCS_DIR "docs")
|
||||
|
||||
set(OAM_SRC_LIST ${CMN_SRC_LIST} "${OAM_SRC_DIR}/amd_oam.cc")
|
||||
|
||||
set(OAM_INC_LIST ${COMMON_INC_DIR} "${OAM_INC_DIR}")
|
||||
set(OAM_EXAMPLE_EXE "oam_ex")
|
||||
|
||||
add_executable(${OAM_EXAMPLE_EXE} "example/oam_example.c")
|
||||
target_include_directories(${OAM_EXAMPLE_EXE} PRIVATE ${OAM_INC_LIST})
|
||||
target_link_libraries(${OAM_EXAMPLE_EXE} ${OAM_TARGET})
|
||||
add_library(${OAM_TARGET} SHARED ${CMN_SRC_LIST} ${OAM_SRC_LIST}
|
||||
${CMN_INC_LIST} ${OAM_INC_LIST})
|
||||
target_link_libraries(${OAM_TARGET} pthread rt)
|
||||
target_include_directories(${OAM_TARGET} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include ${COMMON_PROJ_ROOT}/common/shared_mutex)
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property(TARGET ${OAM_TARGET} PROPERTY
|
||||
SOVERSION "${VERSION_MAJOR}")
|
||||
set_property(TARGET ${OAM_TARGET} PROPERTY
|
||||
VERSION "${SO_VERSION_STRING}")
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL Release)
|
||||
add_custom_command(
|
||||
TARGET ${OAM_TARGET}
|
||||
POST_BUILD COMMAND ${CMAKE_STRIP} lib${OAM_TARGET}.so)
|
||||
endif ()
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
install(TARGETS ${OAM_TARGET}
|
||||
LIBRARY DESTINATION ${OAM_NAME}/lib COMPONENT ${OAM_COMPONENT})
|
||||
install(FILES ${COMMON_SRC_ROOT}/oam/include/oam/oam_mapi.h
|
||||
${COMMON_SRC_ROOT}/oam/include/oam/amd_oam.h
|
||||
DESTINATION oam/include/oam)
|
||||
|
||||
# Generate Doxygen documentation
|
||||
# DOXYGEN_FOUND / DOXYGEN_EXECUTABLE are not set in this file; they are
# expected to come from a find_package(Doxygen) call visible in this scope.
if (DOXYGEN_FOUND)
  # The configured Doxyfile must be written to the exact path the 'doc'
  # target passes to Doxygen below. Writing it to "${OAM_DOCS_DIR}/Doxyfile"
  # (i.e. <binary dir>/docs/Doxyfile) left the target pointing at a file
  # that was never generated.
  # NOTE(review): the input resolves to <source dir>/docs/docs/rsmi_doxygen.cfg
  # (OAM_DOCS_DIR is already "docs") -- confirm this is the intended template.
  configure_file(
      ${CMAKE_CURRENT_SOURCE_DIR}/${OAM_DOCS_DIR}/docs/rsmi_doxygen.cfg
      ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
  add_custom_target(doc
      ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      COMMENT "Generating AMD OAM API documentation with Doxygen" VERBATIM)
endif()
|
||||
|
||||
Spustitelný soubor
+30
@@ -0,0 +1,30 @@
|
||||
#include <stdio.h>
|
||||
#include "oam/oam_mapi.h"
|
||||
#include "oam/amd_oam.h"
|
||||
|
||||
const oam_ops_t amd_oam_ops = {
|
||||
.init = amdoam_init,
|
||||
.free = amdoam_free,
|
||||
// .get_mapi_version = amdoam_get_mapi_version,
|
||||
.discover_devices = amdoam_discover_devices,
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
uint32_t dev_cnt = 0;
|
||||
oam_mapi_version_t version;
|
||||
|
||||
if (amd_oam_ops.init(version)) {
|
||||
printf("init failed\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// amd_oam_ops.get_mapi_version(&version);
|
||||
if (!amd_oam_ops.discover_devices(&dev_cnt))
|
||||
printf("%d AMD devices are discovered\n", dev_cnt);
|
||||
|
||||
amd_oam_ops.free();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Spustitelný soubor
+43
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2020 Open Compute Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef OAM_INCLUDE_OAM_AMD_OAM_H_
|
||||
#define OAM_INCLUDE_OAM_AMD_OAM_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#include <cstdint>
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#endif // __cplusplus
|
||||
|
||||
int amdoam_init(oam_mapi_version_t version);
|
||||
int amdoam_free(void);
|
||||
// int amdoam_get_mapi_version(oam_mapi_version_t *version);
|
||||
int amdoam_discover_devices(int *device_count);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
#endif // OAM_INCLUDE_OAM_AMD_OAM_H_
|
||||
Spustitelný soubor
+647
@@ -0,0 +1,647 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2020 Open Compute Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef OAM_INCLUDE_OAM_OAM_MAPI_H_
|
||||
#define OAM_INCLUDE_OAM_OAM_MAPI_H_
|
||||
|
||||
/**
|
||||
* \file oam_mapi.h
|
||||
* \brief OAM management and monitoring library API definitions
|
||||
*/
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
|
||||
/**
|
||||
* \struct oam_mapi_version_t
|
||||
* \brief OAM library API version
|
||||
* \details TBD
|
||||
* All the libraries versions are expected to be backward compatible.
|
||||
* The major version increment indicates a new API has been added.
|
||||
* Minor version increment indicates an interface change.
|
||||
*/
|
||||
typedef struct oam_mapi_version {
|
||||
uint32_t major;
|
||||
uint32_t minor;
|
||||
} oam_mapi_version_t;
|
||||
|
||||
/**
|
||||
* \struct oam_dev_id_t
|
||||
* \brief Local identifier for the device
|
||||
* \details Immutable device identifier
|
||||
* This is unique within the chassis.
|
||||
*/
|
||||
typedef struct oam_dev_id {
|
||||
/*!< local identifier for the device */
|
||||
int device_id;
|
||||
} oam_dev_id_t;
|
||||
|
||||
/**
|
||||
* \struct oam_net_dev_id_t
|
||||
* \brief Network identifier for the device
|
||||
* \details Immutable network identifier for the device.
|
||||
* This is unique across the entire network.
|
||||
*/
|
||||
typedef struct oam_net_dev_id {
|
||||
/*!< unique network identifier for the device */
|
||||
int network_id;
|
||||
} oam_net_dev_id_t;
|
||||
|
||||
/*
|
||||
* various lengths for device properties
|
||||
*/
|
||||
#define DEVICE_VENDOR_LEN 128
|
||||
#define DEVICE_NAME_LEN 128
|
||||
#define DEVICE_SKU_LEN 128
|
||||
#define BOARD_NAME_LEN 128
|
||||
#define BOARD_REVISION_LEN 128
|
||||
#define BOARD_SERIAL_NUM_LEN 128
|
||||
|
||||
/**
|
||||
* \struct oam_dev_properties_t
|
||||
* \brief TBD
|
||||
* \details TBD
|
||||
*/
|
||||
typedef struct oam_dev_properties {
|
||||
/*!< local identifier for the device */
|
||||
oam_dev_id_t device_id;
|
||||
/*!< vendor name */
|
||||
char device_vendor[DEVICE_VENDOR_LEN];
|
||||
/*!< Device name */
|
||||
char device_name[DEVICE_NAME_LEN];
|
||||
/*!< SKU name */
|
||||
char sku_name[DEVICE_SKU_LEN];
|
||||
/*!< Board name */
|
||||
char board_name[BOARD_NAME_LEN];
|
||||
/*!< Board revision */
|
||||
char board_revision[BOARD_REVISION_LEN];
|
||||
/*!<
|
||||
* Board Serial Number or UUID any other identifier, which can be used
|
||||
* to identify devices uniquely and physically.
|
||||
*/
|
||||
char board_serial_number[BOARD_SERIAL_NUM_LEN];
|
||||
} oam_dev_properties_t;
|
||||
|
||||
/**
|
||||
* \struct oam_sensor_count_t
|
||||
* \brief TBD
|
||||
* \details TBD
|
||||
* Various sensor related information
|
||||
*/
|
||||
typedef struct oam_sensor_count {
|
||||
uint32_t num_temperature_sensors;
|
||||
uint32_t num_power_sensors;
|
||||
uint32_t num_voltage_sensors;
|
||||
uint32_t num_current_sensors;
|
||||
uint32_t num_fans;
|
||||
} oam_sensor_count_t;
|
||||
|
||||
/**
|
||||
* \enum oam_sensor_type_t
|
||||
* \brief Sensor types
|
||||
* \details This enumerated type defines available sensors types.
|
||||
*/
|
||||
typedef enum oam_sensor_type {
|
||||
OAM_SENSOR_TYPE_POWER = 0,
|
||||
OAM_SENSOR_TYPE_VOLTAGE,
|
||||
OAM_SENSOR_TYPE_CURRENT,
|
||||
OAM_SENSOR_TYPE_TEMP,
|
||||
OAM_SENSOR_TYPE_FAN_SPEED,
|
||||
OAM_SENSOR_TYPE_UNKNOWN = 0xFF
|
||||
} oam_sensor_type_t;
|
||||
|
||||
/**
|
||||
* \enum oam_power_sensor_scale_t
|
||||
* \brief scale for power measurements
|
||||
* \details This enumerated type defines available scales for power measurements
|
||||
*/
|
||||
typedef enum oam_power_sensor_scale {
|
||||
OAM_POWER_SCALE_uW = 0,
|
||||
OAM_POWER_SCALE_mW,
|
||||
OAM_POWER_SCALE_W,
|
||||
} oam_power_sensor_scale_t;
|
||||
|
||||
/**
|
||||
* \enum oam_voltage_sensor_scale_t
|
||||
* \brief scale for voltage measurements
|
||||
* \details This enumerated type defines available scales for voltage measurements
|
||||
*/
|
||||
typedef enum oam_voltage_sensor_scale {
|
||||
OAM_VOLTAGE_SCALE_uV = 0,
|
||||
OAM_VOLTAGE_SCALE_mV,
|
||||
OAM_VOLTAGE_SCALE_V,
|
||||
} oam_voltage_sensor_scale_t;
|
||||
|
||||
/**
|
||||
* \enum oam_current_sensor_scale_t
|
||||
* \brief scale for current measurements
|
||||
* \details This enumerated type defines available scales for current measurements
|
||||
*/
|
||||
typedef enum oam_current_sensor_scale {
|
||||
OAM_CURRENT_SCALE_uA = 0,
|
||||
OAM_CURRENT_SCALE_mA,
|
||||
OAM_CURRENT_SCALE_A,
|
||||
} oam_current_sensor_scale_t;
|
||||
|
||||
/**
|
||||
* \enum oam_temp_sensor_scale_t
|
||||
* \brief scale for temp measurements
|
||||
* \details This enumerated type defines available scales for temp measurements
|
||||
*/
|
||||
typedef enum oam_temp_sensor_scale {
|
||||
OAM_TEMP_SCALE_C = 0,
|
||||
OAM_TEMP_SCALE_F
|
||||
} oam_temp_sensor_scale_t;
|
||||
|
||||
/**
|
||||
* \enum oam_fan_sensor_scale_t
|
||||
* \brief scale for fan speed measurements
|
||||
* \details This enumerated type defines available scales for fan speed measurements
|
||||
*/
|
||||
typedef enum oam_fan_sensor_scale {
|
||||
OAM_FAN_SPEED_Hz = 0,
|
||||
OAM_FAN_SPEED_KHz,
|
||||
OAM_FAN_SPEED_MHz
|
||||
} oam_fan_sensor_scale_t;
|
||||
|
||||
typedef union oam_sensor_scale {
|
||||
oam_power_sensor_scale_t power_scale;
|
||||
oam_voltage_sensor_scale_t volate_scale;
|
||||
oam_current_sensor_scale_t current_scale;
|
||||
oam_temp_sensor_scale_t temp_scale;
|
||||
oam_fan_sensor_scale_t fan_scale;
|
||||
} oam_sensor_scale_t;
|
||||
|
||||
/**
|
||||
* \struct oam_dev_handle_t
|
||||
* \brief Device handle
|
||||
* \details Device handle obtained using open call
|
||||
* The same handle is used by all the APIs which are used to perform
|
||||
* specific operation on that device.
|
||||
*/
|
||||
typedef struct oam_dev_handle {
|
||||
void *handle;
|
||||
} oam_dev_handle_t;
|
||||
|
||||
/**
|
||||
* \enum oam_dev_mode_t
|
||||
* \brief Device open modes
|
||||
* \details This enumerated type defines modes in which the device can be opened
|
||||
* For some operations e.g. health check user should open the device
|
||||
* in exclusive mode, so that if there are many applications using the same
|
||||
* device there are no side effects.
|
||||
*/
|
||||
typedef enum oam_dev_mode {
|
||||
OAM_DEV_MODE_EXCLUSIVE = 0,
|
||||
OAM_DEV_MODE_NONEXLUSIVE = 1,
|
||||
OAM_DEV_MODE_UNKNOWN = 0xFF
|
||||
} oam_dev_mode_t;
|
||||
|
||||
/**
|
||||
* \def OAM_SENSOR_NAME_MAX
|
||||
* \brief length of sensor name
|
||||
*/
|
||||
#define OAM_SENSOR_NAME_MAX 256
|
||||
|
||||
/**
|
||||
* \struct oam_sensor_info_t
|
||||
* \brief Sensor information
|
||||
* \details Structure to store information about a single sensor:
|
||||
* its name, its type, the scale in which its value is expressed,
|
||||
* and the value itself.
|
||||
*/
|
||||
typedef struct oam_sensor_info {
|
||||
char sensor_name[OAM_SENSOR_NAME_MAX];
|
||||
oam_sensor_type_t sensor_type;
|
||||
oam_sensor_scale_t scale;
|
||||
int32_t value;
|
||||
} oam_sensor_info_t;
|
||||
|
||||
/**
|
||||
* \struct oam_dev_error_count_t
|
||||
* \brief Device error information
|
||||
* \details Various types of errors reported by device.
|
||||
*/
|
||||
typedef struct oam_dev_error_count {
|
||||
uint32_t total_error_count;
|
||||
uint32_t fatal_error_count;
|
||||
uint32_t unknown_error_count;
|
||||
uint32_t ecc_error_count;
|
||||
} oam_dev_error_count_t;
|
||||
|
||||
/**
|
||||
* \struct oam_firmware_version_t
|
||||
* \brief Firmware version information
|
||||
* \details Structure to store various firmware versions of OAM module
|
||||
*/
|
||||
typedef struct oam_firmware_version {
|
||||
oam_mapi_version_t device_boot_fw_version;
|
||||
oam_mapi_version_t device_fw_version;
|
||||
oam_mapi_version_t board_boot_fw_version;
|
||||
oam_mapi_version_t board_fw_version;
|
||||
} oam_firmware_version_t;
|
||||
|
||||
/**
|
||||
* \struct oam_pci_info_t
|
||||
* \brief PCI information for the device
|
||||
* \details Structure to store PCI (Domain, BDF) information of the device
|
||||
*/
|
||||
typedef struct oam_pci_info {
|
||||
uint16_t domain;
|
||||
uint8_t bus;
|
||||
uint8_t device;
|
||||
uint8_t function;
|
||||
} oam_pci_info_t;
|
||||
|
||||
/**
|
||||
* \enum oam_net_port_state_t
|
||||
* \brief Network port state
|
||||
* \details This enumerated type defines various states of the network port
|
||||
*/
|
||||
typedef enum oam_net_port_state {
|
||||
OAM_NET_PORT_DISABLED = 0,
|
||||
OAM_NET_PORT_ENABLED = 1
|
||||
} oam_net_port_state_t;
|
||||
|
||||
/**
|
||||
* \enum oam_net_port_status_t
|
||||
* \brief Network port status
|
||||
* \details This enumerated type defines various status of the network port
|
||||
*/
|
||||
typedef enum oam_net_port_status {
|
||||
OAM_NET_PORT_UP = 0,
|
||||
OAM_NET_PORT_DOWN = 1,
|
||||
} oam_net_port_status_t;
|
||||
|
||||
/**
|
||||
* \enum oam_net_port_id_t
|
||||
* \brief Network port identifiers
|
||||
* \details This enumerated type defines various identifiers for network ports
|
||||
*/
|
||||
typedef enum oam_net_port_id {
|
||||
OAM_NET_PORT0 = 0,
|
||||
OAM_NET_PORT1 = 1,
|
||||
OAM_NET_PORT2 = 2,
|
||||
OAM_NET_PORT_MAX = 0xFFFF
|
||||
} oam_net_port_id_t;
|
||||
|
||||
/**
|
||||
* \enum oam_firmware_modes_t
|
||||
* \brief Supported mode to update firmware on device
|
||||
* \details This enumerated type defines various modes which are supported by
|
||||
* the device to update firmware.
|
||||
*/
|
||||
typedef enum oam_firmware_modes {
|
||||
OAM_DOWNLOAD_ONLY = 0,
|
||||
OAM_DOWNLOAD_ACTIVATE = 1
|
||||
} oam_firmware_modes_t;
|
||||
|
||||
/**
|
||||
* \def OAM_NET_PORT_NAME
|
||||
* \brief length of network port name
|
||||
*/
|
||||
#define OAM_NET_PORT_NAME 256
|
||||
|
||||
/**
|
||||
* \struct oam_net_port_desc
|
||||
* \brief Network port description
|
||||
* \details Structure to store additional details about the network port
|
||||
*/
|
||||
typedef struct oam_net_port_desc {
|
||||
char name[OAM_NET_PORT_NAME];
|
||||
} oam_net_port_desc_t;
|
||||
|
||||
/**
|
||||
* \def OAM_DEV_HOST_NAME
|
||||
* \brief length of host name
|
||||
*/
|
||||
#define OAM_DEV_HOST_NAME 256
|
||||
|
||||
/**
|
||||
* \struct oam_net_dev_info_t
|
||||
* \brief Information about the device on a network
|
||||
* \details Structure to store additional details about the network device
|
||||
* on a particular network.
|
||||
*/
|
||||
typedef struct oam_net_dev_info {
|
||||
oam_net_dev_id_t net_dev_id;
|
||||
char host_name[OAM_DEV_HOST_NAME];
|
||||
oam_pci_info_t pci_info;
|
||||
} oam_net_dev_info_t;
|
||||
|
||||
/**
|
||||
* \struct oam_neighbour_info_t
|
||||
* \brief Information about device neighbours
|
||||
* \details Structure to store information about device neighbours on the
|
||||
* network
|
||||
*/
|
||||
typedef struct oam_neighbour_info {
|
||||
oam_net_port_id_t device_port;
|
||||
oam_net_dev_info_t device_info;
|
||||
} oam_neighbour_info_t;
|
||||
|
||||
/**
|
||||
* \enum oam_dev_tpc_id_t
|
||||
* \brief TPC identifiers
|
||||
* \details This enumerated type defines various identifiers for TPCs
|
||||
*/
|
||||
typedef enum oam_dev_tpc_id {
|
||||
OAM_DEV_TPC0,
|
||||
OAM_DEV_TPC1,
|
||||
OAM_DEV_TPC2,
|
||||
OAM_DEV_TPC_MAX
|
||||
} oam_dev_tpc_id_t;
|
||||
|
||||
/**
|
||||
* \def OAM_TPC_NAME
|
||||
* \brief length of TPC name
|
||||
*/
|
||||
#define OAM_TPC_NAME 256
|
||||
|
||||
/**
|
||||
* \struct oam_tpc_desc_t
|
||||
* \brief TPC description
|
||||
* \details Structure to store information about TPC e.g. name corresponding
|
||||
* to the id etc.
|
||||
*/
|
||||
typedef struct oam_tpc_desc {
|
||||
char name[256];
|
||||
} oam_tpc_desc_t;
|
||||
|
||||
/**
|
||||
* \struct oam_dev_tpc_stats_t
|
||||
* \brief TPC statistical information
|
||||
* \details Structure to store information about TPC statistical information
|
||||
* e.g. TPC utilization
|
||||
*/
|
||||
typedef struct oam_dev_tpc_stats {
|
||||
double util;
|
||||
} oam_dev_tpc_stats_t;
|
||||
|
||||
/**
|
||||
* \enum oam_dev_mem_id_t
|
||||
* \brief Device memory identifiers
|
||||
* \details This enumerated type defines various identifiers for device memories
|
||||
*/
|
||||
typedef enum oam_dev_mem_id {
|
||||
OAM_DEV_MEM0,
|
||||
OAM_DEV_MEM1,
|
||||
OAM_DEV_MEM2,
|
||||
OAM_DEV_MEM_MAX
|
||||
} oam_dev_mem_id_t;
|
||||
|
||||
/**
|
||||
* \struct oam_mem_desc_t
|
||||
* \brief Device memory description
|
||||
* \details Structure to store additional details about a device memory
|
||||
*/
|
||||
typedef struct oam_mem_desc {
|
||||
char name[256];
|
||||
} oam_mem_desc_t;
|
||||
|
||||
/**
|
||||
* \struct oam_dev_mem_stats_t
|
||||
* \brief Device memory statistical information
|
||||
* \details Structure to store various statistical information about device
|
||||
* memory.
|
||||
*/
|
||||
typedef struct oam_dev_mem_stats {
|
||||
uint32_t total_mem;
|
||||
uint32_t allocated_mem;
|
||||
uint32_t free_mem;
|
||||
} oam_dev_mem_stats_t;
|
||||
|
||||
/**
|
||||
* \struct oam_net_port_pkt_stats_t
|
||||
* \brief Device network port statistical information
|
||||
* \details Structure to store various statistical information about the network
|
||||
* packets on a given port.
|
||||
*/
|
||||
typedef struct oam_net_port_pkt_stats {
|
||||
uint64_t rx_count;
|
||||
uint64_t tx_count;
|
||||
uint64_t rx_errors;
|
||||
uint64_t tx_errors;
|
||||
} oam_net_port_pkt_stats_t;
|
||||
|
||||
/**
|
||||
* \struct oam_ops_t
|
||||
* \brief OAM Device operations
|
||||
* \details Structure provides list of APIs which needs to be
|
||||
* supported by the OAM library.
|
||||
*/
|
||||
typedef struct oam_ops {
|
||||
/*!<
|
||||
* to initialise library instance and perform version compatibility
|
||||
* check
|
||||
*/
|
||||
int (*init)(oam_mapi_version_t version);
|
||||
int (*free)(void);
|
||||
|
||||
/*!<
|
||||
* To get error description from the error code
|
||||
*/
|
||||
int (*get_error_description)(int error_code, const char **error_description);
|
||||
|
||||
/*!<
|
||||
* To retrieve the OAM Management interface version
|
||||
*/
|
||||
int (*get_mapi_version)(oam_mapi_version_t *version);
|
||||
|
||||
/*!<
|
||||
* To retrieve the number of devices present/discovered by the library
|
||||
*/
|
||||
int (*discover_devices)(int *device_count);
|
||||
|
||||
/*!<
|
||||
* To retrieve device properties for each discovered devices
|
||||
*/
|
||||
int (*get_dev_properties)(oam_dev_properties_t *devices);
|
||||
|
||||
/*!<
|
||||
* To retrieve PCI properties of the device
|
||||
*/
|
||||
int (*get_pci_properties)(oam_dev_id_t *device_id, oam_pci_info_t *pci_info);
|
||||
|
||||
/*!<
|
||||
* To query the number of various sensors present
|
||||
*/
|
||||
int (*get_sensors_count)(oam_dev_id_t *device_id,
|
||||
oam_sensor_count_t *sensor_count);
|
||||
|
||||
/*!<
|
||||
* Open the device and obtain handle
|
||||
*/
|
||||
int (*open_device)(oam_dev_id_t *dev_id, oam_dev_mode_t mode,
|
||||
oam_dev_handle_t *handle);
|
||||
int (*close_device)(oam_dev_handle_t *handle);
|
||||
|
||||
|
||||
/*!<
|
||||
* To read various sensor values for a given sensor type
|
||||
*/
|
||||
int (*get_sensors_info)(oam_dev_handle_t *handle,
|
||||
oam_sensor_type_t type,
|
||||
uint32_t num_sensors,
|
||||
oam_sensor_info_t sensor_info[]);
|
||||
/*!<
|
||||
* To read current error count of the device
|
||||
*/
|
||||
int (*get_device_error_count)(oam_dev_handle_t *handle,
|
||||
oam_dev_error_count_t *count);
|
||||
|
||||
/*!<
|
||||
* To update firmware on the device
|
||||
* fw_image contains a null terminated string which specifies complete
|
||||
* path where the firmware image is located
|
||||
*/
|
||||
int (*download_firmware)(oam_dev_id_t *device_id, char *fw_image,
|
||||
oam_firmware_modes_t mode);
|
||||
|
||||
/*!<
|
||||
* To query firmware versions
|
||||
*/
|
||||
int (*get_firmware_version)(oam_dev_id_t *device_id,
|
||||
oam_firmware_version_t *version);
|
||||
|
||||
|
||||
/*!<
|
||||
* to get network id from device id
|
||||
*/
|
||||
int (*get_net_dev_id)(oam_dev_id_t *device_id, oam_net_dev_id_t *net_device);
|
||||
|
||||
/*!<
|
||||
* Network management APIs.
|
||||
*/
|
||||
|
||||
/*!<
|
||||
* discover network.
|
||||
*/
|
||||
int (*discover_network)(int *net_dev_count);
|
||||
int (*get_dev_net_properties)(oam_net_dev_info_t *net_dev_info);
|
||||
|
||||
int (*get_neighbour_count)(oam_dev_id_t *device,
|
||||
oam_net_port_id_t local_port_id,
|
||||
uint32_t *neighbor_count);
|
||||
|
||||
int (*get_neighbours_info)(oam_dev_id_t *device,
|
||||
oam_net_port_id_t local_port_id,
|
||||
uint32_t *neighbors_count,
|
||||
oam_neighbour_info_t *neighbours_info);
|
||||
|
||||
int (*configure_network)(oam_net_dev_id_t *net_devices,
|
||||
uint32_t *net_device_count,
|
||||
char *network_name);
|
||||
|
||||
int (*destroy_network)(char *network_name);
|
||||
|
||||
int (*query_network)(char *network_name, oam_net_dev_info_t *devices,
|
||||
uint32_t *device_count);
|
||||
|
||||
int (*get_network_count)(uint32_t *network_count);
|
||||
int (*list_networks)(char *network_names[]);
|
||||
|
||||
/*!<
|
||||
* Various statistics related to blocks
|
||||
*/
|
||||
|
||||
/*!<
|
||||
* To query number of ports
|
||||
*/
|
||||
int (*get_net_port_count)(oam_dev_handle_t *handle, uint32_t *count,
|
||||
oam_net_port_id_t *port_ids);
|
||||
|
||||
int (*get_net_port_desc)(oam_dev_handle_t *handle, oam_net_port_id_t *port,
|
||||
oam_net_port_desc_t *desc);
|
||||
|
||||
int (*get_net_port_state)(oam_dev_handle_t *handle, oam_net_port_id_t *port,
|
||||
oam_net_port_state_t *state);
|
||||
|
||||
int (*check_net_port_status)(oam_dev_handle_t *handle,
|
||||
oam_net_port_id_t *port,
|
||||
oam_net_port_status_t *status);
|
||||
int (*get_net_port_pkt_stats)(oam_dev_handle_t *handle,
|
||||
oam_net_port_id_t *port,
|
||||
uint32_t duration_sec,
|
||||
oam_net_port_pkt_stats_t *stats);
|
||||
|
||||
int (*query_net_port_bandwidth)(oam_dev_handle_t *handle,
|
||||
oam_net_port_id_t *port,
|
||||
uint32_t duration_sec,
|
||||
double *bandwidth);
|
||||
|
||||
int (*get_tpc_count)(oam_dev_handle_t *handle, uint32_t *count,
|
||||
oam_dev_tpc_id_t *tpc_ids);
|
||||
|
||||
int (*get_tpc_desc)(oam_dev_handle_t *handle, oam_dev_tpc_id_t *tpc_id,
|
||||
oam_tpc_desc_t *desc);
|
||||
|
||||
int (*get_tpc_stats)(oam_dev_handle_t *handle,
|
||||
oam_dev_tpc_id_t *port,
|
||||
oam_dev_tpc_stats_t *stats,
|
||||
uint32_t duration_sec);
|
||||
|
||||
int (*get_mem_count)(oam_dev_handle_t *handle, uint32_t *count,
|
||||
oam_dev_mem_id_t *mem_ids);
|
||||
|
||||
int (*get_mem_desc)(oam_dev_handle_t *handle, oam_dev_mem_id_t *tpc_id,
|
||||
oam_mem_desc_t *desc);
|
||||
|
||||
int (*get_mem_stats)(oam_dev_handle_t *handle, oam_dev_mem_id_t *mem_id,
|
||||
oam_dev_mem_stats_t *stats);
|
||||
|
||||
/*!<
|
||||
* To check the health of the individual components, libraries
|
||||
* generates test workload to check if the block is functioning properly
|
||||
* or not. So no other workload should be running while calling these
|
||||
* APIs
|
||||
*/
|
||||
int (*check_tpc_health)(oam_dev_id_t *device_id, oam_dev_tpc_id_t *tpc_id);
|
||||
int (*check_net_port_health)(oam_dev_id_t *device_id,
|
||||
oam_net_port_id_t *port);
|
||||
int (*check_mem_health)(oam_dev_id_t *device_id, oam_dev_mem_id_t *port);
|
||||
|
||||
/*
|
||||
* Following needs more attention, will work on in next
|
||||
int (*get_fan_speed)(oam_dev_t *oam);
|
||||
int (*set_fan_speed)(oam_dev_t *oam, int speed);
|
||||
|
||||
int (*get_power_cap)(oam_dev_t *oam);
|
||||
int (*set_power_cap)(oam_dev_t *oam, int power);
|
||||
|
||||
int (*get_telemetry)(oam_dev_t *oam);
|
||||
*/
|
||||
} oam_ops_t;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // OAM_INCLUDE_OAM_OAM_MAPI_H_
|
||||
Spustitelný soubor
+161
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* MIT License
|
||||
*
|
||||
* Copyright (c) 2020 Open Compute Project
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "rocm_smi/rocm_smi_common.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_counters.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
#include "oam/oam_mapi.h"
|
||||
#include "oam/amd_oam.h"
|
||||
|
||||
#define TRY try {
|
||||
#define CATCH } catch (...) {return handleRSMIException();}
|
||||
|
||||
|
||||
// Maps the exception currently in flight to an integer status code by
// delegating to amd::smi::handleException(). Intended to be invoked from a
// catch block (it is what the CATCH macro in this file expands to).
static int handleRSMIException() {
  // TODO(x): convert RSMI return to OAM return
  // For now, just return int equiv.
  return static_cast<int>(amd::smi::handleException());
}
|
||||
|
||||
// Initializes the library by calling rsmi_init(0).
//
// version: requested OAM management API version; currently ignored.
//
// Returns the rsmi_status_t produced by rsmi_init() converted to int, so a
// failed initialization is reported to the caller instead of being masked as
// success. Any exception is translated by the CATCH macro.
int amdoam_init(oam_mapi_version_t version) {
  TRY

  // TODO(x): handle version argument
  (void)version;

  rsmi_status_t ret = rsmi_init(0);

  // TODO(x): convert RSMI return to OAM return
  // For now, just return int equiv.
  return static_cast<int>(ret);
  CATCH
}
|
||||
|
||||
int amdoam_free(void) {
|
||||
rsmi_status_t ret = rsmi_shut_down();
|
||||
|
||||
// TODO(x) convert rsmi return to oam return val
|
||||
return static_cast<int>(ret);
|
||||
}
|
||||
|
||||
|
||||
int amdoam_discover_devices(int *device_count) {
|
||||
uint32_t dv_cnt;
|
||||
|
||||
if (device_count == nullptr) {
|
||||
return -1; // TODO(x): return appropriate OAM code
|
||||
}
|
||||
|
||||
rsmi_status_t ret = rsmi_num_monitor_devices(&dv_cnt);
|
||||
|
||||
*device_count = static_cast<int>(dv_cnt);
|
||||
|
||||
// TODO(x) convert rsmi return to oam return val
|
||||
return static_cast<int>(ret);
|
||||
}
|
||||
|
||||
// TODO(x): This function doesn't work for OAM. It's just a version
|
||||
// of rsmi_dev_ecc_count_get(), which has similar functionality.
|
||||
// The purpose here is just to drive refactoring; e.g., making macros
|
||||
// available and previously static functions global.
|
||||
int
|
||||
get_device_error_count(oam_dev_handle_t *handle,
|
||||
oam_dev_error_count_t *count) {
|
||||
std::vector<std::string> val_vec;
|
||||
rsmi_status_t ret;
|
||||
|
||||
TRY
|
||||
// TODO(x): replace with final code...
|
||||
// Below, we are just returning errors for RSMI_GPU_BLOCK_GFX as a
|
||||
// placeholder
|
||||
(void)handle; // Just ignore for now
|
||||
|
||||
rsmi_gpu_block_t block = RSMI_GPU_BLOCK_GFX;
|
||||
|
||||
// The macro CHK_SUPPORT_VAR assumes the existence of a device index variable
|
||||
// "dv_ind". Presumably, the device index will come from the "handle"
|
||||
// pointer. Since I don't know how that will be implemented, for now we
|
||||
// will just make up a device index:
|
||||
uint32_t dv_ind = 0;
|
||||
CHK_SUPPORT_VAR(count, block)
|
||||
|
||||
amd::smi::DevInfoTypes type;
|
||||
switch (block) {
|
||||
case RSMI_GPU_BLOCK_UMC:
|
||||
type = amd::smi::kDevErrCntUMC;
|
||||
break;
|
||||
|
||||
case RSMI_GPU_BLOCK_SDMA:
|
||||
type = amd::smi::kDevErrCntSDMA;
|
||||
break;
|
||||
|
||||
case RSMI_GPU_BLOCK_GFX:
|
||||
type = amd::smi::kDevErrCntGFX;
|
||||
break;
|
||||
|
||||
default:
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = GetDevValueVec(type, dv_ind, &val_vec);
|
||||
|
||||
if (ret == RSMI_STATUS_FILE_ERROR) {
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
assert(val_vec.size() == 2);
|
||||
|
||||
std::string junk;
|
||||
std::istringstream fs1(val_vec[0]);
|
||||
|
||||
fs1 >> junk;
|
||||
assert(junk == "ue:");
|
||||
fs1 >> count->total_error_count;
|
||||
|
||||
std::istringstream fs2(val_vec[1]);
|
||||
|
||||
fs2 >> junk;
|
||||
assert(junk == "ce:");
|
||||
fs2 >> count->total_error_count;
|
||||
|
||||
return ret;
|
||||
CATCH
|
||||
}
|
||||
|
||||
Spustitelný soubor
+56
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* ROC Runtime Conformance Release License
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
|
||||
#define INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
|
||||
|
||||
// This file is generated on build.
|
||||
|
||||
#define rocm_smi_VERSION_MAJOR @rocm_smi_VERSION_MAJOR@
|
||||
#define rocm_smi_VERSION_MINOR @rocm_smi_VERSION_MINOR@
|
||||
#define rocm_smi_VERSION_PATCH @rocm_smi_VERSION_PATCH@
|
||||
#define rocm_smi_VERSION_BUILD "@rocm_smi_VERSION_BUILD@"
|
||||
|
||||
#endif // INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
|
||||
Spustitelný soubor
+143
@@ -0,0 +1,143 @@
|
||||
#
|
||||
# Minimum version of cmake required
|
||||
#
|
||||
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
message(" CMake ROCm SMI (Library) ")
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
|
||||
## Verbose output.
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
|
||||
# Required Defines first:
|
||||
|
||||
message("")
|
||||
message("Build Configuration:")
|
||||
# message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
|
||||
# message("------------Compiler: " ${CMAKE_CXX_COMPILER})
|
||||
# message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
|
||||
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
|
||||
# message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
|
||||
# message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
|
||||
# message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
|
||||
# message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
|
||||
# message("--------RSMI Inc Dir: " ${RSMI_INC_DIR})
|
||||
# message("")
|
||||
|
||||
set(ROCM_SMI "rocm_smi")
|
||||
set(ROCM_SMI_COMPONENT "lib${ROCM_SMI}")
|
||||
set(ROCM_SMI_TARGET "${ROCM_SMI}64")
|
||||
|
||||
################# Determine the library version #########################
|
||||
set(SO_VERSION_GIT_TAG_PREFIX "rsmi_so_ver")
|
||||
|
||||
# VERSION_* variables should be set by get_version_from_tag
|
||||
message("Package version: ${PKG_VERSION_STR}")
|
||||
|
||||
# Debian package specific variables
|
||||
# Set a default value for the package version
|
||||
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)
|
||||
|
||||
# VERSION_* variables should be set by get_version_from_tag.
# When ROCM_PATCH_VERSION holds a true value it becomes the patch component
# of the shared-object version string; otherwise only MAJOR.MINOR is used.
# The variable is tested by name: expanding it inside if() would make CMake
# re-evaluate its *value* as an expression or variable name.
if(ROCM_PATCH_VERSION)
  set(VERSION_PATCH "${ROCM_PATCH_VERSION}")
  set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
else()
  set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
endif()
|
||||
set(${ROCM_SMI}_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(${ROCM_SMI}_VERSION_MINOR "${VERSION_MINOR}")
|
||||
set(${ROCM_SMI}_VERSION_PATCH "0")
|
||||
set(${ROCM_SMI}_VERSION_BUILD "0")
|
||||
message("SOVERSION: ${SO_VERSION_STRING}")
|
||||
|
||||
|
||||
# Create a configure file to get version info from within library
|
||||
configure_file(
|
||||
"${PROJECT_SOURCE_DIR}/src/${ROCM_SMI_TARGET}Config.in"
|
||||
"${PROJECT_SOURCE_DIR}/include/rocm_smi/${ROCM_SMI_TARGET}Config.h")
|
||||
|
||||
set(RSMI_SRC_DIR "src")
|
||||
set(RSMI_INC_DIR "include")
|
||||
set(RSMI_DOCS_DIR "docs")
|
||||
|
||||
# Add any rocm_smi_lib specific source files here
|
||||
set(SMI_SRC_LIST ${CMN_SRC_LIST})
|
||||
|
||||
# Add any rocm_smi_lib specific headers here
|
||||
set(SMI_INC_LIST "")
|
||||
|
||||
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
|
||||
|
||||
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
|
||||
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
add_library(${ROCM_SMI_TARGET} SHARED ${CMN_SRC_LIST} ${SMI_SRC_LIST}
|
||||
${CMN_INC_LIST} ${SMI_INC_LIST})
|
||||
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
|
||||
target_include_directories(${ROCM_SMI_TARGET} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR} ${COMMON_PROJ_ROOT}/common/shared_mutex)
|
||||
|
||||
## Stamp the shared library with its ABI (SOVERSION) and full (VERSION)
## version numbers in a single property call.
set_target_properties(${ROCM_SMI_TARGET} PROPERTIES
  SOVERSION "${VERSION_MAJOR}"
  VERSION "${SO_VERSION_STRING}")
|
||||
|
||||
## If the library is a release, strip the target library.
## The file to strip is taken from the target itself so the command does not
## depend on the working directory or on the library's output name.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  add_custom_command(
    TARGET ${ROCM_SMI_TARGET}
    POST_BUILD
    COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${ROCM_SMI_TARGET}>
    VERBATIM)
endif()
|
||||
|
||||
## Add symlinks from top level ROCm lib dir to rocm-smi lib so files
|
||||
add_custom_target ( so-link ALL WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink
|
||||
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so so-link )
|
||||
add_custom_target ( so-major-link ALL WORKING_DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND}
|
||||
-E create_symlink
|
||||
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR}
|
||||
so-major-link )
|
||||
|
||||
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-link DESTINATION lib RENAME
|
||||
${ROCM_SMI_LIB_NAME}.so )
|
||||
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-major-link DESTINATION lib
|
||||
RENAME ${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR} )
|
||||
|
||||
## Add the install directives for the runtime library.
|
||||
install(TARGETS ${ROCM_SMI_TARGET}
|
||||
LIBRARY DESTINATION ${ROCM_SMI}/lib COMPONENT ${ROCM_SMI_COMPONENT})
|
||||
install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/rocm_smi.h
|
||||
DESTINATION rocm_smi/include/rocm_smi)
|
||||
install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/kfd_ioctl.h
|
||||
DESTINATION rocm_smi/include/rocm_smi)
|
||||
# Generate Doxygen documentation.
# Only done when both Doxygen and LaTeX were found. Produces
# latex/refman.pdf in the build tree, copies it next to the sources as
# docs/<manual>_new.pdf, and installs it as <manual>.pdf.
if(DOXYGEN_FOUND AND LATEX_FOUND)
  set(RSMI_MANUAL_NAME "ROCm_SMI_Manual")
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
                 ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)

  # Step 1: run Doxygen to emit the LaTeX sources.
  # NOTE(review): INC_DIR is not set in this file; presumably it comes from
  # the parent scope -- confirm.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
            "${INC_DIR}/rocm_smi.h"
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

  # Step 2: build the PDF from the LaTeX sources and copy it to docs/.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
    COMMAND make > /dev/null
    COMMAND ${CMAKE_COMMAND} -E copy
            ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
            ${CMAKE_CURRENT_SOURCE_DIR}/docs/${RSMI_MANUAL_NAME}_new.pdf
    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/latex)

  add_custom_target(docs DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf)

  add_dependencies(${ROCM_SMI_TARGET} docs)

  # DESTINATION names a directory; the installed file name is set with RENAME.
  # Previously the ".pdf" name was given as the destination, which installs
  # refman.pdf into a directory called "<manual>.pdf".
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
          DESTINATION ${ROCM_SMI}/docs
          RENAME ${RSMI_MANUAL_NAME}.pdf)
  install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/docs/README.md
          DESTINATION ${ROCM_SMI}/docs/)
else()
  message("Doxygen or Latex is not found. Will not generate documents.")
endif()
|
||||
|
||||
@@ -76,129 +76,8 @@
|
||||
|
||||
static const uint32_t kMaxOverdriveLevel = 20;
|
||||
|
||||
static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
|
||||
switch (err) {
|
||||
case 0: return RSMI_STATUS_SUCCESS;
|
||||
case ESRCH: return RSMI_STATUS_NOT_FOUND;
|
||||
case EACCES: return RSMI_STATUS_PERMISSION;
|
||||
case EPERM:
|
||||
case ENOENT: return RSMI_STATUS_NOT_SUPPORTED;
|
||||
case EBADF:
|
||||
case EISDIR: return RSMI_STATUS_FILE_ERROR;
|
||||
case EINTR: return RSMI_STATUS_INTERRUPT;
|
||||
case EIO: return RSMI_STATUS_UNEXPECTED_SIZE;
|
||||
case ENXIO: return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
case EBUSY: return RSMI_STATUS_BUSY;
|
||||
default: return RSMI_STATUS_UNKNOWN_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
static rsmi_status_t handleException() {
|
||||
try {
|
||||
throw;
|
||||
} catch (const std::bad_alloc& e) {
|
||||
return RSMI_STATUS_OUT_OF_RESOURCES;
|
||||
} catch (const amd::smi::rsmi_exception& e) {
|
||||
debug_print("Exception caught: %s.\n", e.what());
|
||||
return e.error_code();
|
||||
} catch (const std::exception& e) {
|
||||
debug_print("Exception caught: %s\n", e.what());
|
||||
return RSMI_STATUS_INTERNAL_EXCEPTION;
|
||||
} catch (const std::nested_exception& e) {
|
||||
debug_print("Callback threw.\n");
|
||||
return RSMI_STATUS_INTERNAL_EXCEPTION;
|
||||
} catch (int erno) {
|
||||
return errno_to_rsmi_status(erno);
|
||||
} catch (...) {
|
||||
debug_print("Unknown exception caught.\n");
|
||||
return RSMI_STATUS_INTERNAL_EXCEPTION;
|
||||
}
|
||||
}
|
||||
|
||||
#define TRY try {
|
||||
#define CATCH } catch (...) {return handleException();}
|
||||
|
||||
#define CHECK_DV_IND_RANGE \
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
|
||||
if (dv_ind >= smi.monitor_devices().size()) { \
|
||||
return RSMI_STATUS_INVALID_ARGS; \
|
||||
} \
|
||||
|
||||
#define GET_DEV_FROM_INDX \
|
||||
CHECK_DV_IND_RANGE \
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
|
||||
assert(dev != nullptr);
|
||||
|
||||
|
||||
#define GET_DEV_AND_KFDNODE_FROM_INDX \
|
||||
GET_DEV_FROM_INDX \
|
||||
std::shared_ptr<amd::smi::KFDNode> kfd_node; \
|
||||
if (smi.kfd_node_map().find(dev->kfd_gpu_id()) == \
|
||||
smi.kfd_node_map().end()) { \
|
||||
return RSMI_INITIALIZATION_ERROR; \
|
||||
} \
|
||||
kfd_node = smi.kfd_node_map()[dev->kfd_gpu_id()];
|
||||
|
||||
#define REQUIRE_ROOT_ACCESS \
|
||||
if (amd::smi::RocmSMI::getInstance().euid()) { \
|
||||
return RSMI_STATUS_PERMISSION; \
|
||||
}
|
||||
|
||||
// Locks the mutex of device "dv_ind" (a variable that must exist at the point
// of use) for the remainder of the enclosing scope. If the
// RSMI_INIT_FLAG_RESRV_TEST1 bit is set in the init options, the lock attempt
// is non-blocking and the enclosing function returns RSMI_STATUS_BUSY when
// the mutex could not be acquired.
// The flag is tested with bitwise '&': the former logical '&&' was true for
// any non-zero init options, not just when this particular bit was set.
#define DEVICE_MUTEX \
    amd::smi::pthread_wrap _pw(*get_mutex(dv_ind)); \
    amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance(); \
    bool blocking_ = !(smi_.init_options() & RSMI_INIT_FLAG_RESRV_TEST1); \
    amd::smi::ScopedPthread _lock(_pw, blocking_); \
    if (!blocking_ && _lock.mutex_not_acquired()) { \
      return RSMI_STATUS_BUSY; \
    }
|
||||
|
||||
/* This group of macros is used to facilitate checking of support for rsmi_dev*
|
||||
* "getter" functions. When the return buffer is set to nullptr, the macro will
|
||||
* check the previously gathered device support data to see if the function,
|
||||
* with possible variants (e.g., memory types, firware types,...) and
|
||||
* subvariants (e.g. monitors/sensors) are supported.
|
||||
*/
|
||||
// This macro assumes dev already available
|
||||
// Support probe for "getter" APIs; requires `dev` to be in scope.
//
// Does nothing unless RT_PTR is nullptr. When it is nullptr, the enclosing
// function always returns:
//   - RSMI_STATUS_NOT_SUPPORTED if dev->DeviceAPISupported() reports that the
//     calling function (__FUNCTION__) with variant VR / sub-variant SUB_VR is
//     not supported;
//   - RSMI_STATUS_INVALID_ARGS if it is supported, or if the support query
//     throws an amd::smi::rsmi_exception.
#define CHK_API_SUPPORT_ONLY(RT_PTR, VR, SUB_VR) \
    if ((RT_PTR) == nullptr) { \
      try { \
        return dev->DeviceAPISupported(__FUNCTION__, (VR), (SUB_VR)) \
                   ? RSMI_STATUS_INVALID_ARGS \
                   : RSMI_STATUS_NOT_SUPPORTED; \
      } catch (const amd::smi::rsmi_exception& e) { \
        debug_print( \
            "Exception caught when checking if API is supported %s.\n", \
            e.what()); \
        return RSMI_STATUS_INVALID_ARGS; \
      } \
    }
|
||||
|
||||
// Looks up `dev` for the in-scope `dv_ind` (GET_DEV_FROM_INDX) and then runs
// the support probe for variant VR and sub-variant SUB_VR.
#define CHK_SUPPORT(RT_PTR, VR, SUB_VR) \
    GET_DEV_FROM_INDX \
    CHK_API_SUPPORT_ONLY((RT_PTR), (VR), (SUB_VR))

// Probe by function name only: default variant and default sub-variant.
#define CHK_SUPPORT_NAME_ONLY(RT_PTR) \
    CHK_SUPPORT_VAR((RT_PTR), RSMI_DEFAULT_VARIANT)

// Probe for a specific variant with the default sub-variant.
#define CHK_SUPPORT_VAR(RT_PTR, VR) \
    CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT)

// Probe for a specific sub-variant with the default variant.
#define CHK_SUPPORT_SUBVAR_ONLY(RT_PTR, SUB_VR) \
    CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR))
|
||||
|
||||
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
if (dv_ind >= smi.monitor_devices().size()) {
|
||||
return nullptr;
|
||||
}
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
|
||||
assert(dev != nullptr);
|
||||
|
||||
return dev->mutex();
|
||||
}
|
||||
#define CATCH } catch (...) {return amd::smi::handleException();}
|
||||
|
||||
static uint64_t get_multiplier_from_str(char units_char) {
|
||||
uint32_t multiplier = 0;
|
||||
@@ -404,7 +283,7 @@ static rsmi_status_t get_dev_value_str(amd::smi::DevInfoTypes type,
|
||||
GET_DEV_FROM_INDX
|
||||
int ret = dev->readDevInfo(type, val_str);
|
||||
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
static rsmi_status_t get_dev_value_int(amd::smi::DevInfoTypes type,
|
||||
uint32_t dv_ind, uint64_t *val_int) {
|
||||
@@ -415,7 +294,7 @@ static rsmi_status_t get_dev_value_int(amd::smi::DevInfoTypes type,
|
||||
GET_DEV_FROM_INDX
|
||||
int ret = dev->readDevInfo(type, val_int);
|
||||
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
static rsmi_status_t get_dev_value_line(amd::smi::DevInfoTypes type,
|
||||
@@ -427,7 +306,7 @@ static rsmi_status_t get_dev_value_line(amd::smi::DevInfoTypes type,
|
||||
GET_DEV_FROM_INDX
|
||||
int ret = dev->readDevInfoLine(type, val_str);
|
||||
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
static rsmi_status_t set_dev_value(amd::smi::DevInfoTypes type,
|
||||
@@ -435,7 +314,7 @@ static rsmi_status_t set_dev_value(amd::smi::DevInfoTypes type,
|
||||
GET_DEV_FROM_INDX
|
||||
|
||||
int ret = dev->writeDevInfo(type, val);
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
|
||||
@@ -452,7 +331,7 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
|
||||
|
||||
int ret = dev->monitor()->readMonitor(type, sensor_ind, &val_str);
|
||||
if (ret) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
if (!amd::smi::IsInteger(val_str)) {
|
||||
@@ -480,7 +359,7 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
|
||||
|
||||
int ret = dev->monitor()->readMonitor(type, sensor_ind, &val_str);
|
||||
if (ret) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
if (!amd::smi::IsInteger(val_str)) {
|
||||
@@ -504,7 +383,7 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
|
||||
int ret = dev->monitor()->writeMonitor(type, sensor_ind,
|
||||
std::to_string(val));
|
||||
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
|
||||
@@ -517,7 +396,7 @@ static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
|
||||
|
||||
uint32_t ret = smi.DiscoverAMDPowerMonitors();
|
||||
if (ret != 0) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
|
||||
@@ -526,20 +405,9 @@ static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
|
||||
|
||||
ret = dev->power_monitor()->readPowerValue(type, val);
|
||||
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
// Reads the device-info item `type` of device `dv_ind` into *val_vec via
// Device::readDevInfo().
//
// Returns RSMI_STATUS_INVALID_ARGS when val_vec is null or dv_ind is out of
// range (the latter via GET_DEV_FROM_INDX); otherwise returns the result of
// readDevInfo() translated by errno_to_rsmi_status().
static rsmi_status_t get_dev_value_vec(amd::smi::DevInfoTypes type,
                         uint32_t dv_ind, std::vector<std::string> *val_vec) {
  assert(val_vec != nullptr);
  if (!val_vec) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // Declares `smi` and `dev`; may return early on a bad index.
  GET_DEV_FROM_INDX

  const int read_err = dev->readDevInfo(type, val_vec);
  return errno_to_rsmi_status(read_err);
}
|
||||
// Returns true iff n has exactly one bit set, i.e. n is a power of two.
// Zero is not a power of two.
static bool is_power_of_2(uint64_t n) {
  if (n == 0) {
    return false;
  }
  // Clearing the lowest set bit leaves zero only when it was the sole bit.
  return (n & (n - 1)) == 0;
}
|
||||
@@ -654,7 +522,7 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind,
|
||||
*enabled_blks = strtoul(tmp_str.c_str(), nullptr, 16);
|
||||
assert(errno == 0);
|
||||
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
CATCH
|
||||
}
|
||||
|
||||
@@ -732,7 +600,7 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
|
||||
|
||||
DEVICE_MUTEX
|
||||
|
||||
ret = get_dev_value_vec(type, dv_ind, &val_vec);
|
||||
ret = GetDevValueVec(type, dv_ind, &val_vec);
|
||||
|
||||
if (ret == RSMI_STATUS_FILE_ERROR) {
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
@@ -828,7 +696,7 @@ get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) {
|
||||
val_u64 = strtoul(val_str.c_str(), nullptr, 16);
|
||||
assert(errno == 0);
|
||||
if (errno != 0) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
}
|
||||
if (val_u64 > 0xFFFF) {
|
||||
return RSMI_STATUS_UNEXPECTED_SIZE;
|
||||
@@ -951,7 +819,7 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type,
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
ret = get_dev_value_vec(type, dv_ind, &val_vec);
|
||||
ret = GetDevValueVec(type, dv_ind, &val_vec);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1001,7 +869,7 @@ static rsmi_status_t get_power_profiles(uint32_t dv_ind,
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
ret = get_dev_value_vec(amd::smi::kDevPowerProfileMode, dv_ind, &val_vec);
|
||||
ret = GetDevValueVec(amd::smi::kDevPowerProfileMode, dv_ind, &val_vec);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1085,7 +953,7 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
|
||||
ret = get_dev_value_vec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
|
||||
ret = GetDevValueVec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1186,7 +1054,7 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind,
|
||||
THROW_IF_NULLPTR_DEREF(p)
|
||||
THROW_IF_NULLPTR_DEREF(num_regions)
|
||||
|
||||
ret = get_dev_value_vec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
|
||||
ret = GetDevValueVec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1395,7 +1263,7 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block,
|
||||
|
||||
ret = get_dev_value_int(dev_type, dv_ind, fw_version);
|
||||
if (ret != 0) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
@@ -1487,7 +1355,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
|
||||
}
|
||||
|
||||
ret_i = dev->writeDevInfo(dev_type, freq_enable_str);
|
||||
return errno_to_rsmi_status(ret_i);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret_i);
|
||||
|
||||
CATCH
|
||||
}
|
||||
@@ -1743,7 +1611,7 @@ rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len) {
|
||||
// Retrieve vbios and store in vbios_value string
|
||||
int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &vbios_value);
|
||||
if (ret != 0) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
if (vbios_value.length() == 16) {
|
||||
sku_value = vbios_value.substr(4, 6);
|
||||
@@ -1779,7 +1647,7 @@ rsmi_dev_vram_vendor_get(uint32_t dv_ind, char *brand, uint32_t len) {
|
||||
int ret = dev->readDevInfo(amd::smi::kDevVramVendor, &val_str);
|
||||
|
||||
if (ret != 0) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
uint32_t ln = static_cast<uint32_t>(val_str.copy(brand, len));
|
||||
@@ -1893,7 +1761,7 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) {
|
||||
uint32_t ret_i;
|
||||
ret_i = dev->writeDevInfo(amd::smi::kDevPCIEClk, freq_enable_str);
|
||||
|
||||
return errno_to_rsmi_status(ret_i);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret_i);
|
||||
|
||||
CATCH
|
||||
}
|
||||
@@ -2565,7 +2433,7 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) {
|
||||
int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str);
|
||||
|
||||
if (ret != 0) {
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
}
|
||||
|
||||
uint32_t ln = static_cast<uint32_t>(val_str.copy(vbios, len));
|
||||
@@ -2629,7 +2497,7 @@ rsmi_version_str_get(rsmi_sw_component_t component, char *ver_str,
|
||||
err = uname(&buf);
|
||||
|
||||
if (err != 0) {
|
||||
return errno_to_rsmi_status(err);
|
||||
return amd::smi::ErrnoToRsmiStatus(err);
|
||||
}
|
||||
|
||||
val_str = buf.release;
|
||||
@@ -2744,7 +2612,7 @@ rsmi_dev_counter_destroy(rsmi_event_handle_t evnt_handle) {
|
||||
ret = evt->stopCounter();
|
||||
|
||||
delete evt;
|
||||
return errno_to_rsmi_status(ret);;
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);;
|
||||
CATCH
|
||||
}
|
||||
|
||||
@@ -2755,7 +2623,7 @@ rsmi_counter_control(rsmi_event_handle_t evt_handle,
|
||||
|
||||
amd::smi::evt::Event *evt =
|
||||
reinterpret_cast<amd::smi::evt::Event *>(evt_handle);
|
||||
amd::smi::pthread_wrap _pw(*get_mutex(evt->dev_ind()));
|
||||
amd::smi::pthread_wrap _pw(*amd::smi::GetMutex(evt->dev_ind()));
|
||||
amd::smi::ScopedPthread _lock(_pw);
|
||||
|
||||
REQUIRE_ROOT_ACCESS
|
||||
@@ -2779,7 +2647,7 @@ rsmi_counter_control(rsmi_event_handle_t evt_handle,
|
||||
assert(!"Unexpected perf counter command");
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
|
||||
CATCH
|
||||
}
|
||||
@@ -2810,7 +2678,7 @@ rsmi_counter_read(rsmi_event_handle_t evt_handle,
|
||||
ret = evt->getValue(value);
|
||||
}
|
||||
|
||||
return errno_to_rsmi_status(ret);
|
||||
return amd::smi::ErrnoToRsmiStatus(ret);
|
||||
CATCH
|
||||
}
|
||||
|
||||
@@ -2868,7 +2736,7 @@ rsmi_compute_process_info_get(rsmi_process_info_t *procs,
|
||||
int err = amd::smi::GetProcessInfo(procs, *num_items, &procs_found);
|
||||
|
||||
if (err) {
|
||||
return errno_to_rsmi_status(err);
|
||||
return amd::smi::ErrnoToRsmiStatus(err);
|
||||
}
|
||||
|
||||
if (procs && *num_items < procs_found) {
|
||||
@@ -2896,7 +2764,7 @@ rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
|
||||
int err = amd::smi::GetProcessGPUs(pid, &gpu_set);
|
||||
|
||||
if (err) {
|
||||
return errno_to_rsmi_status(err);
|
||||
return amd::smi::ErrnoToRsmiStatus(err);
|
||||
}
|
||||
|
||||
uint32_t i = 0;
|
||||
@@ -2936,7 +2804,7 @@ rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages,
|
||||
|
||||
std::vector<std::string> val_vec;
|
||||
|
||||
ret = get_dev_value_vec(amd::smi::kDevMemPageBad, dv_ind, &val_vec);
|
||||
ret = GetDevValueVec(amd::smi::kDevMemPageBad, dv_ind, &val_vec);
|
||||
|
||||
if (ret == RSMI_STATUS_FILE_ERROR) {
|
||||
return RSMI_STATUS_NOT_SUPPORTED;
|
||||
@@ -3017,7 +2885,7 @@ rsmi_compute_process_info_by_pid_get(uint32_t pid,
|
||||
int err = amd::smi::GetProcessInfoForPID(pid, proc, &gpu_set);
|
||||
|
||||
if (err) {
|
||||
return errno_to_rsmi_status(err);
|
||||
return amd::smi::ErrnoToRsmiStatus(err);
|
||||
}
|
||||
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
@@ -3534,7 +3402,7 @@ rsmi_event_notification_init(uint32_t dv_ind) {
|
||||
|
||||
int ret = ioctl(smi.kfd_notif_evt_fh(), AMDKFD_IOC_SMI_EVENTS, &args);
|
||||
if (ret < 0) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
}
|
||||
if (args.anon_fd < 1) {
|
||||
return RSMI_STATUS_NO_DATA;
|
||||
@@ -3544,7 +3412,7 @@ rsmi_event_notification_init(uint32_t dv_ind) {
|
||||
FILE *anon_file_ptr = fdopen(args.anon_fd, "r");
|
||||
if (anon_file_ptr == nullptr) {
|
||||
close(dev->evt_notif_anon_fd());
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
}
|
||||
dev->set_evt_notif_anon_file_ptr(anon_file_ptr);
|
||||
|
||||
@@ -3564,7 +3432,7 @@ rsmi_event_notification_mask_set(uint32_t dv_ind, uint64_t mask) {
|
||||
ssize_t ret = write(dev->evt_notif_anon_fd(), &mask, sizeof(uint64_t));
|
||||
|
||||
if (ret == -1) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
}
|
||||
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
@@ -3645,7 +3513,7 @@ rsmi_event_notification_get(int timeout_ms,
|
||||
fill_data_buffer(false);
|
||||
|
||||
if (*num_elem < buffer_size && errno != EAGAIN) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
} else if (*num_elem >= buffer_size) {
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -3655,7 +3523,7 @@ rsmi_event_notification_get(int timeout_ms,
|
||||
if (p_ret > 0) {
|
||||
fill_data_buffer(true);
|
||||
} else if (p_ret < 0) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
}
|
||||
if (*num_elem == 0) {
|
||||
return RSMI_STATUS_NO_DATA;
|
||||
@@ -3684,7 +3552,7 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) {
|
||||
int ret = close(smi.kfd_notif_evt_fh());
|
||||
smi.set_kfd_notif_evt_fh(-1);
|
||||
if (ret < 0) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
return amd::smi::ErrnoToRsmiStatus(errno);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3700,7 +3568,7 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) {
|
||||
rsmi_status_t
|
||||
rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds) {
|
||||
// DEVICE_MUTEX
|
||||
amd::smi::pthread_wrap _pw(*get_mutex(dv_ind));
|
||||
amd::smi::pthread_wrap _pw(*amd::smi::GetMutex(dv_ind));
|
||||
amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance();
|
||||
bool blocking_ = !(smi_.init_options() && RSMI_INIT_FLAG_RESRV_TEST1);
|
||||
amd::smi::ScopedPthread _lock(_pw, blocking_);
|
||||
|
||||
@@ -50,6 +50,13 @@
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -151,5 +158,68 @@ bool IsInteger(const std::string & n_str) {
|
||||
|
||||
return (*tmp == 0);
|
||||
}
|
||||
|
||||
// Translates the exception currently being handled into an rsmi_status_t.
//
// Must be called from inside a catch handler: the bare `throw;` re-raises the
// in-flight exception so that it can be classified by type below.
//
//   std::bad_alloc            -> RSMI_STATUS_OUT_OF_RESOURCES
//   amd::smi::rsmi_exception  -> the status carried by the exception
//   anything else             -> RSMI_STATUS_INTERNAL_EXCEPTION
rsmi_status_t handleException() {
  rsmi_status_t status = RSMI_STATUS_INTERNAL_EXCEPTION;

  try {
    throw;
  } catch (const std::bad_alloc&) {
    debug_print("RSMI exception: BadAlloc\n");
    status = RSMI_STATUS_OUT_OF_RESOURCES;
  } catch (const amd::smi::rsmi_exception& e) {
    debug_print("Exception caught: %s.\n", e.what());
    status = e.error_code();
  } catch (const std::exception& e) {
    debug_print("Exception caught: %s\n", e.what());
  } catch (const std::nested_exception&) {
    debug_print("Callback threw.\n");
  } catch (...) {
    debug_print("Unknown exception caught.\n");
  }

  return status;
}
|
||||
|
||||
pthread_mutex_t *GetMutex(uint32_t dv_ind) {
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
|
||||
if (dv_ind >= smi.monitor_devices().size()) {
|
||||
return nullptr;
|
||||
}
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
|
||||
assert(dev != nullptr);
|
||||
|
||||
return dev->mutex();
|
||||
}
|
||||
|
||||
// Reads the device-info item `type` of device `dv_ind` into *val_vec via
// Device::readDevInfo().
//
// Returns RSMI_STATUS_INVALID_ARGS when val_vec is null or dv_ind is out of
// range (the latter via GET_DEV_FROM_INDX); otherwise returns the result of
// readDevInfo() translated by ErrnoToRsmiStatus().
rsmi_status_t GetDevValueVec(amd::smi::DevInfoTypes type,
                         uint32_t dv_ind, std::vector<std::string> *val_vec) {
  assert(val_vec != nullptr);
  if (!val_vec) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // Declares `smi` and `dev`; may return early on a bad index.
  GET_DEV_FROM_INDX

  const int read_err = dev->readDevInfo(type, val_vec);
  return ErrnoToRsmiStatus(read_err);
}
|
||||
|
||||
// Maps an errno-style error code to the corresponding rsmi_status_t.
//
// 0 maps to RSMI_STATUS_SUCCESS; any code without an explicit case below maps
// to RSMI_STATUS_UNKNOWN_ERROR.
rsmi_status_t ErrnoToRsmiStatus(uint32_t err) {
  rsmi_status_t status = RSMI_STATUS_UNKNOWN_ERROR;

  switch (err) {
    case 0:
      status = RSMI_STATUS_SUCCESS;
      break;
    case ESRCH:
      status = RSMI_STATUS_NOT_FOUND;
      break;
    case EACCES:
      status = RSMI_STATUS_PERMISSION;
      break;
    case EPERM:
    case ENOENT:
      status = RSMI_STATUS_NOT_SUPPORTED;
      break;
    case EBADF:
    case EISDIR:
      status = RSMI_STATUS_FILE_ERROR;
      break;
    case EINTR:
      status = RSMI_STATUS_INTERRUPT;
      break;
    case EIO:
      status = RSMI_STATUS_UNEXPECTED_SIZE;
      break;
    case ENXIO:
      status = RSMI_STATUS_UNEXPECTED_DATA;
      break;
    case EBUSY:
      status = RSMI_STATUS_BUSY;
      break;
    default:
      break;
  }

  return status;
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
vendorováno
Odkázat v novém úkolu
Zablokovat Uživatele