Refactor rsmi to support oam

Change-Id: Idc524e01ba06eb5c8d1682becaf5bf8ced5bffcf


[ROCm/rocm_smi_lib commit: 6594f8f58b]
Tento commit je obsažen v:
Chris Freehill
2020-06-20 17:00:06 -05:00
rodič 4c94842508
revize 98b976ef3e
20 změnil soubory, kde provedl 1438 přidání a 310 odebrání
+59 -138
Zobrazit soubor
@@ -3,6 +3,8 @@
#
cmake_minimum_required(VERSION 3.5.0)
set(AMD_SMI_LIBS_TARGET "amd_smi_libraries")
## Set default module path if not already set
if(NOT DEFINED CMAKE_MODULE_PATH)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")
@@ -15,6 +17,18 @@ set(ROCM_SMI_COMPONENT "lib${ROCM_SMI}")
set(ROCM_SMI_TARGET "${ROCM_SMI}64")
set(ROCM_SMI_LIB_NAME "lib${ROCM_SMI_TARGET}")
# provide git to utilities
find_program (GIT NAMES git)
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver")
get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
message("Package version: ${PKG_VERSION_STR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_MINOR "${VERSION_MINOR}")
set(${AMD_SMI_LIBS_TARGET}_VERSION_PATCH "0")
set(${AMD_SMI_LIBS_TARGET}_VERSION_BUILD "0")
# The following default version values should be updated as appropriate for
# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR).
# Until ABI stabilizes VERSION_MAJOR will be 0. This should be over-ridden
@@ -24,54 +38,18 @@ set(PKG_VERSION_MINOR 0)
set(PKG_VERSION_PATCH 0)
set(PKG_VERSION_NUM_COMMIT 0)
################# Determine the library version #########################
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "rsmi_pkg_ver")
set(SO_VERSION_GIT_TAG_PREFIX "rsmi_so_ver")
# provide git to utilities
find_program (GIT NAMES git)
get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
# VERSION_* variables should be set by get_version_from_tag
message("Package version: ${PKG_VERSION_STR}")
# Debian package specific variables
# Set a default value for the package version
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)
# VERSION_* variables should be set by get_version_from_tag
if ( ${ROCM_PATCH_VERSION} )
set ( VERSION_PATCH ${ROCM_PATCH_VERSION})
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
else()
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
endif ()
set(${ROCM_SMI}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${ROCM_SMI}_VERSION_MINOR "${VERSION_MINOR}")
set(${ROCM_SMI}_VERSION_PATCH "0")
set(${ROCM_SMI}_VERSION_BUILD "0")
message("SOVERSION: ${SO_VERSION_STRING}")
## Define default variable and variables for the optional build target
## rocm_smi_lib-dev
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
CACHE STRING "Location of rocm_smi source code.")
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
set(CMAKE_INSTALL_PREFIX "/opt/rocm"
CACHE STRING "Default installation directory.")
endif ()
set(COMMON_SRC_ROOT ${CMAKE_CURRENT_SOURCE_DIR}
CACHE STRING "Location source code common root.")
set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm"
CACHE STRING "Default packaging prefix.")
set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
project(${ROCM_SMI_TARGET})
# Create a configure file to get version info from within library
configure_file(
"${PROJECT_SOURCE_DIR}/src/${ROCM_SMI_TARGET}Config.in"
"${PROJECT_SOURCE_DIR}/include/rocm_smi/${ROCM_SMI_TARGET}Config.h")
if (NOT DEFINED CPACK_PACKAGE_VENDOR)
set(CPACK_PACKAGE_VENDOR "AMD")
endif()
@@ -82,14 +60,19 @@ endif()
if (NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
"ROCm System Management Interface library")
"AMD System Management libraries")
endif()
if (NOT ROCM_SMI_PACKAGE)
set(ROCM_SMI_PACKAGE rocm_smi_lib64)
if (NOT AMD_SMI_PACKAGE)
set(AMD_SMI_PACKAGE rocm-smi-lib64)
endif()
set(CPACK_PACKAGE_FILE_NAME "${ROCM_SMI_PACKAGE}-${PKG_VERSION_STR}")
set(CPACK_PACKAGE_FILE_NAME "${AMD_SMI_PACKAGE}-${PKG_VERSION_STR}")
project(${AMD_SMI_LIBS_TARGET})
set(COMMON_PROJ_ROOT ${PROJECT_SOURCE_DIR})
## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)
@@ -128,108 +111,47 @@ else ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb -O0 -DDEBUG")
endif ()
set(SRC_DIR "src")
set(INC_DIR "include/rocm_smi")
set(COMMON_SRC_DIR "${PROJECT_SOURCE_DIR}/src")
set(COMMON_INC_DIR "${PROJECT_SOURCE_DIR}/include/rocm_smi")
set(SHR_MUTEX_DIR "${PROJECT_SOURCE_DIR}/third_party/shared_mutex")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_SOURCE_DIR}/src/shared_mutex)
set(SMI_SRC_LIST "${SRC_DIR}/rocm_smi_device.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_main.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_monitor.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_power_mon.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_utils.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_counters.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_kfd.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/rocm_smi_io_link.cc")
set(SMI_SRC_LIST ${SMI_SRC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.cc")
${CMAKE_CURRENT_SOURCE_DIR}/third_party/shared_mutex)
set(SMI_INC_LIST "${INC_DIR}/rocm_smi_device.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_main.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_monitor.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_power_mon.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_utils.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_common.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_exception.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_counters.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_kfd.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${INC_DIR}/rocm_smi_io_link.h")
set(SMI_INC_LIST ${SMI_INC_LIST} "${SRC_DIR}/shared_mutex/shared_mutex.h")
set(CMN_SRC_LIST "${COMMON_SRC_DIR}/rocm_smi_device.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_main.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_monitor.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_power_mon.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_utils.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_counters.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_kfd.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi_io_link.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_SRC_DIR}/rocm_smi.cc")
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${SHR_MUTEX_DIR}/shared_mutex.cc")
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
set(CMN_INC_LIST "${COMMON_INC_DIR}/rocm_smi_device.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_main.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_monitor.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_power_mon.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_utils.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_common.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_exception.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_counters.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_kfd.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi_io_link.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${COMMON_INC_DIR}/rocm_smi.h")
set(CMN_INC_LIST ${CMN_INC_LIST} "${SHR_MUTEX_DIR}/shared_mutex.h")
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
add_library(${ROCM_SMI_TARGET} SHARED ${SMI_SRC_LIST} ${SMI_INC_LIST})
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
## Set the VERSION and SOVERSION values
set_property(TARGET ${ROCM_SMI_TARGET} PROPERTY
SOVERSION "${VERSION_MAJOR}")
set_property(TARGET ${ROCM_SMI_TARGET} PROPERTY
VERSION "${SO_VERSION_STRING}")
## If the library is a release, strip the target library
if ("${CMAKE_BUILD_TYPE}" STREQUAL Release)
add_custom_command(
TARGET ${ROCM_SMI_TARGET}
POST_BUILD COMMAND ${CMAKE_STRIP} lib${ROCM_SMI_TARGET}.so)
endif ()
## Add symlinks from top level ROCm lib dir to rocm-smi lib so files
add_custom_target ( so-link ALL WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so so-link )
add_custom_target ( so-major-link ALL WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND}
-E create_symlink
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR}
so-major-link )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-link DESTINATION lib RENAME
${ROCM_SMI_LIB_NAME}.so )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-major-link DESTINATION lib
RENAME ${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR} )
## Add the install directives for the runtime library.
install(TARGETS ${ROCM_SMI_TARGET}
LIBRARY DESTINATION ${ROCM_SMI}/lib COMPONENT ${ROCM_SMI_COMPONENT})
install(FILES ${SOURCE_DIR}/include/rocm_smi/rocm_smi.h
DESTINATION rocm_smi/include/rocm_smi)
install(FILES ${SOURCE_DIR}/include/rocm_smi/kfd_ioctl.h
DESTINATION rocm_smi/include/rocm_smi)
add_subdirectory("rocm_smi")
add_subdirectory("oam")
# Generate Doxygen documentation
find_package(Doxygen)
find_package(LATEX COMPONENTS PDFLATEX)
if (DOXYGEN_FOUND AND LATEX_FOUND)
set (RSMI_MANUAL_NAME "ROCm_SMI_Manual")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
"${INC_DIR}/rocm_smi.h"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
COMMAND make > /dev/null
COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
${CMAKE_CURRENT_SOURCE_DIR}/docs/${RSMI_MANUAL_NAME}_new.pdf
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/latex)
add_custom_target(docs DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf)
add_dependencies(${ROCM_SMI_TARGET} docs)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
DESTINATION ${ROCM_SMI}/docs/${RSMI_MANUAL_NAME}.pdf)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/docs/README.md
DESTINATION ${ROCM_SMI}/docs/)
else()
message("Doxygen or Latex is not found. Will not generate documents.")
endif(DOXYGEN_FOUND AND LATEX_FOUND)
# install(TARGETS ${ROCM_SMI_TARGET}
# LIBRARY DESTINATION ${ROCM_SMI}/lib COMPONENT ${ROCM_SMI_COMPONENT})
# install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/rocm_smi.h
# DESTINATION rocm_smi/include/rocm_smi)
## Add the packaging directives for the runtime library.
@@ -237,7 +159,7 @@ endif(DOXYGEN_FOUND AND LATEX_FOUND)
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA
"${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;
${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm")
set (CPACK_DEBIAN_PACKAGE_NAME ${ROCM_SMI_PACKAGE})
set (CPACK_DEBIAN_PACKAGE_NAME ${AMD_SMI_PACKAGE})
set (CPACK_DEBIAN_PACKAGE_VERSION ${PKG_VERSION_STR})
# RPM package specific variables
@@ -245,9 +167,8 @@ set(CPACK_RPM_PRE_INSTALL_SCRIPT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post")
set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun")
set (CPACK_RPM_PACKAGE_NAME ${ROCM_SMI_PACKAGE})
set (CPACK_RPM_PACKAGE_NAME ${AMD_SMI_PACKAGE})
set (CPACK_RPM_PACKAGE_VERSION ${PKG_VERSION_STR})
include (CPack)
+1 -1
Zobrazit soubor
@@ -103,7 +103,7 @@ endfunction()
function(num_change_since_prev_pkg VERSION_PREFIX)
find_program(get_commits NAMES version_util.sh
PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules)
PATHS ${COMMON_PROJ_ROOT}/cmake_modules)
if (get_commits)
execute_process( COMMAND ${get_commits} -c ${VERSION_PREFIX}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+70
Zobrazit soubor
@@ -50,6 +50,76 @@
#include <vector>
#include <string>
// Binds `smi` to the RocmSMI singleton and makes the calling function return
// RSMI_STATUS_INVALID_ARGS when `dv_ind` is not a valid index into
// smi.monitor_devices(). Expects `dv_ind` to be in scope at the expansion
// site.
// The last line deliberately has no trailing backslash, so the macro body
// cannot swallow whatever line follows the definition.
#define CHECK_DV_IND_RANGE \
    amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
    if (dv_ind >= smi.monitor_devices().size()) { \
      return RSMI_STATUS_INVALID_ARGS; \
    }
// Validates `dv_ind` via CHECK_DV_IND_RANGE (which also introduces `smi`),
// then introduces `dev`, a shared_ptr to the monitored device at that index,
// and asserts that it is non-null. Expects `dv_ind` to be in scope.
#define GET_DEV_FROM_INDX \
CHECK_DV_IND_RANGE \
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
assert(dev != nullptr);
// Expands GET_DEV_FROM_INDX (introducing `smi` and `dev`), then introduces
// `kfd_node`, looked up in smi.kfd_node_map() by dev->kfd_gpu_id(). When the
// map has no entry for that id the calling function returns
// RSMI_INITIALIZATION_ERROR.
#define GET_DEV_AND_KFDNODE_FROM_INDX \
GET_DEV_FROM_INDX \
std::shared_ptr<amd::smi::KFDNode> kfd_node; \
if (smi.kfd_node_map().find(dev->kfd_gpu_id()) == \
smi.kfd_node_map().end()) { \
return RSMI_INITIALIZATION_ERROR; \
} \
kfd_node = smi.kfd_node_map()[dev->kfd_gpu_id()];
// Makes the calling function return RSMI_STATUS_PERMISSION when the RocmSMI
// singleton reports a non-zero euid().
// NOTE(review): presumably euid() is the effective user id of the process,
// so only root (0) passes this check -- confirm against RocmSMI.
#define REQUIRE_ROOT_ACCESS \
if (amd::smi::RocmSMI::getInstance().euid()) { \
return RSMI_STATUS_PERMISSION; \
}
// Wraps the mutex returned by GetMutex(dv_ind) and constructs a ScopedPthread
// `_lock` on it. Expects `dv_ind` to be in scope.
//
// The lock is requested in blocking mode unless the RSMI_INIT_FLAG_RESRV_TEST1
// bit is set in the init options; in non-blocking mode the calling function
// returns RSMI_STATUS_BUSY when the mutex could not be acquired.
//
// The flag is tested with bitwise '&'. A logical '&&' would be true for any
// non-zero init_options() value and would therefore switch every caller that
// passed some unrelated init flag to non-blocking mode.
// NOTE(review): presumably `_lock` releases the mutex when it goes out of
// scope -- confirm against ScopedPthread.
#define DEVICE_MUTEX \
    amd::smi::pthread_wrap _pw(*amd::smi::GetMutex(dv_ind)); \
    amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance(); \
    bool blocking_ = !(smi_.init_options() & RSMI_INIT_FLAG_RESRV_TEST1); \
    amd::smi::ScopedPthread _lock(_pw, blocking_); \
    if (!blocking_ && _lock.mutex_not_acquired()) { \
      return RSMI_STATUS_BUSY; \
    }
/* This group of macros is used to facilitate checking of support for rsmi_dev*
 * "getter" functions. When the return buffer is set to nullptr, the macro
 * checks the previously gathered device support data to see whether the
 * function, with its possible variants (e.g., memory types, firmware
 * types, ...) and subvariants (e.g., monitors/sensors), is supported.
 */
// This macro assumes `dev` is already available in the enclosing scope.
// When RT_PTR is nullptr the calling function always returns:
//   - RSMI_STATUS_NOT_SUPPORTED if dev->DeviceAPISupported() reports that the
//     calling function (__FUNCTION__) is not supported for (VR, SUB_VR);
//   - RSMI_STATUS_INVALID_ARGS otherwise, including when an
//     amd::smi::rsmi_exception is caught during the check.
// When RT_PTR is non-null the macro does nothing.
#define CHK_API_SUPPORT_ONLY(RT_PTR, VR, SUB_VR) \
if ((RT_PTR) == nullptr) { \
try { \
if (!dev->DeviceAPISupported(__FUNCTION__, (VR), (SUB_VR))) { \
return RSMI_STATUS_NOT_SUPPORTED; \
} \
return RSMI_STATUS_INVALID_ARGS; \
} catch (const amd::smi::rsmi_exception& e) { \
debug_print( \
"Exception caught when checking if API is supported %s.\n", \
e.what()); \
return RSMI_STATUS_INVALID_ARGS; \
} \
}
// Obtains `dev` (and `smi`) from `dv_ind` via GET_DEV_FROM_INDX and then runs
// the CHK_API_SUPPORT_ONLY check for the given variant and sub-variant.
#define CHK_SUPPORT(RT_PTR, VR, SUB_VR) \
GET_DEV_FROM_INDX \
CHK_API_SUPPORT_ONLY((RT_PTR), (VR), (SUB_VR))
// Convenience wrappers around CHK_SUPPORT for APIs that have no variant
// and/or no sub-variant; the missing argument is passed as
// RSMI_DEFAULT_VARIANT.
// None of the bodies ends in a backslash: a trailing continuation would splice
// the following #define into the macro body.
#define CHK_SUPPORT_NAME_ONLY(RT_PTR) \
    CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT)
#define CHK_SUPPORT_VAR(RT_PTR, VR) \
    CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT)
#define CHK_SUPPORT_SUBVAR_ONLY(RT_PTR, SUB_VR) \
    CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR))
#define DBG_FILE_ERROR(FN, WR_STR) \
if (env_ && env_->debug_output_bitfield & RSMI_DEBUG_SYSFS_FILE_PATHS) { \
std::cout << "*****" << __FUNCTION__ << std::endl; \
+11
Zobrazit soubor
@@ -47,6 +47,9 @@
#include <string>
#include <cstdint>
#include <vector>
#include "rocm_smi/rocm_smi_device.h"
#ifdef NDEBUG
#define debug_print(fmt, ...) \
@@ -62,6 +65,8 @@
namespace amd {
namespace smi {
pthread_mutex_t *GetMutex(uint32_t dv_ind);
int SameFile(const std::string fileA, const std::string fileB);
bool FileExists(char const *filename);
int isRegularFile(std::string fname, bool *is_reg);
@@ -71,6 +76,12 @@ int WriteSysfsStr(std::string path, std::string val);
bool IsInteger(const std::string & n_str);
rsmi_status_t handleException();
rsmi_status_t
GetDevValueVec(amd::smi::DevInfoTypes type,
uint32_t dv_ind, std::vector<std::string> *val_vec);
rsmi_status_t ErrnoToRsmiStatus(uint32_t err);
struct pthread_wrap {
public:
explicit pthread_wrap(pthread_mutex_t &p_mut) : mutex_(p_mut) {}
+108
Zobrazit soubor
@@ -0,0 +1,108 @@
#
# Build rules for the AMD OAM shared library and its example program.
#
# This file does not call cmake_minimum_required()/project() itself; it relies
# on names that are not defined here and must be provided by the including
# scope: CMN_SRC_LIST, CMN_INC_LIST, COMMON_INC_DIR, COMMON_SRC_ROOT,
# SHR_MUTEX_DIR, PKG_VERSION_STR, GIT and the get_version_from_tag() command.
#
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" CMake OAM (Library) ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")

## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)

# Required Defines first:
message("")
message("Build Configuration:")
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})

set(OAM_ROOT "${PROJECT_SOURCE_DIR}/oam")
set(OAM_NAME "oam")
set(OAM_COMPONENT "lib${OAM_NAME}")
set(OAM_TARGET "${OAM_NAME}")

################# Determine the library version #########################
set(SO_VERSION_GIT_TAG_PREFIX "oam_so_ver")

message("Package version: ${PKG_VERSION_STR}")

# VERSION_* variables should be set by get_version_from_tag; "1.0.0.0" is the
# default handed to it.
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)

# Test the variable by name. The previous if(${ROCM_PATCH_VERSION}) form
# expanded the value first and then evaluated it a second time, so a value
# that happened to name another variable was dereferenced again.
if(ROCM_PATCH_VERSION)
  set(VERSION_PATCH ${ROCM_PATCH_VERSION})
  set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
else()
  set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
endif()
set(${OAM_NAME}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${OAM_NAME}_VERSION_MINOR "${VERSION_MINOR}")
set(${OAM_NAME}_VERSION_PATCH "0")
set(${OAM_NAME}_VERSION_BUILD "0")
message("SOVERSION: ${SO_VERSION_STRING}")

# Create a configure file to get version info from within library
configure_file(
  "${OAM_ROOT}/src/${OAM_TARGET}Config.in"
  "${OAM_ROOT}/include/oam/${OAM_TARGET}Config.h")

set(OAM_SRC_DIR "src")
set(OAM_INC_DIR "include")
set(OAM_DOCS_DIR "docs")

# OAM_SRC_LIST already contains the common sources.
set(OAM_SRC_LIST ${CMN_SRC_LIST} "${OAM_SRC_DIR}/amd_oam.cc")
# OAM_INC_LIST is a list of include *directories*, not of header files.
set(OAM_INC_LIST ${COMMON_INC_DIR} "${OAM_INC_DIR}")

set(OAM_EXAMPLE_EXE "oam_ex")

add_executable(${OAM_EXAMPLE_EXE} "example/oam_example.c")
target_include_directories(${OAM_EXAMPLE_EXE} PRIVATE ${OAM_INC_LIST})
target_link_libraries(${OAM_EXAMPLE_EXE} PRIVATE ${OAM_TARGET})

# The common sources are listed once (through OAM_SRC_LIST) and the include
# directories are no longer passed as if they were source files.
add_library(${OAM_TARGET} SHARED ${OAM_SRC_LIST} ${CMN_INC_LIST})
# PUBLIC keeps the transitive link behaviour of the keyword-less signature.
target_link_libraries(${OAM_TARGET} PUBLIC pthread rt)
# shared_mutex is compiled from SHR_MUTEX_DIR (see CMN_SRC_LIST), so its header
# is looked up in that same directory.
target_include_directories(${OAM_TARGET} PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/include
  ${SHR_MUTEX_DIR})

## Set the VERSION and SOVERSION values
set_property(TARGET ${OAM_TARGET} PROPERTY
  SOVERSION "${VERSION_MAJOR}")
set_property(TARGET ${OAM_TARGET} PROPERTY
  VERSION "${SO_VERSION_STRING}")

## If the library is a release, strip the target library
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  # $<TARGET_FILE:...> is the full path of the library that was just built, so
  # the command does not depend on the working directory or on the file name.
  add_custom_command(
    TARGET ${OAM_TARGET}
    POST_BUILD
    COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${OAM_TARGET}>
    VERBATIM)
endif()

## Add the install directives for the runtime library.
install(TARGETS ${OAM_TARGET}
  LIBRARY DESTINATION ${OAM_NAME}/lib COMPONENT ${OAM_COMPONENT})
install(FILES ${COMMON_SRC_ROOT}/oam/include/oam/oam_mapi.h
              ${COMMON_SRC_ROOT}/oam/include/oam/amd_oam.h
  DESTINATION oam/include/oam)

# Generate Doxygen documentation
if(DOXYGEN_FOUND)
  # The generated Doxyfile is written to the location the `doc` target reads
  # it from; previously it was written to docs/Doxyfile in the binary directory
  # and the target looked for it one level up.
  # NOTE(review): the input resolves to <this dir>/docs/docs/rsmi_doxygen.cfg;
  # confirm that the doubled "docs" component is intended.
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/${OAM_DOCS_DIR}/docs/rsmi_doxygen.cfg
    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
  add_custom_target(doc
    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating AMD OAM API documentation with Doxygen" VERBATIM)
endif()
+30
Zobrazit soubor
@@ -0,0 +1,30 @@
#include <stdio.h>
#include "oam/oam_mapi.h"
#include "oam/amd_oam.h"
/*
 * Operations table used by this example. Only init, free and
 * discover_devices are wired to the amdoam_* entry points; every member that
 * is not named in the designated initialiser is zero-initialised, so it must
 * not be called through this table.
 */
const oam_ops_t amd_oam_ops = {
.init = amdoam_init,
.free = amdoam_free,
// .get_mapi_version = amdoam_get_mapi_version,
.discover_devices = amdoam_discover_devices,
};
/*
 * Initialises the library through the ops table, prints the number of
 * discovered devices and releases the library again.
 *
 * Returns 0 on success and -1 when init fails. A failing device discovery
 * (non-zero return) is not treated as an error; the count is just not printed.
 */
int main()
{
  /* discover_devices() takes an int *, so the counter has to be an int. */
  int dev_cnt = 0;
  /*
   * Zero-initialised so that init() never receives an indeterminate value.
   * NOTE(review): set this to the MAPI version the caller expects once the
   * version check performed by init() is defined.
   */
  oam_mapi_version_t version = {0, 0};

  if (amd_oam_ops.init(version)) {
    printf("init failed\n");
    return -1;
  }

  // amd_oam_ops.get_mapi_version(&version);

  if (!amd_oam_ops.discover_devices(&dev_cnt))
    printf("%d AMD devices are discovered\n", dev_cnt);

  amd_oam_ops.free();
  return 0;
}
+43
Zobrazit soubor
@@ -0,0 +1,43 @@
/*
* MIT License
*
* Copyright (c) 2020 Open Compute Project
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef OAM_INCLUDE_OAM_AMD_OAM_H_
#define OAM_INCLUDE_OAM_AMD_OAM_H_

/*
 * AMD entry points that can be plugged into an oam_ops_t operations table.
 *
 * This header uses oam_mapi_version_t without defining it: include
 * "oam/oam_mapi.h" before including this header.
 */

/*
 * <stdint.h> is usable from both C and C++, and it is included before the
 * extern "C" block so that no standard header is pulled in under C linkage.
 */
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif  // __cplusplus

/*
 * Initialise the library. `version` is the MAPI version passed in by the
 * caller. NOTE(review): callers in this repository treat a non-zero return as
 * failure -- confirm against the implementation.
 */
int amdoam_init(oam_mapi_version_t version);

/* Counterpart of amdoam_init(). */
int amdoam_free(void);

// int amdoam_get_mapi_version(oam_mapi_version_t *version);

/*
 * Writes the number of discovered devices to *device_count.
 * NOTE(review): callers in this repository treat a zero return as success --
 * confirm against the implementation.
 */
int amdoam_discover_devices(int *device_count);

#ifdef __cplusplus
}
#endif  // __cplusplus
#endif  // OAM_INCLUDE_OAM_AMD_OAM_H_
+647
Zobrazit soubor
@@ -0,0 +1,647 @@
/*
* MIT License
*
* Copyright (c) 2020 Open Compute Project
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef OAM_INCLUDE_OAM_OAM_MAPI_H_
#define OAM_INCLUDE_OAM_OAM_MAPI_H_
/**
* \file oam_mapi.h
* \brief OAM management and monitoring library API definitions
*/
#if defined(__cplusplus)
extern "C" {
#endif
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>
/**
 * \struct oam_mapi_version_t
 * \brief OAM library API version
 * \details Major/minor version pair.
 * All the libraries versions are expected to be backward compatible.
 * The major version increment indicates a new API has been added.
 * Minor version increment indicates an interface change.
 */
typedef struct oam_mapi_version {
uint32_t major;
uint32_t minor;
} oam_mapi_version_t;
/**
 * \struct oam_dev_id_t
 * \brief Local identifier for the device
 * \details Immutable device identifier
 * This is unique within the chassis.
 */
typedef struct oam_dev_id {
/*! local identifier for the device */
int device_id;
} oam_dev_id_t;
/**
 * \struct oam_net_dev_id_t
 * \brief Network identifier for the device
 * \details Immutable network identifier for the device.
 * This is unique across the entire network.
 */
typedef struct oam_net_dev_id {
/*! unique network identifier for the device */
int network_id;
} oam_net_dev_id_t;
/*
 * Sizes of the fixed-length char arrays in oam_dev_properties_t.
 * NOTE(review): presumably each size includes the terminating NUL -- confirm.
 */
#define DEVICE_VENDOR_LEN 128
#define DEVICE_NAME_LEN 128
#define DEVICE_SKU_LEN 128
#define BOARD_NAME_LEN 128
#define BOARD_REVISION_LEN 128
#define BOARD_SERIAL_NUM_LEN 128
/**
 * \struct oam_dev_properties_t
 * \brief Static properties of one device
 * \details Identifier of the device together with its vendor, device, SKU and
 * board strings. Every string is a fixed-size char array.
 */
typedef struct oam_dev_properties {
/*! local identifier for the device (see oam_dev_id_t) */
oam_dev_id_t device_id;
/*! vendor name */
char device_vendor[DEVICE_VENDOR_LEN];
/*! Device name */
char device_name[DEVICE_NAME_LEN];
/*! SKU name */
char sku_name[DEVICE_SKU_LEN];
/*! Board name */
char board_name[BOARD_NAME_LEN];
/*! Board revision */
char board_revision[BOARD_REVISION_LEN];
/*!
 * Board Serial Number or UUID any other identifier, which can be used
 * to identify devices uniquely and physically.
 */
char board_serial_number[BOARD_SERIAL_NUM_LEN];
} oam_dev_properties_t;
/**
 * \struct oam_sensor_count_t
 * \brief Number of sensors of each kind
 * \details One counter per sensor kind: temperature, power, voltage and
 * current sensors, plus the number of fans.
 */
typedef struct oam_sensor_count {
uint32_t num_temperature_sensors;
uint32_t num_power_sensors;
uint32_t num_voltage_sensors;
uint32_t num_current_sensors;
uint32_t num_fans;
} oam_sensor_count_t;
/**
 * \enum oam_sensor_type_t
 * \brief Sensor types
 * \details This enumerated type defines available sensors types.
 * Known types are numbered from 0; OAM_SENSOR_TYPE_UNKNOWN is fixed at 0xFF.
 */
typedef enum oam_sensor_type {
OAM_SENSOR_TYPE_POWER = 0,
OAM_SENSOR_TYPE_VOLTAGE,
OAM_SENSOR_TYPE_CURRENT,
OAM_SENSOR_TYPE_TEMP,
OAM_SENSOR_TYPE_FAN_SPEED,
OAM_SENSOR_TYPE_UNKNOWN = 0xFF
} oam_sensor_type_t;
/**
 * \enum oam_power_sensor_scale_t
 * \brief scale for power measurements
 * \details This enumerated type defines available scales for power
 * measurements (uW, mW and W, going by the enumerator names).
 */
typedef enum oam_power_sensor_scale {
OAM_POWER_SCALE_uW = 0,
OAM_POWER_SCALE_mW,
OAM_POWER_SCALE_W,
} oam_power_sensor_scale_t;
/**
 * \enum oam_voltage_sensor_scale_t
 * \brief scale for voltage measurements
 * \details This enumerated type defines available scales for voltage
 * measurements (uV, mV and V, going by the enumerator names).
 */
typedef enum oam_voltage_sensor_scale {
OAM_VOLTAGE_SCALE_uV = 0,
OAM_VOLTAGE_SCALE_mV,
OAM_VOLTAGE_SCALE_V,
} oam_voltage_sensor_scale_t;
/**
 * \enum oam_current_sensor_scale_t
 * \brief scale for current measurements
 * \details This enumerated type defines available scales for current
 * measurements (uA, mA and A, going by the enumerator names).
 */
typedef enum oam_current_sensor_scale {
OAM_CURRENT_SCALE_uA = 0,
OAM_CURRENT_SCALE_mA,
OAM_CURRENT_SCALE_A,
} oam_current_sensor_scale_t;
/**
 * \enum oam_temp_sensor_scale_t
 * \brief scale for temperature measurements
 * \details This enumerated type defines available scales for temperature
 * measurements.
 * NOTE(review): presumably _C is Celsius and _F is Fahrenheit -- confirm.
 */
typedef enum oam_temp_sensor_scale {
OAM_TEMP_SCALE_C = 0,
OAM_TEMP_SCALE_F
} oam_temp_sensor_scale_t;
/**
 * \enum oam_fan_sensor_scale_t
 * \brief scale for fan speed measurements
 * \details This enumerated type defines available scales for fan speed
 * measurements (Hz, KHz and MHz, going by the enumerator names).
 */
typedef enum oam_fan_sensor_scale {
OAM_FAN_SPEED_Hz = 0,
OAM_FAN_SPEED_KHz,
OAM_FAN_SPEED_MHz
} oam_fan_sensor_scale_t;
/**
 * \union oam_sensor_scale_t
 * \brief Scale of a sensor reading
 * \details Holds one scale value; one member per kind of sensor.
 * NOTE(review): presumably the member to read is selected by the sensor_type
 * stored next to it in oam_sensor_info_t -- confirm.
 * NOTE(review): `volate_scale` looks like a misspelling of "voltage_scale";
 * renaming the member would change the public API.
 */
typedef union oam_sensor_scale {
oam_power_sensor_scale_t power_scale;
oam_voltage_sensor_scale_t volate_scale;
oam_current_sensor_scale_t current_scale;
oam_temp_sensor_scale_t temp_scale;
oam_fan_sensor_scale_t fan_scale;
} oam_sensor_scale_t;
/**
 * \struct oam_dev_handle_t
 * \brief Device handle
 * \details Device handle obtained using open call
 * The same handle is used by all the APIs which are used to perform
 * specific operation on that device.
 * The handle itself is an untyped pointer.
 */
typedef struct oam_dev_handle {
void *handle;
} oam_dev_handle_t;
/**
 * \enum oam_dev_mode_t
 * \brief Device open modes
 * \details This enumerated type defines modes in which the device can be opened
 * For some operations e.g. health check user should open the device
 * in exclusive mode, so that if there are many applications using the same
 * device there are no side effects.
 * NOTE(review): OAM_DEV_MODE_NONEXLUSIVE looks like a misspelling of
 * "NONEXCLUSIVE"; renaming the enumerator would change the public API.
 */
typedef enum oam_dev_mode {
OAM_DEV_MODE_EXCLUSIVE = 0,
OAM_DEV_MODE_NONEXLUSIVE = 1,
OAM_DEV_MODE_UNKNOWN = 0xFF
} oam_dev_mode_t;
/**
 * \def OAM_SENSOR_NAME_MAX
 * \brief size of the sensor name buffer in oam_sensor_info_t
 */
#define OAM_SENSOR_NAME_MAX 256
/**
 * \struct oam_sensor_info_t
 * \brief Sensor information
 * \details Name, type, scale and value of one sensor.
 * NOTE(review): presumably `value` is expressed in the unit selected by
 * `scale` -- confirm.
 */
typedef struct oam_sensor_info {
char sensor_name[OAM_SENSOR_NAME_MAX];
oam_sensor_type_t sensor_type;
oam_sensor_scale_t scale;
int32_t value;
} oam_sensor_info_t;
/**
 * \struct oam_dev_error_count_t
 * \brief Device error information
 * \details Various types of errors reported by device: a total plus separate
 * counters for fatal, unknown and ECC errors.
 */
typedef struct oam_dev_error_count {
uint32_t total_error_count;
uint32_t fatal_error_count;
uint32_t unknown_error_count;
uint32_t ecc_error_count;
} oam_dev_error_count_t;
/**
 * \struct oam_firmware_version_t
 * \brief Firmware version information
 * \details Structure to store various firmware versions of OAM module.
 * Each version is a major/minor pair stored as oam_mapi_version_t.
 */
typedef struct oam_firmware_version {
oam_mapi_version_t device_boot_fw_version;
oam_mapi_version_t device_fw_version;
oam_mapi_version_t board_boot_fw_version;
oam_mapi_version_t board_fw_version;
} oam_firmware_version_t;
/**
 * \struct oam_pci_info_t
 * \brief PCI information for the device
 * \details Structure to store PCI (Domain, BDF) information of the device:
 * a 16-bit domain and 8-bit bus, device and function numbers.
 */
typedef struct oam_pci_info {
uint16_t domain;
uint8_t bus;
uint8_t device;
uint8_t function;
} oam_pci_info_t;
/**
 * \enum oam_net_port_state_t
 * \brief Network port state
 * \details This enumerated type defines various states of the network port:
 * disabled (0) or enabled (1).
 */
typedef enum oam_net_port_state {
OAM_NET_PORT_DISABLED = 0,
OAM_NET_PORT_ENABLED = 1
} oam_net_port_state_t;
/**
 * \enum oam_net_port_status_t
 * \brief Network port status
 * \details This enumerated type defines various status of the network port.
 * Note that "up" is 0 and "down" is 1, so the value must not be used as a
 * boolean "is up" flag.
 */
typedef enum oam_net_port_status {
OAM_NET_PORT_UP = 0,
OAM_NET_PORT_DOWN = 1,
} oam_net_port_status_t;
/**
 * \enum oam_net_port_id_t
 * \brief Network port identifiers
 * \details This enumerated type defines various identifiers for network ports.
 * OAM_NET_PORT_MAX is fixed at 0xFFFF; it is not the number of ports listed
 * here.
 */
typedef enum oam_net_port_id {
OAM_NET_PORT0 = 0,
OAM_NET_PORT1 = 1,
OAM_NET_PORT2 = 2,
OAM_NET_PORT_MAX = 0xFFFF
} oam_net_port_id_t;
/**
 * \enum oam_firmware_modes_t
 * \brief Supported mode to update firmware on device
 * \details This enumerated type defines various modes which are supported by
 * the device to update firmware: download only, or download and activate.
 */
typedef enum oam_firmware_modes {
OAM_DOWNLOAD_ONLY = 0,
OAM_DOWNLOAD_ACTIVATE = 1
} oam_firmware_modes_t;
/**
 * \def OAM_NET_PORT_NAME
 * \brief size of the network port name buffer in oam_net_port_desc_t
 */
#define OAM_NET_PORT_NAME 256
/**
 * \struct oam_net_port_desc_t
 * \brief Network port description
 * \details Structure to store additional details about the network port;
 * currently only its name.
 */
typedef struct oam_net_port_desc {
char name[OAM_NET_PORT_NAME];
} oam_net_port_desc_t;
/**
 * \def OAM_DEV_HOST_NAME
 * \brief size of the host name buffer in oam_net_dev_info_t
 */
#define OAM_DEV_HOST_NAME 256
/**
 * \struct oam_net_dev_info_t
 * \brief Information about the device on a network
 * \details Structure to store additional details about the network device
 * on a particular network: its network identifier, a host name and its PCI
 * information.
 */
typedef struct oam_net_dev_info {
oam_net_dev_id_t net_dev_id;
char host_name[OAM_DEV_HOST_NAME];
oam_pci_info_t pci_info;
} oam_net_dev_info_t;
/**
 * \struct oam_neighbour_info_t
 * \brief Information about device neighbours
 * \details Structure to store information about device neighbours on the
 * network: a port identifier and the network information of a device.
 * NOTE(review): presumably device_port is the local port the neighbour
 * described by device_info is attached to -- confirm.
 */
typedef struct oam_neighbour_info {
oam_net_port_id_t device_port;
oam_net_dev_info_t device_info;
} oam_neighbour_info_t;
/**
 * \enum oam_dev_tpc_id_t
 * \brief TPC identifiers
 * \details This enumerated type defines various identifiers for TPCs.
 * Values are assigned implicitly from 0, so OAM_DEV_TPC_MAX equals the number
 * of identifiers listed before it.
 */
typedef enum oam_dev_tpc_id {
OAM_DEV_TPC0,
OAM_DEV_TPC1,
OAM_DEV_TPC2,
OAM_DEV_TPC_MAX
} oam_dev_tpc_id_t;
/**
 * \def OAM_TPC_NAME
 * \brief size of the TPC name buffer in oam_tpc_desc_t
 */
#define OAM_TPC_NAME 256
/**
 * \struct oam_tpc_desc_t
 * \brief TPC description
 * \details Structure to store information about TPC e.g. name corresponding
 * to the id etc.
 * The name buffer is sized by OAM_TPC_NAME (same value as the previously
 * hard-coded 256), matching how oam_net_port_desc_t uses OAM_NET_PORT_NAME.
 */
typedef struct oam_tpc_desc {
  char name[OAM_TPC_NAME];
} oam_tpc_desc_t;
/**
 * \struct oam_dev_tpc_stats_t
 * \brief TPC statistical information
 * \details Structure to store information about TPC statistical information
 * e.g. TPC utilization
 * NOTE(review): the range of `util` (fraction 0..1 or percentage 0..100) is
 * not defined here -- confirm.
 */
typedef struct oam_dev_tpc_stats {
double util;
} oam_dev_tpc_stats_t;
/**
 * \enum oam_dev_mem_id_t
 * \brief Device memory identifiers
 * \details This enumerated type defines various identifiers for device memories.
 * Values are assigned implicitly from 0, so OAM_DEV_MEM_MAX equals the number
 * of identifiers listed before it.
 */
typedef enum oam_dev_mem_id {
OAM_DEV_MEM0,
OAM_DEV_MEM1,
OAM_DEV_MEM2,
OAM_DEV_MEM_MAX
} oam_dev_mem_id_t;
/**
 * \struct oam_mem_desc_t
 * \brief Device memory description
 * \details Structure to store additional details about a device memory;
 * currently only its name, held in a 256-byte buffer.
 */
typedef struct oam_mem_desc {
char name[256];
} oam_mem_desc_t;
/**
 * \struct oam_dev_mem_stats_t
 * \brief Device memory statistical information
 * \details Structure to store various statistical information about device
 * memory.
 *
 * NOTE(review): all counters are 32-bit and no unit is stated in this
 * header; if the unit is bytes, values above 4 GiB cannot be represented --
 * confirm the unit against the OAM spec.
 */
typedef struct oam_dev_mem_stats {
uint32_t total_mem;
uint32_t allocated_mem;
uint32_t free_mem;
} oam_dev_mem_stats_t;
/**
 * \struct oam_net_port_pkt_stats_t
 * \brief Device network port statistical information
 * \details Structure to store various statistical information about the
 * network packets on a given port.
 */
typedef struct oam_net_port_pkt_stats {
/* Receive / transmit counters; presumably packet counts -- TODO confirm. */
uint64_t rx_count;
uint64_t tx_count;
/* Receive / transmit error counters. */
uint64_t rx_errors;
uint64_t tx_errors;
} oam_net_port_pkt_stats_t;
/**
 * \struct oam_ops_t
 * \brief OAM Device operations
 * \details Structure provides the list of APIs which need to be
 * supported by the OAM library. Every entry is a function pointer
 * returning int.
 *
 * The member comments below use the "document the NEXT member" form
 * because each comment precedes the member it describes.
 */
typedef struct oam_ops {
/*!
 * To initialise the library instance and perform the version compatibility
 * check
 */
int (*init)(oam_mapi_version_t version);
/*!
 * Takes no arguments; presumably releases what init() set up -- TODO
 * confirm against the OAM spec.
 */
int (*free)(void);
/*!
 * To get error description from the error code
 */
int (*get_error_description)(int error_code, const char **error_description);
/*!
 * To retrieve the OAM Management interface version
 */
int (*get_mapi_version)(oam_mapi_version_t *version);
/*!
 * To retrieve the number of devices present/discovered by the library
 */
int (*discover_devices)(int *device_count);
/*!
 * To retrieve device properties for each discovered device
 */
int (*get_dev_properties)(oam_dev_properties_t *devices);
/*!
 * To retrieve PCI properties of the device
 */
int (*get_pci_properties)(oam_dev_id_t *device_id, oam_pci_info_t *pci_info);
/*!
 * To query the number of various sensors present
 */
int (*get_sensors_count)(oam_dev_id_t *device_id,
oam_sensor_count_t *sensor_count);
/*!
 * Open the device and obtain handle
 */
int (*open_device)(oam_dev_id_t *dev_id, oam_dev_mode_t mode,
oam_dev_handle_t *handle);
/*!
 * Takes a handle; presumably the counterpart of open_device() -- TODO
 * confirm.
 */
int (*close_device)(oam_dev_handle_t *handle);
/*!
 * To read various sensor values for a given sensor type
 */
int (*get_sensors_info)(oam_dev_handle_t *handle,
oam_sensor_type_t type,
uint32_t num_sensors,
oam_sensor_info_t sensor_info[]);
/*!
 * To read current error count of the device
 */
int (*get_device_error_count)(oam_dev_handle_t *handle,
oam_dev_error_count_t *count);
/*!
 * To update firmware on the device
 * fw_image contains a null terminated string which specifies complete
 * path where the firmware image is located
 */
int (*download_firmware)(oam_dev_id_t *device_id, char *fw_image,
oam_firmware_modes_t mode);
/*!
 * To query firmware versions
 */
int (*get_firmware_version)(oam_dev_id_t *device_id,
oam_firmware_version_t *version);
/*!
 * To get network id from device id
 */
int (*get_net_dev_id)(oam_dev_id_t *device_id, oam_net_dev_id_t *net_device);
/*
 * Network management APIs.
 */
/*!
 * Discover network.
 */
int (*discover_network)(int *net_dev_count);
int (*get_dev_net_properties)(oam_net_dev_info_t *net_dev_info);
int (*get_neighbour_count)(oam_dev_id_t *device,
oam_net_port_id_t local_port_id,
uint32_t *neighbor_count);
int (*get_neighbours_info)(oam_dev_id_t *device,
oam_net_port_id_t local_port_id,
uint32_t *neighbors_count,
oam_neighbour_info_t *neighbours_info);
int (*configure_network)(oam_net_dev_id_t *net_devices,
uint32_t *net_device_count,
char *network_name);
int (*destroy_network)(char *network_name);
int (*query_network)(char *network_name, oam_net_dev_info_t *devices,
uint32_t *device_count);
int (*get_network_count)(uint32_t *network_count);
int (*list_networks)(char *network_names[]);
/*
 * Various statistics related to blocks
 */
/*!
 * To query number of ports
 */
int (*get_net_port_count)(oam_dev_handle_t *handle, uint32_t *count,
oam_net_port_id_t *port_ids);
int (*get_net_port_desc)(oam_dev_handle_t *handle, oam_net_port_id_t *port,
oam_net_port_desc_t *desc);
int (*get_net_port_state)(oam_dev_handle_t *handle, oam_net_port_id_t *port,
oam_net_port_state_t *state);
int (*check_net_port_status)(oam_dev_handle_t *handle,
oam_net_port_id_t *port,
oam_net_port_status_t *status);
int (*get_net_port_pkt_stats)(oam_dev_handle_t *handle,
oam_net_port_id_t *port,
uint32_t duration_sec,
oam_net_port_pkt_stats_t *stats);
int (*query_net_port_bandwidth)(oam_dev_handle_t *handle,
oam_net_port_id_t *port,
uint32_t duration_sec,
double *bandwidth);
int (*get_tpc_count)(oam_dev_handle_t *handle, uint32_t *count,
oam_dev_tpc_id_t *tpc_ids);
int (*get_tpc_desc)(oam_dev_handle_t *handle, oam_dev_tpc_id_t *tpc_id,
oam_tpc_desc_t *desc);
/* NOTE(review): the TPC id parameter below is named "port". */
int (*get_tpc_stats)(oam_dev_handle_t *handle,
oam_dev_tpc_id_t *port,
oam_dev_tpc_stats_t *stats,
uint32_t duration_sec);
int (*get_mem_count)(oam_dev_handle_t *handle, uint32_t *count,
oam_dev_mem_id_t *mem_ids);
/* NOTE(review): the memory id parameter below is named "tpc_id". */
int (*get_mem_desc)(oam_dev_handle_t *handle, oam_dev_mem_id_t *tpc_id,
oam_mem_desc_t *desc);
int (*get_mem_stats)(oam_dev_handle_t *handle, oam_dev_mem_id_t *mem_id,
oam_dev_mem_stats_t *stats);
/*
 * To check the health of the individual components, the library
 * generates a test workload to check if the block is functioning properly
 * or not. So no other workload should be running while calling these
 * APIs
 */
int (*check_tpc_health)(oam_dev_id_t *device_id, oam_dev_tpc_id_t *tpc_id);
int (*check_net_port_health)(oam_dev_id_t *device_id,
oam_net_port_id_t *port);
/* NOTE(review): the memory id parameter below is named "port". */
int (*check_mem_health)(oam_dev_id_t *device_id, oam_dev_mem_id_t *port);
/*
* Following needs more attention, will work on in next
int (*get_fan_speed)(oam_dev_t *oam);
int (*set_fan_speed)(oam_dev_t *oam, int speed);
int (*get_power_cap)(oam_dev_t *oam);
int (*set_power_cap)(oam_dev_t *oam, int power);
int (*get_telemetry)(oam_dev_t *oam);
*/
} oam_ops_t;
#ifdef __cplusplus
}
#endif
#endif // OAM_INCLUDE_OAM_OAM_MAPI_H_
+161
Zobrazit soubor
@@ -0,0 +1,161 @@
/*
* MIT License
*
* Copyright (c) 2020 Open Compute Project
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <assert.h>
#include <sstream>
#include "rocm_smi/rocm_smi_common.h"
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_device.h"
#include "rocm_smi/rocm_smi_utils.h"
#include "rocm_smi/rocm_smi_exception.h"
#include "rocm_smi/rocm_smi_counters.h"
#include "rocm_smi/rocm_smi_kfd.h"
#include "rocm_smi/rocm_smi.h"
#include "oam/oam_mapi.h"
#include "oam/amd_oam.h"
#define TRY try {
#define CATCH } catch (...) {return handleRSMIException();}
// Translates the exception currently being handled into an int status code.
// Must be called from inside a catch block (see the CATCH macro), because the
// work is delegated to amd::smi::handleException().
static int handleRSMIException() {
  // TODO(x): convert RSMI return to OAM return
  // For now, just return int equiv.
  return static_cast<int>(amd::smi::handleException());
}
// Initializes the underlying RSMI library on behalf of the OAM interface.
//
// version: requested OAM management-API version; currently ignored.
//
// Returns the rsmi_init() status as an int (RSMI_STATUS_SUCCESS is expected
// to be 0), or the status derived from any exception that escapes.
// Previously the status was discarded and 0 was returned unconditionally, so
// callers could not detect a failed initialization.
int amdoam_init(oam_mapi_version_t version) {
  TRY
  // TODO(x): handle version argument
  (void)version;

  rsmi_status_t ret = rsmi_init(0);

  // TODO(x): convert RSMI return to OAM return
  // For now, just return int equiv.
  return static_cast<int>(ret);
  CATCH
}
int amdoam_free(void) {
rsmi_status_t ret = rsmi_shut_down();
// TODO(x) convert rsmi return to oam return val
return static_cast<int>(ret);
}
// Reports the number of devices monitored by RSMI.
//
// device_count: out-parameter receiving the device count; must not be null.
//
// Returns -1 when device_count is null, otherwise the status of
// rsmi_num_monitor_devices() as an int.
int amdoam_discover_devices(int *device_count) {
  if (device_count == nullptr) {
    return -1;  // TODO(x): return appropriate OAM code
  }

  // Zero-initialized so that *device_count is never filled from an
  // indeterminate value if the RSMI call fails before writing its output.
  uint32_t dv_cnt = 0;
  rsmi_status_t ret = rsmi_num_monitor_devices(&dv_cnt);
  *device_count = static_cast<int>(dv_cnt);

  // TODO(x) convert rsmi return to oam return val
  return static_cast<int>(ret);
}
// TODO(x): This function doesn't work for OAM. It's just a version
// of rsmi_dev_ecc_count_get(), which has similar functionality.
// The purpose here is just to drive refactoring; e.g., making macros
// available and previously static functions global.
//
// Placeholder behavior: reads the error counters of RSMI_GPU_BLOCK_GFX on
// device index 0 and stores their sum in count->total_error_count.
//
// handle: currently ignored.
// count:  out-parameter; when null, the CHK_SUPPORT_VAR macro returns early
//         with a support-query status instead of reading any counters.
//
// Returns an RSMI status code as an int.
int
get_device_error_count(oam_dev_handle_t *handle,
                       oam_dev_error_count_t *count) {
  std::vector<std::string> val_vec;
  rsmi_status_t ret;

  TRY
  // TODO(x): replace with final code...
  // Below, we are just returning errors for RSMI_GPU_BLOCK_GFX as a
  // placeholder
  (void)handle;  // Just ignore for now
  rsmi_gpu_block_t block = RSMI_GPU_BLOCK_GFX;

  // The macro CHK_SUPPORT_VAR assumes the existence of a device index variable
  // "dv_ind". Presumably, the device index will come from the "handle"
  // pointer. Since I don't know how that will be implemented, for now we
  // will just make up a device index:
  uint32_t dv_ind = 0;

  CHK_SUPPORT_VAR(count, block)

  amd::smi::DevInfoTypes type;
  switch (block) {
    case RSMI_GPU_BLOCK_UMC:
      type = amd::smi::kDevErrCntUMC;
      break;

    case RSMI_GPU_BLOCK_SDMA:
      type = amd::smi::kDevErrCntSDMA;
      break;

    case RSMI_GPU_BLOCK_GFX:
      type = amd::smi::kDevErrCntGFX;
      break;

    default:
      return RSMI_STATUS_NOT_SUPPORTED;
  }

  DEVICE_MUTEX

  ret = GetDevValueVec(type, dv_ind, &val_vec);

  if (ret == RSMI_STATUS_FILE_ERROR) {
    return RSMI_STATUS_NOT_SUPPORTED;
  }
  if (ret != RSMI_STATUS_SUCCESS) {
    return ret;
  }

  // Exactly two lines are expected: "ue: <n>" followed by "ce: <n>".
  assert(val_vec.size() == 2);
  if (val_vec.size() != 2) {
    // Keeps release builds (assert compiled out) from indexing out of bounds.
    return RSMI_STATUS_UNEXPECTED_SIZE;
  }

  std::string junk;
  // Parse both counters into locals first. The previous code streamed both
  // values straight into count->total_error_count, so the "ce" value silently
  // overwrote the "ue" value.
  decltype(count->total_error_count) uncorrectable = 0;
  decltype(count->total_error_count) correctable = 0;

  std::istringstream fs1(val_vec[0]);
  fs1 >> junk;
  assert(junk == "ue:");
  fs1 >> uncorrectable;

  std::istringstream fs2(val_vec[1]);
  fs2 >> junk;
  assert(junk == "ce:");
  fs2 >> correctable;

  count->total_error_count = uncorrectable + correctable;

  return ret;
  CATCH
}
+56
Zobrazit soubor
@@ -0,0 +1,56 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2017, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
#define INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
// This file is generated on build.
#define rocm_smi_VERSION_MAJOR @rocm_smi_VERSION_MAJOR@
#define rocm_smi_VERSION_MINOR @rocm_smi_VERSION_MINOR@
#define rocm_smi_VERSION_PATCH @rocm_smi_VERSION_PATCH@
#define rocm_smi_VERSION_BUILD "@rocm_smi_VERSION_BUILD@"
#endif // INCLUDE_ROCM_SMI_ROCM_SMI64CONFIG_H_
+143
Zobrazit soubor
@@ -0,0 +1,143 @@
#
# Build rules for the ROCm SMI shared library and its example program
#
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" CMake ROCm SMI (Library) ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)
# Required Defines first:
message("")
message("Build Configuration:")
# message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
# message("------------Compiler: " ${CMAKE_CXX_COMPILER})
# message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
# message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
# message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
# message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
# message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
# message("--------RSMI Inc Dir: " ${RSMI_INC_DIR})
# message("")
set(ROCM_SMI "rocm_smi")
set(ROCM_SMI_COMPONENT "lib${ROCM_SMI}")
set(ROCM_SMI_TARGET "${ROCM_SMI}64")
################# Determine the library version #########################
set(SO_VERSION_GIT_TAG_PREFIX "rsmi_so_ver")

# PKG_VERSION_STR is not assigned in this file; it is expected to have been
# set by the including CMakeLists.txt before this file is processed.
message("Package version: ${PKG_VERSION_STR}")

# get_version_from_tag is a project helper defined elsewhere. It is expected
# to set the VERSION_* variables; "1.0.0.0" is presumably the fallback used
# when no matching git tag exists -- TODO confirm against the helper.
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)

# Test the variable by name instead of expanding it inside if(): the expanded
# form re-interprets the *value* as a constant or as another variable name, so
# a non-numeric value was silently treated as false.
if (ROCM_PATCH_VERSION)
  set(VERSION_PATCH ${ROCM_PATCH_VERSION})
  set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")
else()
  set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
endif ()

# Values substituted into the generated version header
# (see the @rocm_smi_VERSION_*@ placeholders in the Config.in template).
set(${ROCM_SMI}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${ROCM_SMI}_VERSION_MINOR "${VERSION_MINOR}")
set(${ROCM_SMI}_VERSION_PATCH "0")
set(${ROCM_SMI}_VERSION_BUILD "0")
message("SOVERSION: ${SO_VERSION_STRING}")
# Create a configure file to get version info from within library
# NOTE(review): the generated header is written into the source tree
# (include/rocm_smi/) rather than the build tree.
configure_file(
"${PROJECT_SOURCE_DIR}/src/${ROCM_SMI_TARGET}Config.in"
"${PROJECT_SOURCE_DIR}/include/rocm_smi/${ROCM_SMI_TARGET}Config.h")
set(RSMI_SRC_DIR "src")
set(RSMI_INC_DIR "include")
set(RSMI_DOCS_DIR "docs")
# Add any rocm_smi_lib specific source files here
# NOTE(review): this starts out as a copy of CMN_SRC_LIST, and add_library()
# below passes both lists, so the common sources are listed twice.
set(SMI_SRC_LIST ${CMN_SRC_LIST})
# Add any rocm_smi_lib specific headers here
set(SMI_INC_LIST "")
# Example program linked against the shared library defined below.
set(SMI_EXAMPLE_EXE "rocm_smi_ex")
add_executable(${SMI_EXAMPLE_EXE} "example/rocm_smi_example.cc")
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
# The shared library itself. CMN_SRC_LIST / CMN_INC_LIST are not set in this
# file; they are expected to come from the including CMakeLists.txt.
add_library(${ROCM_SMI_TARGET} SHARED ${CMN_SRC_LIST} ${SMI_SRC_LIST}
${CMN_INC_LIST} ${SMI_INC_LIST})
target_link_libraries(${ROCM_SMI_TARGET} pthread rt)
target_include_directories(${ROCM_SMI_TARGET} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR} ${COMMON_PROJ_ROOT}/common/shared_mutex)
## Set the VERSION and SOVERSION values
# SOVERSION is the major version only; VERSION is the full SO version string
# computed in the version section above.
set_property(TARGET ${ROCM_SMI_TARGET} PROPERTY
SOVERSION "${VERSION_MAJOR}")
set_property(TARGET ${ROCM_SMI_TARGET} PROPERTY
VERSION "${SO_VERSION_STRING}")
## If the library is a release, strip the target library
# $<TARGET_FILE:...> resolves to the library file actually produced for the
# target, so this no longer depends on the "lib" prefix, the ".so" suffix, or
# the working directory of the post-build step. VERBATIM keeps argument
# escaping platform-independent.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  add_custom_command(
      TARGET ${ROCM_SMI_TARGET}
      POST_BUILD
      COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${ROCM_SMI_TARGET}>
      VERBATIM)
endif ()
## Add symlinks from top level ROCm lib dir to rocm-smi lib so files
# Each custom target creates, in the build directory, a symlink named after
# the target itself; the install(FILES ... RENAME ...) rules below then
# install those links under lib/ with the real library names.
# ROCM_SMI_LIB_NAME is not set in this file; it is expected to come from the
# including CMakeLists.txt.
add_custom_target ( so-link ALL WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so so-link )
add_custom_target ( so-major-link ALL WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR} COMMAND ${CMAKE_COMMAND}
-E create_symlink
../${ROCM_SMI}/lib/${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR}
so-major-link )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-link DESTINATION lib RENAME
${ROCM_SMI_LIB_NAME}.so )
install ( FILES ${CMAKE_CURRENT_BINARY_DIR}/so-major-link DESTINATION lib
RENAME ${ROCM_SMI_LIB_NAME}.so.${VERSION_MAJOR} )
## Add the install directives for the runtime library.
install(TARGETS ${ROCM_SMI_TARGET}
LIBRARY DESTINATION ${ROCM_SMI}/lib COMPONENT ${ROCM_SMI_COMPONENT})
# Public headers installed next to the library.
# COMMON_SRC_ROOT is not set in this file -- presumably provided by the
# including CMakeLists.txt; TODO confirm.
install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/rocm_smi.h
DESTINATION rocm_smi/include/rocm_smi)
install(FILES ${COMMON_SRC_ROOT}/include/rocm_smi/kfd_ioctl.h
DESTINATION rocm_smi/include/rocm_smi)
# Generate Doxygen documentation
# Only done when both Doxygen and LaTeX were found; otherwise a message is
# printed and no documentation targets are created.
if (DOXYGEN_FOUND AND LATEX_FOUND)
  set (RSMI_MANUAL_NAME "ROCm_SMI_Manual")

  # Instantiate the Doxygen configuration in the build tree.
  configure_file(${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
                 ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)

  # Step 1: run Doxygen to produce the LaTeX sources.
  # NOTE(review): INC_DIR is not set in this file (RSMI_INC_DIR is) -- confirm
  # that the header dependency below resolves to the intended path.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
      COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/docs/rsmi_doxygen.cfg
              "${INC_DIR}/rocm_smi.h"
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

  # Step 2: build the PDF from the LaTeX sources and copy it into docs/.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
      COMMAND make > /dev/null
      COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
              ${CMAKE_CURRENT_SOURCE_DIR}/docs/${RSMI_MANUAL_NAME}_new.pdf
      DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.tex
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/latex)

  add_custom_target(docs DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf)
  add_dependencies(${ROCM_SMI_TARGET} docs)

  # DESTINATION names a directory, so the manual's file name has to be given
  # with RENAME. Passing ".../<name>.pdf" as DESTINATION installed refman.pdf
  # into a directory called "<name>.pdf".
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/latex/refman.pdf
          DESTINATION ${ROCM_SMI}/docs
          RENAME ${RSMI_MANUAL_NAME}.pdf)
  install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/docs/README.md
          DESTINATION ${ROCM_SMI}/docs/)
else()
  message("Doxygen or Latex is not found. Will not generate documents.")
endif()
+39 -171
Zobrazit soubor
@@ -76,129 +76,8 @@
static const uint32_t kMaxOverdriveLevel = 20;
// Maps an errno value onto the corresponding rsmi_status_t code.
// Values without a dedicated mapping yield RSMI_STATUS_UNKNOWN_ERROR.
static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
  rsmi_status_t status = RSMI_STATUS_UNKNOWN_ERROR;

  switch (err) {
    case 0:
      status = RSMI_STATUS_SUCCESS;
      break;

    case ESRCH:
      status = RSMI_STATUS_NOT_FOUND;
      break;

    case EACCES:
      status = RSMI_STATUS_PERMISSION;
      break;

    // EPERM is deliberately grouped with ENOENT here, not with EACCES.
    case EPERM:
    case ENOENT:
      status = RSMI_STATUS_NOT_SUPPORTED;
      break;

    case EBADF:
    case EISDIR:
      status = RSMI_STATUS_FILE_ERROR;
      break;

    case EINTR:
      status = RSMI_STATUS_INTERRUPT;
      break;

    case EIO:
      status = RSMI_STATUS_UNEXPECTED_SIZE;
      break;

    case ENXIO:
      status = RSMI_STATUS_UNEXPECTED_DATA;
      break;

    case EBUSY:
      status = RSMI_STATUS_BUSY;
      break;

    default:
      break;
  }

  return status;
}
// Converts the exception currently being handled into an rsmi_status_t.
// Must be called from within a catch block: the bare "throw;" below rethrows
// the in-flight exception so it can be classified by the handlers that
// follow (see the CATCH macro, which calls this from catch (...)).
static rsmi_status_t handleException() {
try {
throw;
} catch (const std::bad_alloc& e) {
return RSMI_STATUS_OUT_OF_RESOURCES;
} catch (const amd::smi::rsmi_exception& e) {
// Library exceptions carry their own status code.
debug_print("Exception caught: %s.\n", e.what());
return e.error_code();
} catch (const std::exception& e) {
debug_print("Exception caught: %s\n", e.what());
return RSMI_STATUS_INTERNAL_EXCEPTION;
} catch (const std::nested_exception& e) {
debug_print("Callback threw.\n");
return RSMI_STATUS_INTERNAL_EXCEPTION;
} catch (int erno) {
// A thrown int is treated as an errno value.
return errno_to_rsmi_status(erno);
} catch (...) {
debug_print("Unknown exception caught.\n");
return RSMI_STATUS_INTERNAL_EXCEPTION;
}
}
#define TRY try {
#define CATCH } catch (...) {return handleException();}
#define CHECK_DV_IND_RANGE \
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); \
if (dv_ind >= smi.monitor_devices().size()) { \
return RSMI_STATUS_INVALID_ARGS; \
} \
#define GET_DEV_FROM_INDX \
CHECK_DV_IND_RANGE \
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
assert(dev != nullptr);
#define GET_DEV_AND_KFDNODE_FROM_INDX \
GET_DEV_FROM_INDX \
std::shared_ptr<amd::smi::KFDNode> kfd_node; \
if (smi.kfd_node_map().find(dev->kfd_gpu_id()) == \
smi.kfd_node_map().end()) { \
return RSMI_INITIALIZATION_ERROR; \
} \
kfd_node = smi.kfd_node_map()[dev->kfd_gpu_id()];
#define REQUIRE_ROOT_ACCESS \
if (amd::smi::RocmSMI::getInstance().euid()) { \
return RSMI_STATUS_PERMISSION; \
}
// Acquires the per-device mutex for the device index "dv_ind", which must be
// in scope at the expansion site, and holds it until the enclosing scope ends.
// When the RSMI_INIT_FLAG_RESRV_TEST1 bit is set in the init options the lock
// attempt is non-blocking, and the enclosing function returns
// RSMI_STATUS_BUSY if the mutex could not be acquired.
//
// The flag is tested with bitwise '&'. The previous logical '&&' evaluated to
// true for ANY non-zero init options, which switched every such caller to the
// non-blocking path.
//
// NOTE(review): get_mutex() returns nullptr for an out-of-range dv_ind and is
// dereferenced here unconditionally -- callers must range-check dv_ind first.
#define DEVICE_MUTEX \
    amd::smi::pthread_wrap _pw(*get_mutex(dv_ind)); \
    amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance(); \
    bool blocking_ = !(smi_.init_options() & RSMI_INIT_FLAG_RESRV_TEST1); \
    amd::smi::ScopedPthread _lock(_pw, blocking_); \
    if (!blocking_ && _lock.mutex_not_acquired()) { \
      return RSMI_STATUS_BUSY; \
    }
/* This group of macros is used to facilitate checking of support for rsmi_dev*
* "getter" functions. When the return buffer is set to nullptr, the macro will
* check the previously gathered device support data to see if the function,
* with possible variants (e.g., memory types, firware types,...) and
* subvariants (e.g. monitors/sensors) are supported.
*/
// This macro assumes dev already available
#define CHK_API_SUPPORT_ONLY(RT_PTR, VR, SUB_VR) \
if ((RT_PTR) == nullptr) { \
try { \
if (!dev->DeviceAPISupported(__FUNCTION__, (VR), (SUB_VR))) { \
return RSMI_STATUS_NOT_SUPPORTED; \
} \
return RSMI_STATUS_INVALID_ARGS; \
} catch (const amd::smi::rsmi_exception& e) { \
debug_print( \
"Exception caught when checking if API is supported %s.\n", \
e.what()); \
return RSMI_STATUS_INVALID_ARGS; \
} \
}
#define CHK_SUPPORT(RT_PTR, VR, SUB_VR) \
GET_DEV_FROM_INDX \
CHK_API_SUPPORT_ONLY((RT_PTR), (VR), (SUB_VR))
#define CHK_SUPPORT_NAME_ONLY(RT_PTR) \
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT)
#define CHK_SUPPORT_VAR(RT_PTR, VR) \
CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT)
#define CHK_SUPPORT_SUBVAR_ONLY(RT_PTR, SUB_VR) \
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR))
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
if (dv_ind >= smi.monitor_devices().size()) {
return nullptr;
}
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
assert(dev != nullptr);
return dev->mutex();
}
#define CATCH } catch (...) {return amd::smi::handleException();}
static uint64_t get_multiplier_from_str(char units_char) {
uint32_t multiplier = 0;
@@ -404,7 +283,7 @@ static rsmi_status_t get_dev_value_str(amd::smi::DevInfoTypes type,
GET_DEV_FROM_INDX
int ret = dev->readDevInfo(type, val_str);
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
static rsmi_status_t get_dev_value_int(amd::smi::DevInfoTypes type,
uint32_t dv_ind, uint64_t *val_int) {
@@ -415,7 +294,7 @@ static rsmi_status_t get_dev_value_int(amd::smi::DevInfoTypes type,
GET_DEV_FROM_INDX
int ret = dev->readDevInfo(type, val_int);
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
static rsmi_status_t get_dev_value_line(amd::smi::DevInfoTypes type,
@@ -427,7 +306,7 @@ static rsmi_status_t get_dev_value_line(amd::smi::DevInfoTypes type,
GET_DEV_FROM_INDX
int ret = dev->readDevInfoLine(type, val_str);
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
static rsmi_status_t set_dev_value(amd::smi::DevInfoTypes type,
@@ -435,7 +314,7 @@ static rsmi_status_t set_dev_value(amd::smi::DevInfoTypes type,
GET_DEV_FROM_INDX
int ret = dev->writeDevInfo(type, val);
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
@@ -452,7 +331,7 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
int ret = dev->monitor()->readMonitor(type, sensor_ind, &val_str);
if (ret) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
if (!amd::smi::IsInteger(val_str)) {
@@ -480,7 +359,7 @@ static rsmi_status_t get_dev_mon_value(amd::smi::MonitorTypes type,
int ret = dev->monitor()->readMonitor(type, sensor_ind, &val_str);
if (ret) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
if (!amd::smi::IsInteger(val_str)) {
@@ -504,7 +383,7 @@ static rsmi_status_t set_dev_mon_value(amd::smi::MonitorTypes type,
int ret = dev->monitor()->writeMonitor(type, sensor_ind,
std::to_string(val));
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
@@ -517,7 +396,7 @@ static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
uint32_t ret = smi.DiscoverAMDPowerMonitors();
if (ret != 0) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind];
@@ -526,20 +405,9 @@ static rsmi_status_t get_power_mon_value(amd::smi::PowerMonTypes type,
ret = dev->power_monitor()->readPowerValue(type, val);
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
static rsmi_status_t get_dev_value_vec(amd::smi::DevInfoTypes type,
uint32_t dv_ind, std::vector<std::string> *val_vec) {
assert(val_vec != nullptr);
if (val_vec == nullptr) {
return RSMI_STATUS_INVALID_ARGS;
}
GET_DEV_FROM_INDX
int ret = dev->readDevInfo(type, val_vec);
return errno_to_rsmi_status(ret);
}
// Returns true when n is a power of two, i.e. has exactly one bit set.
// Zero is not a power of two.
static bool is_power_of_2(uint64_t n) {
  if (n == 0) {
    return false;
  }
  // Clearing the lowest set bit leaves zero only if it was the only bit set.
  return (n & (n - 1)) == 0;
}
@@ -654,7 +522,7 @@ rsmi_status_t rsmi_dev_ecc_enabled_get(uint32_t dv_ind,
*enabled_blks = strtoul(tmp_str.c_str(), nullptr, 16);
assert(errno == 0);
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
CATCH
}
@@ -732,7 +600,7 @@ rsmi_dev_ecc_count_get(uint32_t dv_ind, rsmi_gpu_block_t block,
DEVICE_MUTEX
ret = get_dev_value_vec(type, dv_ind, &val_vec);
ret = GetDevValueVec(type, dv_ind, &val_vec);
if (ret == RSMI_STATUS_FILE_ERROR) {
return RSMI_STATUS_NOT_SUPPORTED;
@@ -828,7 +696,7 @@ get_id(uint32_t dv_ind, amd::smi::DevInfoTypes typ, uint16_t *id) {
val_u64 = strtoul(val_str.c_str(), nullptr, 16);
assert(errno == 0);
if (errno != 0) {
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
}
if (val_u64 > 0xFFFF) {
return RSMI_STATUS_UNEXPECTED_SIZE;
@@ -951,7 +819,7 @@ static rsmi_status_t get_frequencies(amd::smi::DevInfoTypes type,
return RSMI_STATUS_INVALID_ARGS;
}
ret = get_dev_value_vec(type, dv_ind, &val_vec);
ret = GetDevValueVec(type, dv_ind, &val_vec);
if (ret != RSMI_STATUS_SUCCESS) {
return ret;
}
@@ -1001,7 +869,7 @@ static rsmi_status_t get_power_profiles(uint32_t dv_ind,
return RSMI_STATUS_INVALID_ARGS;
}
ret = get_dev_value_vec(amd::smi::kDevPowerProfileMode, dv_ind, &val_vec);
ret = GetDevValueVec(amd::smi::kDevPowerProfileMode, dv_ind, &val_vec);
if (ret != RSMI_STATUS_SUCCESS) {
return ret;
}
@@ -1085,7 +953,7 @@ static rsmi_status_t get_od_clk_volt_info(uint32_t dv_ind,
return RSMI_STATUS_INVALID_ARGS;
}
ret = get_dev_value_vec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
ret = GetDevValueVec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
if (ret != RSMI_STATUS_SUCCESS) {
return ret;
}
@@ -1186,7 +1054,7 @@ static rsmi_status_t get_od_clk_volt_curve_regions(uint32_t dv_ind,
THROW_IF_NULLPTR_DEREF(p)
THROW_IF_NULLPTR_DEREF(num_regions)
ret = get_dev_value_vec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
ret = GetDevValueVec(amd::smi::kDevPowerODVoltage, dv_ind, &val_vec);
if (ret != RSMI_STATUS_SUCCESS) {
return ret;
}
@@ -1395,7 +1263,7 @@ rsmi_dev_firmware_version_get(uint32_t dv_ind, rsmi_fw_block_t block,
ret = get_dev_value_int(dev_type, dv_ind, fw_version);
if (ret != 0) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
return RSMI_STATUS_SUCCESS;
@@ -1487,7 +1355,7 @@ rsmi_dev_gpu_clk_freq_set(uint32_t dv_ind,
}
ret_i = dev->writeDevInfo(dev_type, freq_enable_str);
return errno_to_rsmi_status(ret_i);
return amd::smi::ErrnoToRsmiStatus(ret_i);
CATCH
}
@@ -1743,7 +1611,7 @@ rsmi_dev_brand_get(uint32_t dv_ind, char *brand, uint32_t len) {
// Retrieve vbios and store in vbios_value string
int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &vbios_value);
if (ret != 0) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
if (vbios_value.length() == 16) {
sku_value = vbios_value.substr(4, 6);
@@ -1779,7 +1647,7 @@ rsmi_dev_vram_vendor_get(uint32_t dv_ind, char *brand, uint32_t len) {
int ret = dev->readDevInfo(amd::smi::kDevVramVendor, &val_str);
if (ret != 0) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
uint32_t ln = static_cast<uint32_t>(val_str.copy(brand, len));
@@ -1893,7 +1761,7 @@ rsmi_dev_pci_bandwidth_set(uint32_t dv_ind, uint64_t bw_bitmask) {
uint32_t ret_i;
ret_i = dev->writeDevInfo(amd::smi::kDevPCIEClk, freq_enable_str);
return errno_to_rsmi_status(ret_i);
return amd::smi::ErrnoToRsmiStatus(ret_i);
CATCH
}
@@ -2565,7 +2433,7 @@ rsmi_dev_vbios_version_get(uint32_t dv_ind, char *vbios, uint32_t len) {
int ret = dev->readDevInfo(amd::smi::kDevVBiosVer, &val_str);
if (ret != 0) {
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
}
uint32_t ln = static_cast<uint32_t>(val_str.copy(vbios, len));
@@ -2629,7 +2497,7 @@ rsmi_version_str_get(rsmi_sw_component_t component, char *ver_str,
err = uname(&buf);
if (err != 0) {
return errno_to_rsmi_status(err);
return amd::smi::ErrnoToRsmiStatus(err);
}
val_str = buf.release;
@@ -2744,7 +2612,7 @@ rsmi_dev_counter_destroy(rsmi_event_handle_t evnt_handle) {
ret = evt->stopCounter();
delete evt;
return errno_to_rsmi_status(ret);;
return amd::smi::ErrnoToRsmiStatus(ret);;
CATCH
}
@@ -2755,7 +2623,7 @@ rsmi_counter_control(rsmi_event_handle_t evt_handle,
amd::smi::evt::Event *evt =
reinterpret_cast<amd::smi::evt::Event *>(evt_handle);
amd::smi::pthread_wrap _pw(*get_mutex(evt->dev_ind()));
amd::smi::pthread_wrap _pw(*amd::smi::GetMutex(evt->dev_ind()));
amd::smi::ScopedPthread _lock(_pw);
REQUIRE_ROOT_ACCESS
@@ -2779,7 +2647,7 @@ rsmi_counter_control(rsmi_event_handle_t evt_handle,
assert(!"Unexpected perf counter command");
return RSMI_STATUS_INVALID_ARGS;
}
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
CATCH
}
@@ -2810,7 +2678,7 @@ rsmi_counter_read(rsmi_event_handle_t evt_handle,
ret = evt->getValue(value);
}
return errno_to_rsmi_status(ret);
return amd::smi::ErrnoToRsmiStatus(ret);
CATCH
}
@@ -2868,7 +2736,7 @@ rsmi_compute_process_info_get(rsmi_process_info_t *procs,
int err = amd::smi::GetProcessInfo(procs, *num_items, &procs_found);
if (err) {
return errno_to_rsmi_status(err);
return amd::smi::ErrnoToRsmiStatus(err);
}
if (procs && *num_items < procs_found) {
@@ -2896,7 +2764,7 @@ rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
int err = amd::smi::GetProcessGPUs(pid, &gpu_set);
if (err) {
return errno_to_rsmi_status(err);
return amd::smi::ErrnoToRsmiStatus(err);
}
uint32_t i = 0;
@@ -2936,7 +2804,7 @@ rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages,
std::vector<std::string> val_vec;
ret = get_dev_value_vec(amd::smi::kDevMemPageBad, dv_ind, &val_vec);
ret = GetDevValueVec(amd::smi::kDevMemPageBad, dv_ind, &val_vec);
if (ret == RSMI_STATUS_FILE_ERROR) {
return RSMI_STATUS_NOT_SUPPORTED;
@@ -3017,7 +2885,7 @@ rsmi_compute_process_info_by_pid_get(uint32_t pid,
int err = amd::smi::GetProcessInfoForPID(pid, proc, &gpu_set);
if (err) {
return errno_to_rsmi_status(err);
return amd::smi::ErrnoToRsmiStatus(err);
}
return RSMI_STATUS_SUCCESS;
@@ -3534,7 +3402,7 @@ rsmi_event_notification_init(uint32_t dv_ind) {
int ret = ioctl(smi.kfd_notif_evt_fh(), AMDKFD_IOC_SMI_EVENTS, &args);
if (ret < 0) {
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
}
if (args.anon_fd < 1) {
return RSMI_STATUS_NO_DATA;
@@ -3544,7 +3412,7 @@ rsmi_event_notification_init(uint32_t dv_ind) {
FILE *anon_file_ptr = fdopen(args.anon_fd, "r");
if (anon_file_ptr == nullptr) {
close(dev->evt_notif_anon_fd());
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
}
dev->set_evt_notif_anon_file_ptr(anon_file_ptr);
@@ -3564,7 +3432,7 @@ rsmi_event_notification_mask_set(uint32_t dv_ind, uint64_t mask) {
ssize_t ret = write(dev->evt_notif_anon_fd(), &mask, sizeof(uint64_t));
if (ret == -1) {
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
}
return RSMI_STATUS_SUCCESS;
@@ -3645,7 +3513,7 @@ rsmi_event_notification_get(int timeout_ms,
fill_data_buffer(false);
if (*num_elem < buffer_size && errno != EAGAIN) {
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
} else if (*num_elem >= buffer_size) {
return RSMI_STATUS_SUCCESS;
}
@@ -3655,7 +3523,7 @@ rsmi_event_notification_get(int timeout_ms,
if (p_ret > 0) {
fill_data_buffer(true);
} else if (p_ret < 0) {
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
}
if (*num_elem == 0) {
return RSMI_STATUS_NO_DATA;
@@ -3684,7 +3552,7 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) {
int ret = close(smi.kfd_notif_evt_fh());
smi.set_kfd_notif_evt_fh(-1);
if (ret < 0) {
return errno_to_rsmi_status(errno);
return amd::smi::ErrnoToRsmiStatus(errno);
}
}
@@ -3700,7 +3568,7 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) {
rsmi_status_t
rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds) {
// DEVICE_MUTEX
amd::smi::pthread_wrap _pw(*get_mutex(dv_ind));
amd::smi::pthread_wrap _pw(*amd::smi::GetMutex(dv_ind));
amd::smi::RocmSMI& smi_ = amd::smi::RocmSMI::getInstance();
bool blocking_ = !(smi_.init_options() && RSMI_INIT_FLAG_RESRV_TEST1);
amd::smi::ScopedPthread _lock(_pw, blocking_);
+70
Zobrazit soubor
@@ -50,6 +50,13 @@
#include <iostream>
#include <sstream>
#include <algorithm>
#include <vector>
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi/rocm_smi_utils.h"
#include "rocm_smi/rocm_smi_exception.h"
#include "rocm_smi/rocm_smi_main.h"
#include "rocm_smi/rocm_smi_device.h"
namespace amd {
namespace smi {
@@ -151,5 +158,68 @@ bool IsInteger(const std::string & n_str) {
return (*tmp == 0);
}
// Converts the exception that is currently being handled into an
// rsmi_status_t. The bare `throw;` re-raises the in-flight exception so the
// handlers below can classify it; the handlers are tried in the order listed.
// Anything that is not explicitly mapped to another status yields
// RSMI_STATUS_INTERNAL_EXCEPTION.
rsmi_status_t handleException() {
  rsmi_status_t status = RSMI_STATUS_INTERNAL_EXCEPTION;

  try {
    throw;
  } catch (const std::bad_alloc&) {
    debug_print("RSMI exception: BadAlloc\n");
    status = RSMI_STATUS_OUT_OF_RESOURCES;
  } catch (const amd::smi::rsmi_exception& rsmi_ex) {
    debug_print("Exception caught: %s.\n", rsmi_ex.what());
    // The library's own exception type carries the status to report.
    status = rsmi_ex.error_code();
  } catch (const std::exception& std_ex) {
    debug_print("Exception caught: %s\n", std_ex.what());
  } catch (const std::nested_exception&) {
    debug_print("Callback threw.\n");
  } catch (...) {
    debug_print("Unknown exception caught.\n");
  }

  return status;
}
// Looks up the mutex belonging to the monitored device at index dv_ind.
// Returns nullptr when dv_ind is not a valid index into the list returned by
// RocmSMI::monitor_devices().
pthread_mutex_t *GetMutex(uint32_t dv_ind) {
  amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();

  if (dv_ind < smi.monitor_devices().size()) {
    std::shared_ptr<amd::smi::Device> device = smi.monitor_devices()[dv_ind];
    // A null entry in the device list is treated as a programming error.
    assert(device != nullptr);
    return device->mutex();
  }

  return nullptr;
}
// Reads the device information identified by `type` for device `dv_ind` into
// *val_vec via Device::readDevInfo() and converts the resulting error number
// to an rsmi_status_t.
// Returns RSMI_STATUS_INVALID_ARGS when val_vec is null (after asserting in
// debug builds).
rsmi_status_t GetDevValueVec(amd::smi::DevInfoTypes type,
                             uint32_t dv_ind,
                             std::vector<std::string> *val_vec) {
  assert(val_vec != nullptr);
  if (!val_vec) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  // NOTE(review): GET_DEV_FROM_INDX is a macro defined elsewhere; it is
  // expected to provide the `dev` handle used below (presumably resolved
  // from dv_ind) -- confirm against its definition.
  GET_DEV_FROM_INDX

  const int read_err = dev->readDevInfo(type, val_vec);
  return ErrnoToRsmiStatus(read_err);
}
// Maps an errno-style error number to the corresponding rsmi_status_t.
// 0 maps to RSMI_STATUS_SUCCESS; any value without an entry in the table
// below maps to RSMI_STATUS_UNKNOWN_ERROR.
rsmi_status_t ErrnoToRsmiStatus(uint32_t err) {
  struct ErrnoMapping {
    uint32_t code;
    rsmi_status_t status;
  };

  static const ErrnoMapping kErrnoTable[] = {
    {0,      RSMI_STATUS_SUCCESS},
    {ESRCH,  RSMI_STATUS_NOT_FOUND},
    {EACCES, RSMI_STATUS_PERMISSION},
    {EPERM,  RSMI_STATUS_NOT_SUPPORTED},
    {ENOENT, RSMI_STATUS_NOT_SUPPORTED},
    {EBADF,  RSMI_STATUS_FILE_ERROR},
    {EISDIR, RSMI_STATUS_FILE_ERROR},
    {EINTR,  RSMI_STATUS_INTERRUPT},
    {EIO,    RSMI_STATUS_UNEXPECTED_SIZE},
    {ENXIO,  RSMI_STATUS_UNEXPECTED_DATA},
    {EBUSY,  RSMI_STATUS_BUSY},
  };

  for (const ErrnoMapping& entry : kErrnoTable) {
    if (entry.code == err) {
      return entry.status;
    }
  }

  return RSMI_STATUS_UNKNOWN_ERROR;
}
} // namespace smi
} // namespace amd