Initial RDC commit

Includes server, client and example targets.

Change-Id: I30596fb0453af71d49b8390a8468a6d073200836


[ROCm/rdc commit: 5898345d17]
このコミットが含まれているのは:
Chris Freehill
2019-12-15 16:48:58 -06:00
コミット bc7f01e992
31個のファイルの変更5040行の追加1行の削除
実行可能ファイル
+159
ファイルの表示
@@ -0,0 +1,159 @@
# Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Minimum version of cmake required
#
# Top-level build configuration for RDC: derives the ROCm SMI locations,
# determines the package version, declares the project and sets the CPack
# defaults shared by the packaging step at the end of this file.
cmake_minimum_required(VERSION 3.5.0)
# ROCM_DIR should be passed in via command line (-DROCM_DIR=...); the two
# variables derived from it below are used in sub-projects.
set(RSMI_INC_DIR ${ROCM_DIR}/include)
set(RSMI_LIB_DIR ${ROCM_DIR}/lib)
## Set default module path (only when the caller has not provided one)
if(NOT DEFINED CMAKE_MODULE_PATH)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")
endif()
## Include common cmake modules ("utils" is resolved through CMAKE_MODULE_PATH)
include(utils)
# Base name of the project; RDC_TARGET ("rdc64") is used as the project name
# and as the stem of the generated Config header below.
set(RDC "rdc")
set(RDC_TARGET "${RDC}64")
################# Determine the library version #########################
## Setup the package version based on git tags.
set(PKG_VERSION_GIT_TAG_PREFIX "rdc_pkg_ver")
# provide git to utilities
find_program (GIT NAMES git)
# "1.0.0" is passed as the first argument; presumably it is the fallback
# version used when no matching git tag is found -- TODO confirm against the
# utils module, which is not part of this file.
get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT)
# PKG_VERSION_STR and the VERSION_* variables read below are expected to be
# set by the call above (defined in the utils module -- TODO confirm).
message("Package version: ${PKG_VERSION_STR}")
set(${RDC}_VERSION_MAJOR "${VERSION_MAJOR}")
set(${RDC}_VERSION_MINOR "${VERSION_MINOR}")
# Patch and build numbers are hard-coded to 0.
set(${RDC}_VERSION_PATCH "0")
set(${RDC}_VERSION_BUILD "0")
## Define default variable and variables for the optional build target
## RDC_lib-dev
# These are cache entries without FORCE, so values already present in the
# cache (e.g. given with -D on the command line) are kept.
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
CACHE STRING "Location of RDC source code.")
set(CMAKE_INSTALL_PREFIX "/"
CACHE STRING "Default installation directory.")
set(CPACK_PACKAGING_INSTALL_PREFIX "/"
CACHE STRING "Default packaging prefix.")
set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
project(${RDC_TARGET})
# Create a configure file to get version info from within library
# NOTE(review): the generated header is written into the source tree
# (include/rdc/) rather than the build tree.
configure_file(
"${PROJECT_SOURCE_DIR}/src/${RDC_TARGET}Config.in"
"${PROJECT_SOURCE_DIR}/include/rdc/${RDC_TARGET}Config.h")
# CPack package metadata; each value is only defaulted when not already
# defined by the caller.
if (NOT DEFINED CPACK_PACKAGE_VENDOR)
set(CPACK_PACKAGE_VENDOR "AMD")
endif()
if (NOT DEFINED CPACK_PACKAGE_CONTACT)
set(CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc.")
endif()
if (NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
"Radeon Data Center Tools")
endif()
# Package name defaults to "rdc" unless RDC_PACKAGE was supplied.
if (NOT RDC_PACKAGE)
set(RDC_PACKAGE ${RDC})
endif()
set(CPACK_PACKAGE_FILE_NAME "${RDC_PACKAGE}-${PKG_VERSION_STR}")
## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)
# Compile .proto files
#
# For every protos/*.proto file, protoc is run twice at configure time: once
# to generate the protobuf message classes (--cpp_out) and once with the gRPC
# C++ plugin to generate the service stubs (--grpc_out). Generated files are
# written to PROTOB_OUT_DIR, which the sub-projects read.
file(GLOB PROTOB_DEF_SRC_FILES "protos/*.proto")
set(PROTOB_SRC_DIR "${PROJECT_SOURCE_DIR}/protos")
set(PROTOB_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(PROTOB_CMD "protoc")

# The plugin location does not depend on the .proto file being processed, so
# it is looked up once, before the loop.
find_program (GRPC_PLUGIN NAMES grpc_cpp_plugin)
message("GRPC_PLUGIN=${GRPC_PLUGIN}")
if (NOT GRPC_PLUGIN)
  message(FATAL_ERROR
          "grpc_cpp_plugin was not found; cannot generate gRPC sources.")
endif()

foreach(proto_file ${PROTOB_DEF_SRC_FILES})
  # Pass 1: protobuf message classes.
  execute_process(COMMAND
      ${PROTOB_CMD} --proto_path=${PROTOB_SRC_DIR}
                    --cpp_out=${PROTOB_OUT_DIR} ${proto_file}
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      RESULT_VARIABLE PROTOB_RESULT
      OUTPUT_VARIABLE PROTOB_OUT_VAR
      ERROR_VARIABLE PROTOB_ERR_VAR)
  message("protoc command returned: ${PROTOB_RESULT}")
  # A failed run leaves the generated sources missing or stale; stop here
  # instead of failing later with a less obvious compile or link error.
  if (NOT "${PROTOB_RESULT}" STREQUAL "0")
    message(FATAL_ERROR
            "protoc --cpp_out failed for ${proto_file}: ${PROTOB_ERR_VAR}")
  endif()

  # Pass 2: gRPC service stubs via the gRPC C++ plugin.
  message("protoc cmd:")
  message(" $ ${PROTOB_CMD} --proto_path=${PROTOB_SRC_DIR}")
  message(" --grpc_out=${PROTOB_OUT_DIR}")
  message(" --plugin=protoc-gen-grpc=\"${GRPC_PLUGIN}\" ${proto_file}")
  execute_process(COMMAND
      ${PROTOB_CMD} --proto_path=${PROTOB_SRC_DIR}
                    --grpc_out=${PROTOB_OUT_DIR}
                    --plugin=protoc-gen-grpc=${GRPC_PLUGIN} ${proto_file}
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
      RESULT_VARIABLE PROTOB_RESULT
      OUTPUT_VARIABLE PROTOB_OUT_VAR
      ERROR_VARIABLE PROTOB_ERR_VAR)
  message("protoc command returned: ${PROTOB_RESULT}")
  if (NOT "${PROTOB_RESULT}" STREQUAL "0")
    message(FATAL_ERROR
            "protoc --grpc_out failed for ${proto_file}: ${PROTOB_ERR_VAR}")
  endif()
endforeach()
add_subdirectory("server")
add_subdirectory("client")
add_subdirectory("tests/example")
# TODO set(CPACK_DEBIAN_PACKAGE_HOMEPAGE <GITHUB URL> )
# Debian package specific variables
# Each maintainer script is passed as its own list element. A single quoted
# string that spans two lines would embed the line break in the second path.
set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA
  "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst"
  "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm")
set (CPACK_DEBIAN_PACKAGE_NAME ${RDC_PACKAGE})
set (CPACK_DEBIAN_PACKAGE_VERSION ${PKG_VERSION_STR})
# RPM package specific variables
# rpm_post registers the library directory with ldconfig, so it has to run
# after the files are installed (post-install), not before them.
set(CPACK_RPM_POST_INSTALL_SCRIPT_FILE
  "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post")
set(CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE
  "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun")
set (CPACK_RPM_PACKAGE_NAME ${RDC_PACKAGE})
set (CPACK_RPM_PACKAGE_VERSION ${PKG_VERSION_STR})
# Optional "doc" target: when Doxygen is available, instantiate the Doxyfile
# from docs/rdc_doxygen.cfg into the build tree and run Doxygen on it.
find_package(Doxygen)
if (NOT DOXYGEN_FOUND)
  message("Doxygen is not found. Will not generate documents.")
else()
  configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/docs/rdc_doxygen.cfg
    ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    @ONLY)
  add_custom_target(doc
    COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
    COMMENT "Generating API documentation with Doxygen"
    VERBATIM)
endif()
## Add the packaging directives
include (CPack)
+9
ファイルの表示
@@ -0,0 +1,9 @@
Package: rocm_smi_lib
Architecture: amd64
Maintainer: Advanced Micro Devices Inc.
Priority: optional
Version: MODULE_VERSION
Depends:
Homepage: https://github.com/RadeonOpenCompute/rocm_smi_lib
Description: System Management Interface Library for ROCm
実行可能ファイル
+19
ファイルの表示
@@ -0,0 +1,19 @@
#!/bin/bash
# Debian post-installation script: registers the library directory with the
# dynamic linker when the package is configured.
#
# NOTE(review): the path and the .conf file name below refer to rocm_smi,
# not to an RDC install location -- confirm whether this was intended.

set -e

# Write the library directory to an ld.so.conf.d entry and, if that
# succeeds, refresh the linker cache.
do_ldconfig() {
    echo /opt/rocm/rocm_smi/lib > /etc/ld.so.conf.d/x86_64-librocm_smi_lib.conf && ldconfig
}

case "$1" in
    configure)
        do_ldconfig
        ;;
    abort-upgrade|abort-remove|abort-deconfigure)
        # Nothing to undo; just report which action was requested.
        echo "$1"
        ;;
    *)
        exit 0
        ;;
esac
実行可能ファイル
+19
ファイルの表示
@@ -0,0 +1,19 @@
#!/bin/bash
# Debian pre-removal script: on "remove", deletes the ld.so.conf.d entry
# for the library and refreshes the dynamic linker cache.
set -e
# Remove the linker configuration entry and, if that succeeds, run ldconfig.
rm_ldconfig() {
rm -f /etc/ld.so.conf.d/x86_64-librocm_smi_lib.conf && ldconfig
}
case "$1" in
remove)
rm_ldconfig
;;
# "purge" is accepted but intentionally does nothing.
purge)
;;
# Any other action is ignored and reported as success.
*)
exit 0
;;
esac
+2 -1
ファイルの表示
@@ -1,6 +1,7 @@
MIT License
Copyright (c) 2019 Chris Freehill
Copyright (c) 2019 - Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
実行可能ファイル
+2
ファイルの表示
@@ -0,0 +1,2 @@
echo -e "/opt/rocm/rocm_smi/lib\n/opt/rocm/rocm_smi/lib64" > /etc/ld.so.conf.d/x86_64-librocm_smi_lib.conf && ldconfig
実行可能ファイル
+3
ファイルの表示
@@ -0,0 +1,3 @@
# RPM post-uninstall scriptlet: when the first argument is 0, remove the
# ld.so.conf.d entry and refresh the dynamic linker cache.
# NOTE(review): presumably $1 is the count of package instances left after
# the operation, so 0 means a full uninstall rather than an upgrade --
# confirm against the RPM scriptlet documentation.
if [ $1 -eq 0 ]; then
rm -f /etc/ld.so.conf.d/x86_64-librocm_smi_lib.conf && ldconfig
fi
実行可能ファイル
+165
ファイルの表示
@@ -0,0 +1,165 @@
# Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.5.0)
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Cmake Client Lib ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
## Compiler flags
# Warnings, 64-bit code generation, SSE/SSE2 and C++11 for every target
# defined from this directory.
string(APPEND CMAKE_CXX_FLAGS
       " -Wall -Wextra -m64"
       " -msse -msse2 -std=c++11 ")
# Use this instead of above for 32 bit
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")

# Anything other than a Release build is compiled unoptimized, with debug
# symbols and the DEBUG define.
if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL Release)
  string(APPEND CMAKE_CXX_FLAGS " -ggdb -O0 -DDEBUG")
else ()
  string(APPEND CMAKE_CXX_FLAGS " -O2")
endif ()
# Required Defines first:
message("")
message("Build Configuration:")
message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
message("--------RSMI Inc Dir: " ${RSMI_INC_DIR})
message("")
## Set default module path if not already set
if(NOT DEFINED CMAKE_MODULE_PATH)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/")
endif()
## Include common cmake modules
# TODO see if we really need this in the end
include(utils)
set(CLIENT_LIB "rdc_client")
set(RDC "rdc")
set(CLIENT_LIB_COMPONENT "lib${CLIENT_LIB}")
set(SRC_DIR "${PROJECT_SOURCE_DIR}/client/src")
set(INC_DIR "${PROJECT_SOURCE_DIR}/client/include/rdc")
################# Determine the library version #########################
## Setup the SO version based on git tags.
set(SO_VERSION_GIT_TAG_PREFIX "rdc_so_ver")
# provide git to utilities
find_program (GIT NAMES git)
# Debian package specific variables
# Set a default value for the package version
get_version_from_tag("1.0.0.0" ${SO_VERSION_GIT_TAG_PREFIX} GIT)
# VERSION_* variables should be set by get_version_from_tag
set(SO_VERSION_STRING "${VERSION_MAJOR}.${VERSION_MINOR}")
message("SOVERSION: ${SO_VERSION_STRING}")
## Define default variable and variables for the optional build target
## RDC_lib-dev
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
CACHE STRING "Location of RDC client library source code.")
# set(CMAKE_INSTALL_PREFIX "/opt/rocm"
# CACHE STRING "Default installation directory.")
# set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/rocm"
# CACHE STRING "Default packaging prefix.")
set(CPACK_GENERATOR "DEB;RPM" CACHE STRING "Default packaging generators.")
if (NOT DEFINED CPACK_PACKAGE_VENDOR)
set(CPACK_PACKAGE_VENDOR "AMD")
endif()
if (NOT DEFINED CPACK_PACKAGE_CONTACT)
set(CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc.")
endif()
if (NOT DEFINED CPACK_PACKAGE_DESCRIPTION_SUMMARY)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY
"Radeon Data Center Tools")
endif()
if (NOT RDC_PACKAGE)
set(RDC_PACKAGE RDC_lib64)
endif()
set(CPACK_PACKAGE_FILE_NAME "${RDC_PACKAGE}-${PKG_VERSION_STR}")
## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)
# TODO delete these if not used
file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h")
file(GLOB PROTOBUF_GENERATED_SRCS "${PROTOB_OUT_DIR}/*.cc")
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
"${PROJECT_SOURCE_DIR}"
"${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}")
# Sources of the client library: the hand-written client sources, the
# protobuf/gRPC sources found in PROTOB_OUT_DIR, and the shared utilities.
set(CLIENT_LIB_SRC_LIST
    "${SRC_DIR}/rdc_client.cc"
    "${SRC_DIR}/rdc_main.cc"
    ${PROTOBUF_GENERATED_SRCS}
    "${PROJECT_SOURCE_DIR}/common/rdc_utils.cc")
message("CLIENT_LIB_SRC_LIST=${CLIENT_LIB_SRC_LIST}")

# Headers listed alongside the sources of the client library target.
set(CLIENT_LIB_INC_LIST
    "${INC_DIR}/rdc_client.h"
    "${INC_DIR}/rdc_exception.h"
    "${INC_DIR}/rdc_main.h"
    "${PROJECT_SOURCE_DIR}/common/rdc_utils.h")
# Build the client as a shared library and link it against gRPC, protobuf
# and the system libraries it needs.
add_library(${CLIENT_LIB} SHARED ${CLIENT_LIB_SRC_LIST} ${CLIENT_LIB_INC_LIST})
target_link_libraries(${CLIENT_LIB} pthread rt grpc grpc++ grpc++_reflection
                      dl protobuf)
# PUBLIC so that targets linking against the client library also get its
# public include directory.
target_include_directories(${CLIENT_LIB} PUBLIC ${INC_DIR})
# TODO: set the properties for the library once we have one
## Set the VERSION and SOVERSION values
set_property(TARGET ${CLIENT_LIB} PROPERTY
             SOVERSION "${VERSION_MAJOR}")
set_property(TARGET ${CLIENT_LIB} PROPERTY
             VERSION "${SO_VERSION_STRING}")
## If the library is a release, strip the target library
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  # $<TARGET_FILE:...> expands to the full path of the built library, so the
  # command does not depend on the working directory or on the output name
  # being exactly lib<name>.so.
  add_custom_command(
    TARGET ${CLIENT_LIB}
    POST_BUILD
    COMMAND ${CMAKE_STRIP} $<TARGET_FILE:${CLIENT_LIB}>
    VERBATIM)
endif ()
## Add the install directives for the runtime library.
install(TARGETS ${CLIENT_LIB}
LIBRARY DESTINATION opt/rocm/rdc/lib COMPONENT ${CLIENT_LIB_COMPONENT})
install(FILES ${SOURCE_DIR}/client/include/rdc/rdc_client.h
DESTINATION opt/rocm/rdc/include/rdc)
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Finished Cmake Client Lib ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
+304
ファイルの表示
@@ -0,0 +1,304 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef CLIENT_INCLUDE_RDC_RDC_CLIENT_H_
#define CLIENT_INCLUDE_RDC_RDC_CLIENT_H_
#include <memory>
#include <string>
#include "rocm_smi/rocm_smi.h"
/**
* @brief Error codes returned by rdc functions
*/
typedef enum {
RDC_STATUS_SUCCESS = 0x0, //!< Operation was successful
RDC_RSMI_STATUS_INVALID_ARGS, //!< Passed in arguments are not valid
RDC_RSMI_STATUS_NOT_SUPPORTED, //!< The requested information or
//!< action is not available for the
//!< given input, on the given system
RDC_RSMI_STATUS_FILE_ERROR, //!< Problem accessing a file. This
//!< may because the operation is not
//!< supported by the Linux kernel
//!< version running on the executing
//!< machine
RDC_RSMI_STATUS_PERMISSION, //!< Permission denied/EACCESS file
//!< error. Many functions require
//!< root access to run.
RDC_RSMI_STATUS_OUT_OF_RESOURCES, //!< Unable to acquire memory or other
//!< resource
RDC_RSMI_STATUS_INTERNAL_EXCEPTION, //!< An internal exception was caught
RDC_RSMI_STATUS_INPUT_OUT_OF_BOUNDS, //!< The provided input is out of
//!< allowable or safe range
RDC_RSMI_STATUS_INIT_ERROR, //!< An error occurred when creating
//!< a communications channel
RDC_RSMI_STATUS_NOT_YET_IMPLEMENTED, //!< The requested function has not
//!< yet been implemented in the
//!< current system for the current
//!< devices
RDC_RSMI_STATUS_NOT_FOUND, //!< An item was searched for but not
//!< found
RDC_RSMI_STATUS_INSUFFICIENT_SIZE, //!< Not enough resources were
//!< available for the operation
RDC_RSMI_STATUS_INTERRUPT, //!< An interrupt occurred during
//!< execution of function
RDC_RSMI_STATUS_UNEXPECTED_SIZE, //!< An unexpected amount of data
//!< was read
RDC_RSMI_STATUS_NO_DATA, //!< No data was found for a given
//!< input
RDC_RSMI_STATUS_UNKNOWN_ERROR, //!< An unknown error occurred
RDC_STATUS_GRPC_ERR_FIRST = 1000,
/// Not an error; returned on success.
RDC_STATUS_GRPC_OK = RDC_STATUS_GRPC_ERR_FIRST,
/// The operation was cancelled (typically by the caller).
RDC_STATUS_GRPC_CANCELLED,
/// Unknown error. An example of where this error may be returned is if a
/// Status value received from another address space belongs to an error-space
/// that is not known in this address space. Also errors raised by APIs that
/// do not return enough error information may be converted to this error.
RDC_STATUS_GRPC_UNKNOWN,
/// Client specified an invalid argument. Note that this differs from
/// FAILED_PRECONDITION. INVALID_ARGUMENT indicates arguments that are
/// problematic regardless of the state of the system (e.g., a malformed file
/// name).
RDC_STATUS_GRPC_INVALID_ARG,
/// Deadline expired before operation could complete. For operations that
/// change the state of the system, this error may be returned even if the
/// operation has completed successfully. For example, a successful response
/// from a server could have been delayed long enough for the deadline to
/// expire.
RDC_STATUS_GRPC_DEADLINE_EXCEEDED,
/// Some requested entity (e.g., file or directory) was not found.
RDC_STATUS_GRPC_NOT_FOUND,
/// Some entity that we attempted to create (e.g., file or directory) already
/// exists.
RDC_STATUS_GRPC_ALREADY_EXISTS,
/// The caller does not have permission to execute the specified operation.
/// PERMISSION_DENIED must not be used for rejections caused by exhausting
/// some resource (use RESOURCE_EXHAUSTED instead for those errors).
/// PERMISSION_DENIED must not be used if the caller can not be identified
/// (use UNAUTHENTICATED instead for those errors).
RDC_STATUS_GRPC_PERM_DENIED,
/// The request does not have valid authentication credentials for the
/// operation.
RDC_STATUS_GRPC_UNAUTHENTICATED,
/// Some resource has been exhausted, perhaps a per-user quota, or perhaps the
/// entire file system is out of space.
RDC_STATUS_GRPC_RESOURCE_EXHAUSTED,
/// Operation was rejected because the system is not in a state required for
/// the operation's execution. For example, directory to be deleted may be
/// non-empty, an rmdir operation is applied to a non-directory, etc.
///
/// A litmus test that may help a service implementor in deciding
/// between FAILED_PRECONDITION, ABORTED, and UNAVAILABLE:
/// (a) Use UNAVAILABLE if the client can retry just the failing call.
/// (b) Use ABORTED if the client should retry at a higher-level
/// (e.g., restarting a read-modify-write sequence).
/// (c) Use FAILED_PRECONDITION if the client should not retry until
/// the system state has been explicitly fixed. E.g., if an "rmdir"
/// fails because the directory is non-empty, FAILED_PRECONDITION
/// should be returned since the client should not retry unless
/// they have first fixed up the directory by deleting files from it.
/// (d) Use FAILED_PRECONDITION if the client performs conditional
/// REST Get/Update/Delete on a resource and the resource on the
/// server does not match the condition. E.g., conflicting
/// read-modify-write on the same resource.
RDC_STATUS_GRPC_FAILED_PRECOND,
/// The operation was aborted, typically due to a concurrency issue like
/// sequencer check failures, transaction aborts, etc.
///
/// See litmus test above for deciding between FAILED_PRECONDITION, ABORTED,
/// and UNAVAILABLE.
RDC_STATUS_GRPC_ABORTED,
/// Operation was attempted past the valid range. E.g., seeking or reading
/// past end of file.
///
/// Unlike INVALID_ARGUMENT, this error indicates a problem that may be fixed
/// if the system state changes. For example, a 32-bit file system will
/// generate INVALID_ARGUMENT if asked to read at an offset that is not in the
/// range [0,2^32-1], but it will generate OUT_OF_RANGE if asked to read from
/// an offset past the current file size.
///
/// There is a fair bit of overlap between FAILED_PRECONDITION and
/// OUT_OF_RANGE. We recommend using OUT_OF_RANGE (the more specific error)
/// when it applies so that callers who are iterating through a space can
/// easily look for an OUT_OF_RANGE error to detect when they are done.
RDC_STATUS_GRPC_OUT_OF_RANGE,
/// Operation is not implemented or not supported/enabled in this service.
RDC_STATUS_GRPC_UNIMPLEMENTED,
/// Internal errors. Means some invariants expected by underlying System has
/// been broken. If you see one of these errors, Something is very broken.
RDC_STATUS_GRPC_INTERNAL,
/// The service is currently unavailable. This is a most likely a transient
/// condition and may be corrected by retrying with a backoff.
///
/// \warning Although data MIGHT not have been transmitted when this
/// status occurs, there is NOT A GUARANTEE that the server has not seen
/// anything. So in general it is unsafe to retry on this status code
/// if the call is non-idempotent.
///
/// See litmus test above for deciding between FAILED_PRECONDITION, ABORTED,
/// and UNAVAILABLE.
RDC_STATUS_GRPC_UNAVAILABLE,
/// Unrecoverable data loss or corruption.
RDC_STATUS_GRPC_DATA_LOSS,
RDC_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rdc_status_t;
/**
* @brief Handle to RDC server channel
*/
typedef uintptr_t rdc_channel_t;
#define RDC_DEFAULT_SERVER_PORT 50051
#define RDC_DEFAULT_SERVER_IP "localhost"
/*****************************************************************************/
/** @defgroup InitShutAdmin Initialization and Shutdown
* These functions are used for initialization of RDC and clean up when
* done.
* @{
*/
/**
* @brief Create a communications channel to an RDC server
*
* @details Given a pointer to an ::rdc_channel_t @p channel, a string
* containing the ip address of the server @p ip, a string containing
* the port number on which the server is listening @p port and a bool
* indicating whether the channel should use a secure link @p secure,
* this function will attempt to create a new channel and write its
* location to the address pointed to by @p channel. On failure, @p channel
* is not written.
*
* @param[inout] channel A pointer to caller provided memory to which an
* ::rdc_channel_t will be written
*
* @param[in] ip A pointer to a string containing the address of the server.
* If NULL, ::RDC_DEFAULT_SERVER_IP is used.
*
* @param[in] port A pointer to string containing the port on which the
* RDC server is listening. If NULL, ::RDC_DEFAULT_SERVER_PORT is used.
*
* @param[in] secure A bool indicating whether SSL should be used for
* communications (not currently supported)
*
* @retval ::RDC_STATUS_SUCCESS is returned upon successful call.
*
* @retval ::RDC_STATUS_GRPC_INVALID_ARG is returned if @p channel is NULL.
*
*/
rdc_status_t
rdc_channel_create(rdc_channel_t *channel, const char *ip, const char *port,
bool secure);
/**
* @brief Destroy a communications channel to an RDC server
*
* @details Given an ::rdc_channel_t @p channel, this function will free any
* resources used by @p channel. The handle must not be used after this call.
*
* @param[in] channel The ::rdc_channel_t to be freed
*
* @retval ::RDC_STATUS_SUCCESS is returned upon successful call.
*
* @retval ::RDC_STATUS_GRPC_INVALID_ARG is returned if @p channel is 0.
*
*/
rdc_status_t
rdc_channel_destroy(rdc_channel_t channel);
/** @} */ // end of InitShutAdmin
/*****************************************************************************/
/** @defgroup RSMIAccess Remote ROCm SMI Calls
* These functions calls make ROCm SMI function calls on the remote server.
* Please refer to the
* [ROCm SMI documentation]
* (https://github.com/RadeonOpenCompute/rocm_smi_lib/tree/master/docs) for
* information about the calls. Here, we will document any additional aspects
* of the calls introduced by RDC that are not covered in the ROCm SMI
* documentation.
*
* All of the functions in this section attempt to make an RSMI call on the
* server machine, given an ::rdc_channel_t associated with the server, and
* all the arguments that are required to make the RSMI call.
* @{
*/
/**
* @brief Remote call to rsmi_num_monitor_devices()
*
*/
rdc_status_t
rdc_num_gpus_get(rdc_channel_t channel, uint64_t *num_gpu);
/** @} */ // end of RSMIAccess
/** @defgroup PhysQuer Physical State Queries
* These functions provide information about the physical characteristics of
* the device.
* @{
*/
/**
* @brief Remote call to rsmi_dev_temp_metric_get()
*
*/
rdc_status_t
rdc_dev_temp_metric_get(rdc_channel_t channel, uint32_t dv_ind,
uint32_t sensor_type, rsmi_temperature_metric_t metric,
int64_t *temperature);
/** @} */ // end of PhysQuer
/**
* @brief Get a description of a provided RDC error status
*
* @details Set the provided pointer to a const char *, @p status_string, to
* a string containing a description of the provided error code @p status.
*
* @param[in] status The error status for which a description is desired
*
* @param[inout] status_string A pointer to a const char * which will be made
* to point to a description of the provided error code
*
* @retval ::RDC_STATUS_SUCCESS is returned upon successful call
*
*/
rdc_status_t
rdc_status_string(rdc_status_t status, const char **status_string);
#endif // CLIENT_INCLUDE_RDC_RDC_CLIENT_H_
+51
ファイルの表示
@@ -0,0 +1,51 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef CLIENT_INCLUDE_RDC_RDC_EXCEPTION_H_
#define CLIENT_INCLUDE_RDC_RDC_EXCEPTION_H_
#include <exception>
#include <string>
#include "rdc/rdc_client.h"
namespace amd {
namespace rdc {
/// @brief Exception type which carries an error code to return to the user.
class rdc_exception : public std::exception {
public:
rdc_exception(rdc_status_t error, const std::string description) :
err_(error), desc_(description) {}
rdc_status_t error_code() const noexcept { return err_; }
const char* what() const noexcept override { return desc_.c_str(); }
private:
rdc_status_t err_;
std::string desc_;
};
} // namespace rdc
} // namespace amd
#endif // CLIENT_INCLUDE_RDC_RDC_EXCEPTION_H_
+62
ファイルの表示
@@ -0,0 +1,62 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef CLIENT_INCLUDE_RDC_RDC_MAIN_H_
#define CLIENT_INCLUDE_RDC_RDC_MAIN_H_
#include <string>
#include <memory>
#include "rdc.grpc.pb.h" // NOLINT
#include "rdc/rdc_client.h"
namespace amd {
namespace rdc {
/// @brief State behind an ::rdc_channel_t handle: the server address, the
/// port, the secure-link flag and the gRPC stub used to issue calls.
class RDCChannel {
 public:
  /// Construct a channel description; see Initialize() for set-up.
  explicit RDCChannel(std::string server_ip, std::string server_port,
                      bool secure_channel);
  ~RDCChannel();

  /// Second-phase set-up; returns an ::rdc_status_t describing the outcome.
  rdc_status_t Initialize();

  // Read-only accessors. The server ip and port deliberately have no
  // setters: they must not change after construction.
  std::string server_ip() const { return server_ip_; }
  std::string server_port() const { return server_port_; }
  bool secure_channel() const { return secure_channel_; }
  std::shared_ptr<::rdc::Rsmi::Stub> stub() const { return stub_; }

 private:
  std::string server_ip_;
  std::string server_port_;
  bool secure_channel_;
  std::shared_ptr<::rdc::Rsmi::Stub> stub_;
};
} // namespace rdc
} // namespace amd
#endif // CLIENT_INCLUDE_RDC_RDC_MAIN_H_
+440
ファイルの表示
@@ -0,0 +1,440 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <grpcpp/grpcpp.h>
#include <unistd.h>
#include <iostream>
#include <memory>
#include "rdc/rdc_main.h"
#include "rdc/rdc_client.h"
#include "common/rdc_utils.h"
#include "rdc/rdc_exception.h"
#include "rdc.grpc.pb.h" // NOLINT
#include "rocm_smi/rocm_smi.h"
// Argument guard for the API entry points: makes the enclosing function
// return RDC_RSMI_STATUS_INVALID_ARGS when PTR is a null pointer.
// Must be used as a statement inside a function returning rdc_status_t.
#define CHK_PTR_ARG(PTR) \
if ((PTR) == nullptr) { \
return RDC_RSMI_STATUS_INVALID_ARGS; \
}
// Converts an rdc_channel_t handle (UPTR) back into the RDCChannel object it
// refers to. Declares a local variable named `ch` in the enclosing scope and
// makes the enclosing function return RDC_STATUS_GRPC_INVALID_ARG when the
// handle is null. Because it declares `ch`, it can be used only once per
// scope.
//
// The last line deliberately has no line-continuation backslash, so the
// macro ends here and cannot absorb the source line that follows it.
#define UINTPTR_TO_RDC_CHAN(UPTR) \
  amd::rdc::RDCChannel *ch = reinterpret_cast<amd::rdc::RDCChannel *>(UPTR); \
  if (ch == nullptr) { \
    return RDC_STATUS_GRPC_INVALID_ARG; \
  }
// Translates the exception currently being handled into an rdc_status_t.
//
// Must be called from inside a catch block: it re-throws the active
// exception and maps it by type. Handlers are tried in the order written.
static rdc_status_t handleException() {
  try {
    throw;
  } catch (const std::bad_alloc&) {
    debug_print("RDC exception: BadAlloc\n");
    return RDC_RSMI_STATUS_OUT_OF_RESOURCES;
  } catch (const amd::rdc::rdc_exception& e) {
    // Library exceptions already carry the status to report.
    debug_print("Exception caught: %s.\n", e.what());
    return e.error_code();
  } catch (const std::exception& e) {
    debug_print("Unhandled exception: %s\n", e.what());
    assert(false && "Unhandled exception.");
    return RDC_RSMI_STATUS_INTERNAL_EXCEPTION;
  } catch (const std::nested_exception& e) {
    // Only reached for nested exceptions that do not also derive from
    // std::exception, since that handler is tried first.
    debug_print("Callback threw, forwarding.\n");
    e.rethrow_nested();
    return RDC_RSMI_STATUS_INTERNAL_EXCEPTION;
  } catch (...) {
    assert(false && "Unhandled exception.");
    abort();
    return RDC_RSMI_STATUS_INTERNAL_EXCEPTION;
  }
}
// TRY / CATCH bracket the body of each API function: TRY opens a try block
// and CATCH closes it with a catch-all handler that returns the status
// produced by handleException().
#define TRY try {
#define CATCH } catch (...) {return handleException();}
// Creates an RDCChannel for the given server and hands it back to the caller
// as an opaque rdc_channel_t.
//
// channel: out-parameter receiving the handle; must not be null.
// ip:      server address, or nullptr to use RDC_DEFAULT_SERVER_IP.
// port:    server port, or nullptr to use RDC_DEFAULT_SERVER_PORT.
// secure:  forwarded to the RDCChannel constructor.
//
// Returns RDC_STATUS_SUCCESS on success, RDC_STATUS_GRPC_INVALID_ARG when
// channel is null, the status returned by RDCChannel::Initialize() when that
// fails, or the status produced by handleException() if an exception is
// thrown. *channel is written only on success.
rdc_status_t
rdc_channel_create(rdc_channel_t *channel, const char *ip,
                   const char *port, bool secure) {
  TRY
  if (channel == nullptr) {
    return RDC_STATUS_GRPC_INVALID_ARG;
  }

  std::string server_str;
  std::string port_str;

  if (ip != nullptr) {
    server_str = ip;
  } else {
    server_str = RDC_DEFAULT_SERVER_IP;
  }

  if (port != nullptr) {
    port_str = port;
  } else {
    port_str = std::to_string(RDC_DEFAULT_SERVER_PORT);
  }

  // Plain `new` reports failure by throwing std::bad_alloc (mapped to a
  // status by CATCH), so no null check is needed. The unique_ptr owns the
  // channel until it is handed to the caller, so it is released both when
  // Initialize() returns an error and when it throws.
  std::unique_ptr<amd::rdc::RDCChannel> ch(
      new amd::rdc::RDCChannel(server_str, port_str, secure));

  rdc_status_t ret = ch->Initialize();
  if (ret != RDC_STATUS_SUCCESS) {
    return ret;
  }

  // Ownership passes to the caller; it is reclaimed by rdc_channel_destroy().
  *channel = reinterpret_cast<rdc_channel_t>(ch.release());
  return RDC_STATUS_SUCCESS;
  CATCH
}
// Releases a channel previously produced by rdc_channel_create().
// A null handle is rejected with RDC_STATUS_GRPC_INVALID_ARG; otherwise the
// underlying RDCChannel is deleted and RDC_STATUS_SUCCESS is returned.
rdc_status_t
rdc_channel_destroy(rdc_channel_t channel) {
  TRY
  amd::rdc::RDCChannel *rdc_chan =
                        reinterpret_cast<amd::rdc::RDCChannel *>(channel);

  if (rdc_chan != nullptr) {
    delete rdc_chan;
    return RDC_STATUS_SUCCESS;
  }
  return RDC_STATUS_GRPC_INVALID_ARG;
  CATCH
}
// Asks the server behind `channel` for its device count via the
// GetNumDevices RPC.
//
// Returns RDC_RSMI_STATUS_INVALID_ARGS when `num_gpu` is null and
// RDC_STATUS_GRPC_INVALID_ARG when `channel` is null. If the RPC itself
// fails, the gRPC error is mapped with GrpcErrorToRdcError(). Otherwise
// *num_gpu receives the reported count and the status carried in the
// response is returned.
rdc_status_t
rdc_num_gpus_get(rdc_channel_t channel, uint64_t *num_gpu) {
  TRY
  if (num_gpu == nullptr) {
    return RDC_RSMI_STATUS_INVALID_ARGS;
  }

  amd::rdc::RDCChannel *rdc_chan =
                        reinterpret_cast<amd::rdc::RDCChannel *>(channel);
  if (rdc_chan == nullptr) {
    return RDC_STATUS_GRPC_INVALID_ARG;
  }

  ::grpc::ClientContext ctx;
  ::rdc::GetNumDevicesRequest request;
  ::rdc::GetNumDevicesResponse reply;

  const ::grpc::Status rpc_status =
                    rdc_chan->stub()->GetNumDevices(&ctx, request, &reply);

  if (rpc_status.ok()) {
    *num_gpu = reply.val();
    return static_cast<rdc_status_t>(reply.ret_val());
  }
  return amd::rdc::GrpcErrorToRdcError(rpc_status.error_code());
  CATCH
}
// Converts an RSMI temperature metric into the protobuf enum used on the
// wire. The two enumerations currently share the same numeric values, so a
// plain cast is enough; the conversion lives in its own function so there is
// a single place to change should the mapping ever diverge.
static ::rdc::GetTemperatureRequest_TemperatureMetric
rsmi_temp2rdc_temp(rsmi_temperature_metric_t rsmi_temp) {
  typedef ::rdc::GetTemperatureRequest_TemperatureMetric WireMetric;

  const WireMetric wire_metric = static_cast<WireMetric>(rsmi_temp);
  return wire_metric;
}
// Reads one temperature metric of a device through the GetTemperature RPC.
//
//   channel      handle from rdc_channel_create(); null is rejected with
//                RDC_STATUS_GRPC_INVALID_ARG.
//   dv_ind       device index, forwarded to the server unchanged.
//   sensor_type  sensor selector, forwarded to the server unchanged.
//   metric       which temperature metric to read.
//   temperature  out-parameter; null is rejected with
//                RDC_RSMI_STATUS_INVALID_ARGS.
//
// If the RPC itself fails, the gRPC error is mapped with
// GrpcErrorToRdcError(). Otherwise *temperature receives the reported value
// and the status carried in the response is returned.
rdc_status_t
rdc_dev_temp_metric_get(rdc_channel_t channel, uint32_t dv_ind,
                  uint32_t sensor_type, rsmi_temperature_metric_t metric,
                                                      int64_t *temperature) {
  TRY
  if (temperature == nullptr) {
    return RDC_RSMI_STATUS_INVALID_ARGS;
  }

  amd::rdc::RDCChannel *rdc_chan =
                        reinterpret_cast<amd::rdc::RDCChannel *>(channel);
  if (rdc_chan == nullptr) {
    return RDC_STATUS_GRPC_INVALID_ARG;
  }

  ::rdc::GetTemperatureRequest request;
  request.set_dv_ind(dv_ind);
  request.set_sensor_type(sensor_type);
  request.set_metric(rsmi_temp2rdc_temp(metric));

  ::grpc::ClientContext ctx;
  ::rdc::GetTemperatureResponse reply;
  const ::grpc::Status rpc_status =
                   rdc_chan->stub()->GetTemperature(&ctx, request, &reply);

  if (rpc_status.ok()) {
    *temperature = reply.temperature();
    return static_cast<rdc_status_t>(reply.ret_val());
  }
  return ::amd::rdc::GrpcErrorToRdcError(rpc_status.error_code());
  CATCH
}
// Maps an rdc_status_t to a human-readable description.
//
//   status         the status code to describe.
//   status_string  out-parameter receiving a pointer to a string literal with
//                  static storage duration; the caller must not free it.
//
// Returns RDC_RSMI_STATUS_INVALID_ARGS when `status_string` is null,
// RDC_RSMI_STATUS_UNKNOWN_ERROR (with a generic message stored in
// *status_string) when `status` is not a recognised code, and
// RDC_STATUS_SUCCESS otherwise.
//
// Each message is built from adjacent string literals, which the compiler
// concatenates with no separator: every fragment that continues a sentence
// must therefore carry its own leading or trailing space.
rdc_status_t
rdc_status_string(rdc_status_t status, const char **status_string) {
  TRY
  if (status_string == nullptr) {
    return RDC_RSMI_STATUS_INVALID_ARGS;
  }

  const size_t status_u = static_cast<size_t>(status);
  switch (status_u) {
    case RDC_STATUS_SUCCESS:
      *status_string = "RDC_STATUS_SUCCESS: The function has been executed"
                          " successfully.";
      break;

    case RDC_RSMI_STATUS_INVALID_ARGS:
      *status_string =
          "RDC_RSMI_STATUS_INVALID_ARGS: The provided arguments do not"
                " meet the preconditions required for calling this function.";
      break;

    case RDC_RSMI_STATUS_NOT_SUPPORTED:
      *status_string = "RDC_RSMI_STATUS_NOT_SUPPORTED: This function is not"
                                     " supported in the current environment.";
      break;

    case RDC_RSMI_STATUS_FILE_ERROR:
      *status_string =
        "RDC_RSMI_STATUS_FILE_ERROR: There was an error in finding or"
        " opening a file or directory. The operation may not be supported by "
        "this Linux kernel version.";
      break;

    case RDC_RSMI_STATUS_PERMISSION:
      *status_string = "RDC_RSMI_STATUS_PERMISSION: The user ID of the calling"
        " process does not have sufficient permission to execute a command."
        " Often this is fixed by running as root (sudo).";
      break;

    case RDC_RSMI_STATUS_OUT_OF_RESOURCES:
      *status_string = "RDC_RSMI_STATUS_OUT_OF_RESOURCES: Unable to acquire "
                                                 "memory or other resource";
      break;

    case RDC_RSMI_STATUS_INTERNAL_EXCEPTION:
      *status_string = "RDC_RSMI_STATUS_INTERNAL_EXCEPTION: An internal "
                                                     "exception was caught";
      break;

    case RDC_RSMI_STATUS_INPUT_OUT_OF_BOUNDS:
      *status_string = "RDC_RSMI_STATUS_INPUT_OUT_OF_BOUNDS: The provided "
                                  "input is out of allowable or safe range";
      break;

    case RDC_RSMI_STATUS_INIT_ERROR:
      *status_string = "RDC_RSMI_STATUS_INIT_ERROR: An error occurred during "
          "initialization, during "
          "monitor discovery or when initializing internal data structures";
      break;

    case RDC_RSMI_STATUS_NOT_YET_IMPLEMENTED:
      *status_string = "RDC_RSMI_STATUS_NOT_YET_IMPLEMENTED: The called "
                                "function has not been implemented in this "
                                              "system for this device type";
      break;

    case RDC_RSMI_STATUS_NOT_FOUND:
      *status_string = "RDC_RSMI_STATUS_NOT_FOUND: An item required to "
                                          "complete the call was not found";
      break;

    case RDC_RSMI_STATUS_INSUFFICIENT_SIZE:
      *status_string = "RDC_RSMI_STATUS_INSUFFICIENT_SIZE: Not enough "
                                 "resources were available to fully execute"
                                                                " the call";
      break;

    case RDC_RSMI_STATUS_UNKNOWN_ERROR:
      *status_string = "RDC_RSMI_STATUS_UNKNOWN_ERROR: An unknown error "
                        "prevented the call from completing successfully";
      break;

    case RDC_RSMI_STATUS_INTERRUPT:
      *status_string = "RDC_RSMI_STATUS_INTERRUPT An interrupt occurred while "
                                                   "executing the function";
      break;

    case RDC_STATUS_GRPC_CANCELLED:
      *status_string =
        "RDC_STATUS_GRPC_CANCELLED The operation was cancelled (typically by "
        "the caller).";
      break;

    case RDC_STATUS_GRPC_UNKNOWN:
      *status_string =
        "RDC_STATUS_GRPC_UNKNOWN Unknown error. An example of where this error"
        " may be returned is if a "
        "Status value received from another address space belongs to an error-"
        "space that is not known in this address space. Also errors raised by "
        "APIs that do not return enough error information may be converted to "
        "this error.";
      break;

    case RDC_STATUS_GRPC_INVALID_ARG:
      *status_string =
        "RDC_STATUS_GRPC_INVALID_ARG Client specified an invalid argument. "
        "Note that this differs from "
        "FAILED_PRECONDITION. INVALID_ARGUMENT indicates arguments that are "
        "problematic regardless of the state of the system (e.g., a malformed "
        "file name).";
      break;

    case RDC_STATUS_GRPC_DEADLINE_EXCEEDED:
      *status_string =
        "RDC_STATUS_GRPC_DEADLINE_EXCEEDED Deadline expired before operation "
        "could complete. For operations that "
        "change the state of the system, this error may be returned even if "
        "the operation has completed successfully. For example, a successful "
        "response from a server could have been delayed long enough for the "
        "deadline to expire.";
      break;

    case RDC_STATUS_GRPC_NOT_FOUND:
      *status_string =
        "RDC_STATUS_GRPC_NOT_FOUND Some requested entity (e.g., file or "
        "directory) was not found.";
      break;

    case RDC_STATUS_GRPC_ALREADY_EXISTS:
      *status_string =
       "RDC_STATUS_GRPC_ALREADY_EXISTS Some entity that we attempted to create "
       "(e.g., file or directory) already exists.";
      break;

    case RDC_STATUS_GRPC_PERM_DENIED:
      *status_string =
        "RDC_STATUS_GRPC_PERM_DENIED The caller does not have permission to "
        "execute the specified operation. "
        "PERMISSION_DENIED must not be used for rejections caused by "
        "exhausting some resource (use RESOURCE_EXHAUSTED instead for those "
        "errors). PERMISSION_DENIED must not be used if the caller can not "
        "be identified (use UNAUTHENTICATED instead for those errors).";
      break;

    case RDC_STATUS_GRPC_UNAUTHENTICATED:
      *status_string =
        "RDC_STATUS_GRPC_UNAUTHENTICATED The request does not have valid "
        "authentication credentials for the operation.";
      break;

    case RDC_STATUS_GRPC_RESOURCE_EXHAUSTED:
      *status_string =
        "RDC_STATUS_GRPC_RESOURCE_EXHAUSTED Some resource has been exhausted, "
        "perhaps a per-user quota, or perhaps the "
        "entire file system is out of space.";
      break;

    case RDC_STATUS_GRPC_FAILED_PRECOND:
      *status_string =
        "RDC_STATUS_GRPC_FAILED_PRECOND Operation was rejected because the "
        "system is not in a state required for "
        "the operation's execution. For example, directory to be deleted may "
        "be non-empty, an rmdir operation is applied to a non-directory, etc.\n"
        "A litmus test that may help a service implementor in deciding "
        "between FAILED_PRECONDITION, ABORTED, and UNAVAILABLE:\n"
        " (a) Use UNAVAILABLE if the client can retry just the failing call.\n"
        " (b) Use ABORTED if the client should retry at a higher-level"
        " (e.g., restarting a read-modify-write sequence).\n"
        " (c) Use FAILED_PRECONDITION if the client should not retry until"
        " the system state has been explicitly fixed. E.g., if an \"rmdir\""
        " fails because the directory is non-empty, FAILED_PRECONDITION"
        " should be returned since the client should not retry unless"
        " they have first fixed up the directory by deleting files from it.\n"
        " (d) Use FAILED_PRECONDITION if the client performs conditional"
        " REST Get/Update/Delete on a resource and the resource on the"
        " server does not match the condition. E.g., conflicting"
        " read-modify-write on the same resource.";
      break;

    case RDC_STATUS_GRPC_ABORTED:
      *status_string =
        "RDC_STATUS_GRPC_ABORTED The operation was aborted, "
        "typically due to a concurrency issue like "
        "sequencer check failures, transaction aborts, etc.\n"
        "See litmus test above for deciding between "
        "FAILED_PRECONDITION, ABORTED, "
        "and UNAVAILABLE.";
      break;

    case RDC_STATUS_GRPC_OUT_OF_RANGE:
      *status_string =
        "RDC_STATUS_GRPC_OUT_OF_RANGE Operation was attempted "
        "past the valid range. E.g., seeking or reading "
        "past end of file.\n"
        "Unlike INVALID_ARGUMENT, this error indicates a "
        "problem that may be fixed "
        "if the system state changes. For example, a 32-bit file system will "
        "generate INVALID_ARGUMENT if asked to read "
        "at an offset that is not in the "
        "range [0,2^32-1], but it will generate "
        "OUT_OF_RANGE if asked to read from "
        "an offset past the current file size.\n"
        "There is a fair bit of overlap between FAILED_PRECONDITION and "
        "OUT_OF_RANGE. We recommend using OUT_OF_RANGE "
        "(the more specific error) "
        "when it applies so that callers who are "
        "iterating through a space can "
        "easily look for an OUT_OF_RANGE error to detect when they are done.";
      break;

    case RDC_STATUS_GRPC_UNIMPLEMENTED:
      *status_string =
        "RDC_STATUS_GRPC_UNIMPLEMENTED Operation is not "
        "implemented or not supported/enabled in this service.";
      break;

    case RDC_STATUS_GRPC_INTERNAL:
      *status_string =
        "RDC_STATUS_GRPC_INTERNAL Internal errors. This means "
        "some invariants expected by underlying System has "
        "been broken. If you see one of these errors, "
        "something is very broken.";
      break;

    case RDC_STATUS_GRPC_UNAVAILABLE:
      *status_string =
        "RDC_STATUS_GRPC_UNAVAILABLE The service is currently unavailable. "
        "This is most likely a transient "
        "condition and may be corrected by retrying with a backoff.\n"
        "Warning: Although data MIGHT not have been transmitted when this "
        "status occurs, there is NOT A GUARANTEE that the server has not seen "
        "anything. So in general it is unsafe to retry on this status code "
        "if the call is non-idempotent. "
        "See litmus test above for deciding between "
        "FAILED_PRECONDITION, ABORTED, "
        "and UNAVAILABLE.";
      break;

    case RDC_STATUS_GRPC_DATA_LOSS:
      *status_string =
        "RDC_STATUS_GRPC_DATA_LOSS Unrecoverable data loss or corruption.";
      break;

    case RDC_STATUS_UNKNOWN_ERROR:
      *status_string =
        "RDC_STATUS_UNKNOWN_ERROR An unknown RDC error occurred.";
      break;

    default:
      // Unrecognised code: still hand back a printable message, but tell the
      // caller through the return value that the lookup failed.
      *status_string = "RDC_RSMI_STATUS_UNKNOWN_ERROR An "
                                                    "unknown error occurred";
      return RDC_RSMI_STATUS_UNKNOWN_ERROR;
  }
  return RDC_STATUS_SUCCESS;
  CATCH
}
実行可能ファイル
+70
ファイルの表示
@@ -0,0 +1,70 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <assert.h>
#include <grpcpp/grpcpp.h>
#include <string>
#include "rdc.grpc.pb.h" // NOLINT
#include "rdc/rdc_main.h"
#include "rdc/rdc_client.h"
namespace amd {
namespace rdc {

// Records the connection parameters; no network activity happens until
// Initialize() is called.
RDCChannel::RDCChannel(std::string server_ip, std::string server_port,
                       bool secure)
    : server_ip_(server_ip),
      server_port_(server_port),
      secure_channel_(secure) {
}

RDCChannel::~RDCChannel() {}

// Creates the gRPC channel to "<ip>:<port>" and the Rsmi service stub.
//
// Returns RDC_STATUS_GRPC_UNIMPLEMENTED when a secure channel was requested
// (not yet supported), RDC_STATUS_GRPC_RESOURCE_EXHAUSTED when no stub could
// be created, and RDC_STATUS_SUCCESS otherwise. Debug builds assert that both
// the address and the port are non-empty.
rdc_status_t
RDCChannel::Initialize(void) {
  assert(!server_port_.empty());
  assert(!server_ip_.empty());

  // Secure channels are not yet supported.
  if (secure_channel_) {
    return RDC_STATUS_GRPC_UNIMPLEMENTED;
  }

  std::string target = server_ip() + ":";
  target += server_port();

  std::shared_ptr<grpc::Channel> insecure_chan =
      ::grpc::CreateChannel(target, grpc::InsecureChannelCredentials());

  stub_ = ::rdc::Rsmi::NewStub(insecure_chan);

  return (stub_ == nullptr) ? RDC_STATUS_GRPC_RESOURCE_EXHAUSTED
                            : RDC_STATUS_SUCCESS;
}

}  // namespace rdc
}  // namespace amd
+162
ファイルの表示
@@ -0,0 +1,162 @@
################################################################################
##
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
## AMD Research and AMD HSA Software Development
##
## Advanced Micro Devices, Inc.
##
## www.amd.com
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal with the Software without restriction, including without limitation
## the rights to use, copy, modify, merge, publish, distribute, sublicense,
## and#or sell copies of the Software, and to permit persons to whom the
## Software is furnished to do so, subject to the following conditions:
##
## - Redistributions of source code must retain the above copyright notice,
## this list of conditions and the following disclaimers.
## - Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimers in
## the documentation and#or other materials provided with the distribution.
## - Neither the names of Advanced Micro Devices, Inc,
## nor the names of its contributors may be used to endorse or promote
## products derived from this Software without specific prior written
## permission.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
## DEALINGS WITH THE SOFTWARE.
##
################################################################################
## Parses VERSION_STRING and passes the result back to the caller's scope.
##
## Every run of decimal digits in VERSION_STRING is collected, in order of
## appearance. The first, second and third runs (when present) are returned
## as VERSION_MAJOR, VERSION_MINOR and VERSION_PATCH, and VERSION_STRING is
## set to those runs joined with ".". A variable whose run is missing is left
## untouched in the caller's scope; VERSION_STRING is always set and is empty
## when the input contains no digits.
##
## Example: parse_version("1.2.3-rc1") yields VERSION_MAJOR=1,
## VERSION_MINOR=2, VERSION_PATCH=3 and VERSION_STRING="1.2.3".
function( parse_version VERSION_STRING )

    # Start from a known value so that a same-named variable in an enclosing
    # scope can never leak into the result.
    set ( TEMP_VERSION_STRING "" )

    # Quoted so that an empty input is still passed as an argument.
    string ( REGEX MATCHALL "[0123456789]+" VERSIONS "${VERSION_STRING}" )
    list ( LENGTH VERSIONS VERSION_COUNT )

    if ( VERSION_COUNT GREATER 0 )
        list ( GET VERSIONS 0 MAJOR )
        set ( VERSION_MAJOR "${MAJOR}" PARENT_SCOPE )
        set ( TEMP_VERSION_STRING "${MAJOR}" )
    endif ()

    if ( VERSION_COUNT GREATER 1 )
        list ( GET VERSIONS 1 MINOR )
        set ( VERSION_MINOR "${MINOR}" PARENT_SCOPE )
        set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" )
    endif ()

    if ( VERSION_COUNT GREATER 2 )
        list ( GET VERSIONS 2 PATCH )
        set ( VERSION_PATCH "${PATCH}" PARENT_SCOPE )
        set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" )
    endif ()

    set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE )

endfunction ()
## Determines the version of the repository from its git tags.
##
##   DEFAULT_VERSION_STRING  version used when no usable tag is found.
##   VERSION_PREFIX          tag prefix; tags are matched against
##                           "<VERSION_PREFIX>-[0-9.]*".
##   GIT                     truthy value enables the git lookup.
##
## The default version is parsed first; if `git describe` then succeeds, its
## output is parsed and overrides the default. VERSION_STRING, VERSION_MAJOR
## and VERSION_MINOR are passed back to the caller's scope.
function(get_version_from_tag DEFAULT_VERSION_STRING VERSION_PREFIX GIT)

    parse_version ( "${DEFAULT_VERSION_STRING}" )

    if ( GIT )
        execute_process ( COMMAND git describe --tags --dirty --long
                                  --match "${VERSION_PREFIX}-[0-9.]*"
                          WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                          OUTPUT_VARIABLE GIT_TAG_STRING
                          OUTPUT_STRIP_TRAILING_WHITESPACE
                          RESULT_VARIABLE RESULT )

        # RESULT is an exit code, or a textual error when the command could
        # not be run at all. Referring to the variable by name (rather than
        # expanding it) keeps a multi-word error text from breaking the
        # if() expression.
        if ( RESULT EQUAL 0 )
            parse_version ( "${GIT_TAG_STRING}" )
        endif ()

    endif ()

    set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE )
    set( VERSION_MAJOR  "${VERSION_MAJOR}" PARENT_SCOPE )
    set( VERSION_MINOR  "${VERSION_MINOR}" PARENT_SCOPE )

endfunction()
## Counts the commits between the two most recent tags that start with
## VERSION_PREFIX by running cmake_modules/version_util.sh, and passes the
## result back to the caller's scope as NUM_COMMITS.
##
## NUM_COMMITS is set to "unknown" when the helper script cannot be found or
## does not run successfully.
function(num_change_since_prev_pkg VERSION_PREFIX)
    find_program(get_commits NAMES version_util.sh
                 PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules)

    if (get_commits)
        execute_process( COMMAND "${get_commits}" -c "${VERSION_PREFIX}"
                         WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                         OUTPUT_VARIABLE NUM_COMMITS
                         OUTPUT_STRIP_TRAILING_WHITESPACE
                         RESULT_VARIABLE RESULT )

        # RESULT is an exit code, or a textual error when the script could
        # not be started; testing the variable by name keeps a multi-word
        # error text from breaking the if() expression.
        if ( RESULT EQUAL 0 )
            message("${NUM_COMMITS} were found since previous release")
        else()
            message("Unable to determine number of commits since previous release")
            # Do not hand partial or empty script output to the caller.
            set(NUM_COMMITS "unknown")
        endif()
        set(NUM_COMMITS "${NUM_COMMITS}" PARENT_SCOPE )
    else()
        message("WARNING: Didn't find version_util.sh")
        set(NUM_COMMITS "unknown" PARENT_SCOPE )
    endif()
endfunction()
## Builds the package version string and passes it back to the caller's
## scope as PKG_VERSION_STR, in the form
##
##   <version>.<commits since previous tag>.<build id>-<short hash>[-dirty]
##
##   DEFAULT_VERSION_STRING  version used when no matching tag exists.
##   VERSION_PREFIX          tag prefix used to locate version tags.
##   GIT                     truthy value enables the git lookups.
##
## The build id is taken from the ROCM_BUILD_ID environment variable and
## defaults to "local-build-0". The hash is "unknown" when git is disabled or
## `git rev-parse` fails.
function(get_package_version_number DEFAULT_VERSION_STRING VERSION_PREFIX GIT)
    # Forward the value of GIT, not the literal word "GIT", so that a false
    # value supplied by the caller also disables the lookup in the helper.
    get_version_from_tag("${DEFAULT_VERSION_STRING}" "${VERSION_PREFIX}" "${GIT}")
    num_change_since_prev_pkg("${VERSION_PREFIX}")

    set(PKG_VERSION_STR "${VERSION_STRING}.${NUM_COMMITS}")

    if (DEFINED ENV{ROCM_BUILD_ID})
        set(VERSION_ID $ENV{ROCM_BUILD_ID})
    else()
        set(VERSION_ID "local-build-0")
    endif()

    set(PKG_VERSION_STR "${PKG_VERSION_STR}.${VERSION_ID}")

    set(VERSION_HASH "unknown")
    if (GIT)
        execute_process(COMMAND git rev-parse --short HEAD
                        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                        OUTPUT_VARIABLE GIT_SHORT_HASH
                        OUTPUT_STRIP_TRAILING_WHITESPACE
                        RESULT_VARIABLE RESULT )

        # RESULT may hold a textual error instead of an exit code; testing
        # the variable by name keeps such text from breaking the expression.
        if( RESULT EQUAL 0 )
            set(VERSION_HASH "${GIT_SHORT_HASH}")

            # Check for dirty workspace: `git diff --quiet` exits with 1 when
            # there are uncommitted changes.
            execute_process(COMMAND git diff --quiet
                            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
                            RESULT_VARIABLE RESULT )
            if( RESULT EQUAL 1 )
                set(VERSION_HASH "${VERSION_HASH}-dirty")
            endif()
        endif()
    endif()

    set(PKG_VERSION_STR "${PKG_VERSION_STR}-${VERSION_HASH}")
    set(PKG_VERSION_STR ${PKG_VERSION_STR} PARENT_SCOPE)
endfunction()
+43
ファイルの表示
@@ -0,0 +1,43 @@
#!/bin/bash
# Parse the command line.
#   -c        report the number of commits since the previous tag
# The first argument that is not an option ends option parsing; it is the
# tag prefix and also selects the "count" target.
while [ -n "$1" ]; do
    case "$1" in
        -c)
            TARGET="count"
            ;;
        *)
            TARGET="count"
            break
            ;;
    esac
    shift
done

# Whatever argument is left is the tag prefix; tags are matched against
# "<prefix>*".
TAG_PREFIX="$1"
reg_ex="${TAG_PREFIX}*"
# Prints the number of commits between the two most recently committed tags
# whose names match the global pattern $reg_ex. Prints 0 when fewer than two
# such tags exist.
commits_since_last_tag() {
    local tag_arr prev_tag curr_tag prev_count curr_count

    # The pattern is quoted so that git, not the shell, interprets the "*";
    # unquoted it would be expanded against file names in the working
    # directory.
    tag_arr=( $(git tag --sort=committerdate -l "${reg_ex}" | tail -2) )

    # If we don't have 2 tags, just say there were 0 commits since the
    # last tag.
    if [ "${#tag_arr[@]}" -ne 2 ]; then
        echo 0
        return 0
    fi

    prev_tag="${tag_arr[0]}"
    curr_tag="${tag_arr[1]}"

    prev_count=$(git rev-list --count "$prev_tag")
    curr_count=$(git rev-list --count "$curr_tag")

    # Commits since previous tag:
    echo $(( curr_count - prev_count ))
}
# Dispatch on the target selected while parsing the command line. An unknown
# or missing target is reported on stderr and makes the script fail, so that
# callers checking the exit status notice the problem.
case "$TARGET" in
    count)
        commits_since_last_tag
        ;;
    *)
        echo "Invalid target '$TARGET'" >&2
        exit 1
        ;;
esac

exit 0
実行可能ファイル
+42
ファイルの表示
@@ -0,0 +1,42 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "rdc/rdc_client.h"
#include "rdc.grpc.pb.h" // NOLINT
#include "common/rdc_utils.h"
namespace amd {
namespace rdc {

// Maps a gRPC status code into the RDC status space by offsetting its
// numeric value with RDC_STATUS_GRPC_ERR_FIRST. The sum is computed in
// 32-bit unsigned arithmetic before being cast to rdc_status_t.
rdc_status_t GrpcErrorToRdcError(grpc::StatusCode grpc_err) {
  const uint32_t base = static_cast<uint32_t>(RDC_STATUS_GRPC_ERR_FIRST);
  const uint32_t offset = static_cast<uint32_t>(grpc_err);

  return static_cast<rdc_status_t>(offset + base);
}

}  // namespace rdc
}  // namespace amd
実行可能ファイル
+47
ファイルの表示
@@ -0,0 +1,47 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef COMMON_RDC_UTILS_H_
#define COMMON_RDC_UTILS_H_

#include <stdio.h>  // fprintf and stderr, used by debug_print

// NOTE: this header names rdc_status_t and ::grpc::StatusCode but does not
// include their definitions; the including file must have made both visible
// before including this header.

namespace amd {
namespace rdc {

// debug_print(fmt, ...) writes a printf-style message to stderr in builds
// where NDEBUG is not defined and expands to an empty statement otherwise.
// Being a macro, it is not scoped by the surrounding namespace. The
// `##__VA_ARGS__` form used to allow an empty argument list is a compiler
// extension.
#ifdef NDEBUG
#define debug_print(fmt, ...)               \
  do {                                      \
  } while (false)
#else
#define debug_print(fmt, ...)               \
  do {                                      \
    fprintf(stderr, fmt, ##__VA_ARGS__);    \
  } while (false)
#endif

// Converts a gRPC status code to the corresponding rdc_status_t value.
rdc_status_t GrpcErrorToRdcError(::grpc::StatusCode grpc_err);

}  // namespace rdc
}  // namespace amd

#endif  // COMMON_RDC_UTILS_H_
バイナリ
ファイルの表示
バイナリファイルは表示されません。
実行可能ファイル
+61
ファイルの表示
@@ -0,0 +1,61 @@
# Radeon Data Center Tools
TODO: Add general description of RDC, link to github site
# Important note about Versioning and Backward Compatibility
The RDC library is currently under development, and is therefore subject to change at either the ABI or the API level. The intention is to keep the API as stable as possible even while in development, but in some cases we may need to break backwards compatibility in order to ensure future stability and usability. Following [Semantic Versioning](https://semver.org/) rules, while the RDC library is in a high state of change, the major version will remain 0, and backward compatibility is not ensured.
Once new development has leveled off, the major version will become greater than 0, and backward compatibility will be enforced between major versions.
# Building RDC
#### Additional Required software for building
In order to build the RDC software, the following components are required. Note that the software versions listed are what was used in development. Earlier versions are not guaranteed to work:
TODO: see if protoc and c++ plugin can be installed from packages rather than built from
source. This will be necessary for CI integration.
* CMake (v3.5.0)
* g++ (5.4.0)
* ROCm
* [ROCm SMI Library](https://github.com/RadeonOpenCompute/rocm_smi_lib)
* [gRPC and protoc](https://github.com/grpc/grpc/blob/master/src/cpp/README.md#make)
In order to build the latest documentation, the following are required:
* Doxygen (1.8.11)
* latex (pdfTeX 3.14159265-2.6-1.40.16)
The source code for RDC is available on [ADD RDC GITHUB URL HERE]().
After the RDC library git repository has been cloned to a local Linux machine, building is achieved by following the typical CMake build sequence. Specifically,
##### ```$ mkdir -p build```
##### ```$ cd build```
##### ```$ cmake -DROCM_DIR=<location of ROCm root, including ROCm SMI> <location of root of RDC CMakeLists.txt>```
##### ```$ make```
##### ```# Install library file and header; default location is /opt/rocm```
##### ```$ make install```
The built library will appear in the `build` folder.
#### Building the Documentation
The documentation PDF file can be built with the following steps (continued from the steps above):
##### ```$ make doc```
##### ```$ cd latex```
##### ```$ make```
The reference manual, `refman.pdf` will be in the `latex` directory upon a successful build.
#### Building the Tests UPDATE FOR RDC
In order to verify the build and capability of ROCm SMI on your system and to see an example of how ROCm SMI can be used, you may build and run the tests that are available in the repo. To build the tests, follow these steps:
##### ```$ cd <ROCM SMI source root>```
##### ```$ mkdir lib```
##### ```$ cd lib```
##### ```$ cmake -DROCM_DIR=<ROCM SMI source root> <ROCm SMI source root>/tests/rocm_smi_test```
##### ```$ make```
TODO: THE REMAINDER NEEDS TO BE TAILORED FOR RDC
To run the test, execute the program `rsmitst` that is built from the steps above.
# Usage Basics
# Hello RDC
ファイル差分が大きすぎるため省略します 差分を読み込み
実行可能ファイル
+85
ファイルの表示
@@ -0,0 +1,85 @@
// Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
syntax = "proto3";
// option java_multiple_files = true;
// option java_package = "io.grpc.examples.helloworld";
// option java_outer_classname = "HelloWorldProto";
// option objc_class_prefix = "HLW";
package rdc;
// GetNumDevices: request/response pair for the RPC corresponding to
// rsmi_num_monitor_devices().
// The request carries no fields.
message GetNumDevicesRequest {
}
// val:     the device count; the client library copies it to the caller's
//          output argument.
// ret_val: status of the call; the client library casts it to rdc_status_t
//          and returns it to the caller.
message GetNumDevicesResponse {
uint64 val = 1;
uint64 ret_val = 2;
}
/* VerifyConnection */
// Request/response pair for the VerifyConnection RPC.
// NOTE(review): the meaning of `name` and `message` is not visible in this
// file; presumably a caller-supplied name and a textual reply. Confirm
// against the server implementation.
message VerifyConnectionRequest {
string name = 1;
}
message VerifyConnectionResponse {
string message = 1;
}
/* GetTemperature */
// Request for the GetTemperature RPC.
// dv_ind:      device index.
// sensor_type: sensor selector.
// metric:      which temperature metric to read.
message GetTemperatureRequest {
uint32 dv_ind = 1;
uint32 sensor_type = 2;
// The client library converts rsmi_temperature_metric_t to this enum with a
// plain cast, so the numeric values here must stay identical to those of
// rsmi_temperature_metric_t.
enum TemperatureMetric {
RSMI_TEMP_CURRENT = 0;
RSMI_TEMP_MAX = 1;
RSMI_TEMP_MIN = 2;
RSMI_TEMP_MAX_HYST = 3;
RSMI_TEMP_MIN_HYST = 4;
RSMI_TEMP_CRITICAL = 5;
RSMI_TEMP_CRITICAL_HYST = 6;
RSMI_TEMP_EMERGENCY = 7;
RSMI_TEMP_EMERGENCY_HYST = 8;
RSMI_TEMP_CRIT_MIN = 9;
RSMI_TEMP_CRIT_MIN_HYST = 10;
RSMI_TEMP_OFFSET = 11;
RSMI_TEMP_LOWEST = 12;
RSMI_TEMP_HIGHEST = 13;
}
TemperatureMetric metric = 3;
}
// Response for the GetTemperature RPC.
// temperature: the value read; the client library copies it to the caller's
//              output argument.
// ret_val:     status of the call; the client library casts it to
//              rdc_status_t and returns it to the caller.
message GetTemperatureResponse {
int64 temperature = 1;
uint64 ret_val = 2;
}
// The Rsmi service: unary RPCs through which an RDC client queries the
// server.
service Rsmi {
// RDC admin services
rpc VerifyConnection (VerifyConnectionRequest) returns (VerifyConnectionResponse) {}
// RSMI ID services
rpc GetNumDevices (GetNumDevicesRequest) returns(GetNumDevicesResponse) {}
// RSMI Physical Queries
rpc GetTemperature(GetTemperatureRequest) returns(GetTemperatureResponse) {}
}
実行可能ファイル
+97
ファイルの表示
@@ -0,0 +1,97 @@
# Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.5.0)
# Configure-time banner: prints the settings this directory is about to be
# configured with. RSMI_LIB_DIR and RSMI_INC_DIR are not set in this file and
# are expected to have been defined before this directory is processed.
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Cmake Server ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message("")
message("Build Configuration:")
message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
message("--------RSMI Inc Dir: " ${RSMI_INC_DIR})
message("")
# Record this directory's source location in the cache.
set(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}
    CACHE STRING "Location of RDC client library source code.")

# Installation and packaging prefixes are currently left at their defaults:
# set(CMAKE_INSTALL_PREFIX "/"
#     CACHE STRING "Default installation directory.")
# set(CPACK_PACKAGING_INSTALL_PREFIX "/"
#     CACHE STRING "Default packaging prefix.")

## Compiler flags
# Warnings, 64-bit code generation, SSE/SSE2 and C++11 for every build type.
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -m64")
string(APPEND CMAKE_CXX_FLAGS " -msse -msse2 -std=c++11 ")
# For a 32-bit build use -m32 in place of -m64:
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")

# Every build type other than Release is built unoptimized with debug info
# and the DEBUG macro defined.
if (NOT "${CMAKE_BUILD_TYPE}" STREQUAL Release)
    string(APPEND CMAKE_CXX_FLAGS " -ggdb -O0 -DDEBUG")
else ()
    string(APPEND CMAKE_CXX_FLAGS " -O2")
endif ()
# Source and header locations, relative to this directory.
set(SRC_DIR "src")
set(INC_DIR "include/${RDC}")

# Headers and sources generated from the .proto file are picked up from
# PROTOB_OUT_DIR, which is expected to have been set before this directory
# is processed.
# TODO delete these if not used
file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h")
file(GLOB PROTOBUF_GENERATED_SRCS "${PROTOB_OUT_DIR}/*.cc")

include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
                    "${PROTOB_OUT_DIR}"
                    "${RSMI_INC_DIR}")

# Hand-written server sources followed by the generated protobuf sources.
set(SERVER_SRC_LIST
    "${SRC_DIR}/rdc_rsmi_service.cc"
    "${SRC_DIR}/rdc_main.cc"
    "${PROTOBUF_GENERATED_SRCS}")
message("SERVER_SRC_LIST=${SERVER_SRC_LIST}")
# The server daemon executable and the systemd unit file shipped with it.
set(SERVER_DAEMON_EXE "rdcd")
set(SERVICE_FILE_NAME "rdc.service")

# rocm_smi64 is looked up in RSMI_LIB_DIR.
link_directories(${RSMI_LIB_DIR})
add_executable(${SERVER_DAEMON_EXE} "${SERVER_SRC_LIST}")
# target_include_directories(${SERVER_DAEMON_EXE} PUBLIC ${RSMI_INC_DIR})

# PRIVATE: an executable has no consumers, and the keyword form avoids the
# legacy semantics of the keyword-less signature.
target_link_libraries(${SERVER_DAEMON_EXE} PRIVATE pthread rt grpc grpc++
                      grpc++_reflection dl protobuf rocm_smi64)

# The daemon is installed executable by its owner only and readable by
# everyone; the unit file goes to the systemd system directory. Both
# destinations are relative to the install prefix.
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${SERVER_DAEMON_EXE}
        PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
        DESTINATION usr/sbin)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/${SERVICE_FILE_NAME}
        DESTINATION lib/systemd/system)

message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Finished Cmake Server ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
+55
ファイルの表示
@@ -0,0 +1,55 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef SERVER_INCLUDE_RDC_RDC_MAIN_H_
#define SERVER_INCLUDE_RDC_RDC_MAIN_H_
#include <grpcpp/grpcpp.h>
#include <string>
#include <memory>
#include "rdc/rdc_rsmi_service.h"
// Wraps the gRPC server used by the rdcd daemon together with the services
// it exposes.
//
// Typical use, as done by main(): construct, Initialize(), select the
// services with the set_* accessors, then call Run(). ShutDown() is meant to
// be called from another thread to stop a running server.
class RDCServer {
 public:
  RDCServer();
  ~RDCServer();

  // Currently a no-op hook for one-time setup.
  void Initialize();

  // Builds and starts the gRPC server, then waits on it.
  void Run(void);

  // Whether Run() should create and register the RSMI service.
  bool start_rsmi_service(void) const {return start_rsmi_service_;}
  void set_start_rsmi_service(bool s) {start_rsmi_service_ = s;}

  // Stops the gRPC server and releases the RSMI service instance.
  void ShutDown(void);

 private:
  // NOTE(review): no definition of this member is visible next to the other
  // RDCServer methods; signal handling is done by a file-local free function
  // instead. Confirm whether this declaration is still needed.
  void HandleSignal(int sig);

  // Address (host:port) the server listens on.
  std::string server_address_;

  // Default-initialized so that start_rsmi_service() never reads an
  // indeterminate value when the setter has not been called.
  bool start_rsmi_service_ = false;

  std::unique_ptr<::grpc::Server> server_;

  // Owned raw pointer; allocated in Run() and released in ShutDown().
  RsmiServiceImpl *rsmi_service_ = nullptr;
};
#endif // SERVER_INCLUDE_RDC_RDC_MAIN_H_
+54
ファイルの表示
@@ -0,0 +1,54 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef SERVER_INCLUDE_RDC_RDC_RSMI_SERVICE_H_
#define SERVER_INCLUDE_RDC_RDC_RSMI_SERVICE_H_
#include "rdc.grpc.pb.h" // NOLINT
#include "rocm_smi/rocm_smi.h"
#include "rdc/rdc_rsmi_service.h"
// Synchronous gRPC service that answers ::rdc::Rsmi requests by calling into
// the ROCm SMI library.
class RsmiServiceImpl final : public ::rdc::Rsmi::Service {
 public:
  RsmiServiceImpl();
  ~RsmiServiceImpl();

  // Initializes the ROCm SMI library with the given flags and returns the
  // status reported by the library.
  rsmi_status_t Initialize(uint64_t rsmi_init_flags = 0);

  // Connectivity check: replies with a greeting built from the request name.
  ::grpc::Status VerifyConnection(
      ::grpc::ServerContext* context,
      const rdc::VerifyConnectionRequest* request,
      rdc::VerifyConnectionResponse* reply) override;

  // Reports the number of monitored devices together with the RSMI status.
  ::grpc::Status GetNumDevices(
      ::grpc::ServerContext* context,
      const ::rdc::GetNumDevicesRequest* request,
      ::rdc::GetNumDevicesResponse* reply) override;

  // Reports one temperature metric of one device together with the RSMI
  // status.
  ::grpc::Status GetTemperature(
      ::grpc::ServerContext* context,
      const ::rdc::GetTemperatureRequest* request,
      ::rdc::GetTemperatureResponse* response) override;

 private:
  // True once Initialize() succeeded; tells the destructor whether the
  // library has to be shut down.
  bool rsmi_initialized_;
};
#endif // SERVER_INCLUDE_RDC_RDC_RSMI_SERVICE_H_
実行可能ファイル
+37
ファイルの表示
@@ -0,0 +1,37 @@
# References:
# https://linuxconfig.org/how-to-create-systemd-service-unit-in-linux
# https://www.linux.com/tutorials/systemd-services-beyond-starting-and-stopping/
# Unit metadata and start-up ordering for the rdcd daemon.
[Unit]
Description=Radeon Data Center Daemon (rdcd)
# Start rdcd only after the network target has been reached.
After=network.target
# Add any services that must be started before rdcd here
#After=
# Add any non-service units required by rdcd here
#Requires=
# How the daemon process is started and stopped.
[Service]
# The process started by ExecStart is treated as the main service process.
# NOTE(review): this matches rdcd only as long as it does not fork into the
# background -- confirm against the daemon source.
Type=simple
# If we need to start anything before rdcd, use this
# ExecStartPre=
# Path must match the location the daemon binary is installed to.
ExecStart=/usr/sbin/rdcd
# If we need to start anything after rdcd use this
# ExecStartPost=
# If we want to change the default time out for the ExecStop (90 sec),
# we can modify that time limit with TimeoutStopSec
# TimeoutStopSec=
# Note, we can have multiple ExecStop commands if necessary
# Signal 15 is SIGTERM; the daemon is asked to shut itself down.
ExecStop=/bin/kill -15 $MAINPID
#ExecReload=
#ExecStartPost=
#ExecStopPost=
# Enabling the unit hooks it into the multi-user target.
[Install]
WantedBy= multi-user.target
実行可能ファイル
+8
ファイルの表示
@@ -0,0 +1,8 @@
#!/bin/bash
# Configures and builds the project out-of-tree in ./build, using the ROCm
# installation under /opt/rocm.

# Stop at the first failing command. Without this a failed mkdir/cd would let
# cmake and make run in the wrong directory, and a failed cmake would still be
# followed by make.
set -e

mkdir -p build
cd build
cmake -DROCM_DIR=/opt/rocm ..
make
cd ..
実行可能ファイル
+283
ファイルの表示
@@ -0,0 +1,283 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <assert.h>
#include <fcntl.h>
#include <grpcpp/grpcpp.h>
#include <sys/resource.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <atomic>
#include <csignal>
#include <iostream>
#include <memory>
#include <string>
#include "rdc.grpc.pb.h" // NOLINT
#include "rocm_smi/rocm_smi.h"
#include "rdc/rdc_main.h"
#include "rdc/rdc_rsmi_service.h"
// Request flags shared between the signal handler (writer) and the polling
// thread / main thread (readers). They are atomics because they are accessed
// from asynchronous signal context and from more than one thread; a plain
// bool gives no visibility guarantee in either case.
static std::atomic<bool> sShutDownServer{false};
static std::atomic<bool> sRestartServer{false};

// Name used as a prefix in daemon diagnostics.
static const char *kDaemonName = "rdcd";
// Working directory the daemon switches to.
static const char *kRDCDHomeDir = "/";
// Lock file used to make sure only one daemon instance is running.
static const char *kDaemonLockFile = "/var/run/rdcd.lock";
// Sets the default listen address and puts every scalar member into a known
// state. start_rsmi_service_ is initialized explicitly so that
// start_rsmi_service() never returns an indeterminate value.
RDCServer::RDCServer() : server_address_("0.0.0.0:50051"),
                         start_rsmi_service_(false),
                         rsmi_service_(nullptr) {
}
// Nothing is released here explicitly; the RSMI service instance is freed by
// ShutDown().
RDCServer::~RDCServer() {}
// Placeholder for one-time server setup; intentionally does nothing yet.
void RDCServer::Initialize() {}
// TODO(cfreehil): read server config from YAML file. Config can include things
// like server address, Secure/Insecure creds, rsmi_init flags, etc.
void
RDCServer::Run() {
::grpc::ServerBuilder builder;
// Listen on the given address without any authentication mechanism.
builder.AddListeningPort(server_address_, grpc::InsecureServerCredentials());
// Register services as the instances through which we'll communicate with
// clients. These are synchronous services.
if (start_rsmi_service()) {
rsmi_service_ = new RsmiServiceImpl();
builder.RegisterService(rsmi_service_);
// TODO(cfreehil): pass flags from cnfg file
rsmi_status_t ret = rsmi_service_->Initialize(0);
if (ret != RSMI_STATUS_SUCCESS) {
std::cerr << "Failed to start RSMI service" << std::endl;
return;
}
}
// Finally assemble the server.
// std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());
server_ = builder.BuildAndStart();
std::cerr << "Server listening on " << server_address_.c_str() << std::endl;
server_->Wait();
}
// Signal handler: reports the signal on stderr and records a shutdown
// request for SIGINT / SIGTERM.
//
// Only async-signal-safe facilities are used. iostreams are not safe to call
// from a signal handler, so the messages are emitted with write(2) and the
// signal number is formatted by hand.
static void HandleSignal(int sig) {
  static const char kCaughtMsg[] = "Caught signal ";
  static const char kUnexpectedMsg[] = "Unexpected signal caught\n";

  // Build "<sig>\n" right-aligned in the buffer: newline, up to 10 digits and
  // an optional sign fit in 13 bytes.
  char sig_text[13];
  size_t pos = sizeof(sig_text);
  sig_text[--pos] = '\n';

  unsigned int magnitude = static_cast<unsigned int>(sig);
  if (sig < 0) {
    magnitude = 0u - magnitude;
  }
  do {
    sig_text[--pos] = static_cast<char>('0' + (magnitude % 10u));
    magnitude /= 10u;
  } while (magnitude != 0u);
  if (sig < 0) {
    sig_text[--pos] = '-';
  }

  // The results are deliberately ignored: there is nothing useful to do from
  // inside a handler if the diagnostic cannot be written.
  ssize_t written = write(STDERR_FILENO, kCaughtMsg, sizeof(kCaughtMsg) - 1);
  written = write(STDERR_FILENO, &sig_text[pos], sizeof(sig_text) - pos);

  // For most signals, we will want to exit, so make that the default case
  // Handle the other signals specifically.
  switch (sig) {
    case SIGINT:
    case SIGTERM:
      sShutDownServer = true;
      break;

    // Grpc doesn't seem to handle stopping and restarting well, so
    // user must manually do these steps
    // case SIGHUP:
    // sRestartServer = true;
    // break;

    default:
      written = write(STDERR_FILENO, kUnexpectedMsg,
                      sizeof(kUnexpectedMsg) - 1);
  }
  (void)written;
}
static void InitializeSignalHandling(void) {
// signal(SIGHUP, HandleSignal);
signal(SIGINT, HandleSignal);
signal(SIGTERM, HandleSignal);
}
// Stops the gRPC server (if one was started) and releases the RSMI service.
//
// This may be reached before Run() has created the server, or after Run()
// failed to create it, so server_ has to be checked before use.
void
RDCServer::ShutDown(void) {
  if (server_) {
    server_->Shutdown();
  }

  // delete on a null pointer is a no-op, so no separate check is needed.
  delete rsmi_service_;
  rsmi_service_ = nullptr;
}
static void * ProcessSignalLoop(void *server_ptr) {
assert(server_ptr != nullptr);
RDCServer *server = reinterpret_cast<RDCServer *>(server_ptr);
while (1) {
if (sShutDownServer) {
std::cerr << "Shutting down RDC Server." << std::endl;
server->ShutDown();
// We will need to add shutdown of any completion queues
// here, when/if we add them
break;
} else if (sRestartServer) {
std::cerr << "Re-starting RDC Server." << std::endl;
// We will need to add shutdown of any completion queues
// here, when/if we add them
server->ShutDown();
server->Run();
sRestartServer = false;
}
sleep(1);
}
pthread_exit(0);
}
// Enforces a single running daemon instance by taking a lock on
// kDaemonLockFile and recording the pid of this process in it.
//
// Exits with status 0 when another instance already holds the lock and with
// status 1 when the lock file cannot be opened or written. The descriptor is
// intentionally left open on success so the lock stays held.
static void ExitIfAlreadyRunning(void) {
  int single_proc_fh = open(kDaemonLockFile, O_RDWR|O_CREAT, 0640);

  if (single_proc_fh < 0) {
    std::cerr << "Failed to open file lock:" << kDaemonLockFile << std::endl;
    exit(1);
  }

  if (lockf(single_proc_fh, F_TLOCK, 0) < 0) {
    std::cerr << "Daemon already running. Exiting this instance." << std::endl;
    exit(0);
  }

  // Only truncate once the lock is held. Without this, digits of a longer
  // pid written by an earlier instance would remain after the new pid.
  if (ftruncate(single_proc_fh, 0) < 0) {
    std::cerr << "Failed to truncate file lock:" << kDaemonLockFile <<
                                                                   std::endl;
    exit(1);
  }

  std::string pid_str = std::to_string(getpid());
  ssize_t fsz = write(single_proc_fh, pid_str.c_str(), pid_str.size());

  // Checked at run time rather than with assert() so that the check is still
  // performed when assertions are compiled out.
  if (fsz < 0 || static_cast<size_t>(fsz) != pid_str.size()) {
    std::cerr << "Failed to write pid to file lock:" << kDaemonLockFile <<
                                                                   std::endl;
    exit(1);
  }
}
// Prepares the process to run as a daemon: sets the umask, changes to
// kRDCDHomeDir, closes inherited descriptors, points stdin/stdout/stderr at
// /dev/null, enforces a single instance and installs the signal handlers.
// The fork-based detachment is currently compiled out.
static void
MakeDaemon() {
  int fd0, fd1, fd2;
  struct rlimit max_files;

  // RSMI, for one thing, will need to be able to read/write files
  // Note that umask turns *off* permission for a given bit, so we want
  // the complement of the permissions we want files to have.
  umask(027);

  // To Do; Make this optional based on CL option.
#if 0
  pid_t pid;
  // We want to dissassociate with calling process, so fork, and let
  // daemon live in child process. Parent will exit.
  if ((pid = fork()) < 0) {
    std::cerr << "Failed to fork rdcd daemon." << std::endl;
  } else if (pid != 0) {  // parent
    exit(0);
  }
  setsid();

  // Insulate from pgrp leader death
  signal(SIGHUP, SIG_IGN);
  if ((pid = fork()) < 0) {
    std::cerr << "Failed to fork after signal(SIGHUP, SIG_IGN)" << std::endl;
  } else if (pid != 0) {  // parent
    exit(0);
  }
#endif

  // chdir to dir that will always be available
  if (chdir(kRDCDHomeDir) < 0) {
    std::cerr << "Failed to change directory to " <<kRDCDHomeDir << std::endl;
  }

  // Determine max. number of open files possible. We need to close all
  // open descriptors.
  if (getrlimit(RLIMIT_NOFILE, &max_files) < 0) {
    std::cerr << kDaemonName << ": can't get file limit" << std::endl;
    // max_files was not filled in; fall back to the same cap that is applied
    // below instead of reading an indeterminate value.
    max_files.rlim_max = 1024;
  }

  // Close files
  if (max_files.rlim_max > 1024) {
    max_files.rlim_max = 1024;
  }
  for (uint32_t i = 0; i < max_files.rlim_max; i++) {
    close(i);
  }

  // Direct stdin, stdout, stderr to /dev/null. All descriptors were closed
  // above, so the open/dup calls are expected to yield 0, 1 and 2.
  fd0 = open("/dev/null", O_RDWR);
  fd1 = dup(0);
  fd2 = dup(0);

  // Set up log file
  // openlog(kDaemonName, LOG_CONS|LOG_PID, LOG_DAEMON);
  if (fd0 != 0 || fd1 != 1 || fd2 != 2) {
    std::cerr << "unexpected fildes: " << fd0 << " " << fd1 <<
                                                   " " << fd2 << std::endl;
    exit(1);
  }

  ExitIfAlreadyRunning();

  InitializeSignalHandling();
}
// Daemon entry point: daemonizes the process, starts the thread that serves
// shutdown requests and runs the RDC server until it stops.
// Returns 0 after a requested shutdown and 1 on any start-up failure.
int main(int argc, char** argv) {
  RDCServer rdc_server;

  (void)argc;  // Ignore for now
  (void)argv;

  MakeDaemon();

  rdc_server.Initialize();

  // TODO(cfreehil): Eventually, set these by reading a config file
  // Configure the server before any other thread gets a pointer to it.
  rdc_server.set_start_rsmi_service(true);
  // rdc_server.set_secure_communications(false);
  // rdc_server.set_address("0.0.0.0:50051")

  // Create a thread to handle signals to shutdown gracefully
  pthread_t sig_listen_thread;
  int thr_ret = pthread_create(&sig_listen_thread, NULL,
                                        ProcessSignalLoop, &rdc_server);
  if (thr_ret) {
    std::cerr <<
      "Failed to create ProcessSignalLoop. pthread_create() returned " <<
                                                        thr_ret << std::endl;
    return 1;
  }

  rdc_server.Run();

  if (sShutDownServer) {
    // The listener thread leaves its loop once it has served the shutdown
    // request. Wait for it so that rdc_server is not destroyed while that
    // thread is still inside RDCServer::ShutDown().
    pthread_join(sig_listen_thread, NULL);
    std::cerr << "RDC server successfully shut down." << std::endl;
    return 0;
  }

  std::cerr << "RDC server failed to start." << std::endl;
  return 1;
}
+138
ファイルの表示
@@ -0,0 +1,138 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <assert.h>
#include <grpcpp/grpcpp.h>
#include <iostream>
#include <memory>
#include <string>
#include <csignal>
#include "rdc.grpc.pb.h" // NOLINT
#include "rocm_smi/rocm_smi.h"
#include "rdc/rdc_rsmi_service.h"
// The RSMI library is not touched until Initialize() is called.
RsmiServiceImpl::RsmiServiceImpl()
    : rsmi_initialized_(false) {}
// Shuts the RSMI library down again, but only if this instance initialized
// it successfully.
RsmiServiceImpl::~RsmiServiceImpl() {
  if (!rsmi_initialized_) {
    return;
  }

  rsmi_status_t shutdown_status = rsmi_shut_down();
  rsmi_initialized_ = false;
  assert(shutdown_status == RSMI_STATUS_SUCCESS);
}
// Translates an RDC temperature metric into the RSMI one. The two
// enumerations currently map 1-to-1, so this is a plain cast; the helper
// exists so there is a single place to adapt should that ever change.
static rsmi_temperature_metric_t
rdc_temp2rsmi_temp(::rdc::GetTemperatureRequest_TemperatureMetric rdc_temp) {
  const rsmi_temperature_metric_t rsmi_metric =
                          static_cast<rsmi_temperature_metric_t>(rdc_temp);
  return rsmi_metric;
}
// Initializes the RSMI library with |rsmi_init_flags|. On success the
// instance remembers that it owns the initialization; on failure a message
// is printed. The RSMI status is returned either way.
rsmi_status_t
RsmiServiceImpl::Initialize(uint64_t rsmi_init_flags) {
  const rsmi_status_t init_status = rsmi_init(rsmi_init_flags);

  if (init_status == RSMI_STATUS_SUCCESS) {
    rsmi_initialized_ = true;
    return init_status;
  }

  std::cout << "rsmi_init() returned error" << std::endl;
  return init_status;
}
// Connectivity check: answers with "Hello " followed by the name carried in
// the request. Always returns OK.
::grpc::Status
RsmiServiceImpl::VerifyConnection(::grpc::ServerContext* context,
                              const rdc::VerifyConnectionRequest* request,
                                    rdc::VerifyConnectionResponse* reply) {
  (void)context;  // Quiet warning for now

  const std::string greeting = std::string("Hello ") + request->name();
  reply->set_message(greeting);

  return ::grpc::Status::OK;
}
// Queries RSMI for the number of monitored devices and stores both the count
// and the RSMI status in |reply|. The gRPC status is always OK; callers have
// to inspect ret_val to detect an RSMI failure.
::grpc::Status
RsmiServiceImpl::GetNumDevices(::grpc::ServerContext* context,
                                  const ::rdc::GetNumDevicesRequest* request,
                                       ::rdc::GetNumDevicesResponse* reply) {
  assert(reply != nullptr);

  // Start from a defined value: the count is copied into the reply even when
  // the RSMI call fails and leaves it untouched.
  uint32_t num_devices = 0;

  (void)context;  // Quiet warning for now;
  (void)request;

  rsmi_status_t ret = rsmi_num_monitor_devices(&num_devices);

  // TODO(cfreehil) replace below with macro
  if (ret != RSMI_STATUS_SUCCESS) {
    std::cout << "rsmi_num_monitor_devices() returned error" << std::endl;
  }

  reply->set_val(num_devices);
  reply->set_ret_val(ret);

  return ::grpc::Status::OK;
}
// Reads one temperature metric for the device and sensor named in |request|
// and stores the value and the RSMI status in |response|. The gRPC status is
// always OK; callers have to inspect ret_val to detect an RSMI failure.
::grpc::Status
RsmiServiceImpl::GetTemperature(::grpc::ServerContext* context,
                                 const ::rdc::GetTemperatureRequest* request,
                                   ::rdc::GetTemperatureResponse* response) {
  (void)context;  // Quiet warning for now;

  // Start from a defined value: the reading is copied into the response even
  // when the RSMI call fails and leaves it untouched.
  int64_t temperature = 0;

  rsmi_status_t ret = rsmi_dev_temp_metric_get(request->dv_ind(),
            request->sensor_type(), rdc_temp2rsmi_temp(request->metric()),
                                                              &temperature);

  response->set_temperature(temperature);
  response->set_ret_val(ret);

  return ::grpc::Status::OK;
}
// TODO(cfreehil): read server config from YAML file. Config can include things
// like server address, Secure/Insecure creds, rsmi_init flags, etc.
//
// Stand-alone helper that serves the RSMI service on 0.0.0.0:50051 and waits
// until the server is shut down. Returns early, after printing a message, if
// RSMI or the server cannot be started.
void RunServer() {
  std::string server_address("0.0.0.0:50051");
  RsmiServiceImpl service;

  // Bring RSMI up through the service object, and do so before the server
  // accepts requests. Going through Initialize() lets the service destructor
  // shut the library down again.
  uint64_t flags = 0;  // TODO(cfreehil) Read this from config file
  if (service.Initialize(flags) != RSMI_STATUS_SUCCESS) {
    std::cout << "rsmi_init() returned error. Exiting" << std::endl;
    return;
  }

  ::grpc::ServerBuilder builder;

  // Listen on the given address without any authentication mechanism.
  builder.AddListeningPort(server_address, grpc::InsecureServerCredentials());

  // Register "service" as the instance through which we'll communicate with
  // clients. In this case it corresponds to an *synchronous* service.
  builder.RegisterService(&service);

  // Finally assemble the server. |server| is declared after |service| so it
  // is destroyed first.
  std::unique_ptr<::grpc::Server> server(builder.BuildAndStart());

  // BuildAndStart() yields a null pointer when the server cannot be started.
  if (!server) {
    std::cout << "Failed to start server on " << server_address << std::endl;
    return;
  }
  std::cout << "Server listening on " << server_address << std::endl;

  // Wait for the server to shutdown. Note that some other thread must be
  // responsible for shutting down the server for this call to ever return.
  server->Wait();
}
実行可能ファイル
+34
ファイルの表示
@@ -0,0 +1,34 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#ifndef INCLUDE_RDC_RDC64CONFIG_H_
#define INCLUDE_RDC_RDC64CONFIG_H_
// This file is generated on build.
// The @NAME@ tokens below are placeholders that are replaced with concrete
// values when the header is generated.
// NOTE(review): presumably filled in by CMake configure_file() -- confirm
// against the build scripts. The macros keep the rocm_smi_ prefix; verify
// that this is intended for RDC.
#define rocm_smi_VERSION_MAJOR @rocm_smi_VERSION_MAJOR@
#define rocm_smi_VERSION_MINOR @rocm_smi_VERSION_MINOR@
#define rocm_smi_VERSION_PATCH @rocm_smi_VERSION_PATCH@
// The build identifier is the only component emitted as a string literal.
#define rocm_smi_VERSION_BUILD "@rocm_smi_VERSION_BUILD@"
#endif // INCLUDE_RDC_RDC64CONFIG_H_
+84
ファイルの表示
@@ -0,0 +1,84 @@
# Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Minimum version of cmake required
#
cmake_minimum_required(VERSION 3.5.0)

message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Cmake Example Lib ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")

## Compiler flags
string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -m64")
string(APPEND CMAKE_CXX_FLAGS " -msse -msse2 -std=c++11 ")
# Use this instead of above for 32 bit
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")

# Both operands are quoted so that neither side is re-interpreted as a
# variable name; an unquoted Release would be dereferenced if a variable
# with that name happened to be defined.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  string(APPEND CMAKE_CXX_FLAGS " -O2")
else()
  # Any build type other than Release (including an empty one) is built
  # unoptimized with debug info and the DEBUG define.
  string(APPEND CMAKE_CXX_FLAGS " -ggdb -O0 -DDEBUG")
endif()

# Required Defines first:
# ROCM_DIR is expected to be provided by the caller of cmake.
set(RSMI_INC_DIR ${ROCM_DIR}/include)
set(RSMI_LIB_DIR ${ROCM_DIR}/lib)

## Summary of the configuration in effect for this sub-project.
message("")
message("Build Configuration:")
message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
message("------------Compiler: " ${CMAKE_CXX_COMPILER})
message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION})
message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
message("--------RSMI Inc Dir: " ${RSMI_INC_DIR})
message("")

set(SRC_DIR "${PROJECT_SOURCE_DIR}/tests/example")
set(INC_DIR "${PROJECT_SOURCE_DIR}/client/include")

## Include common cmake modules
include(utils)

## Verbose output.
set(CMAKE_VERBOSE_MAKEFILE on)

include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../client/include"
                    "${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}")

## The example client consists of a single source file and links against
## the rdc_client library.
set(EXAMPLE_SRC_LIST "${SRC_DIR}/rdc_client_test.cc")
message("EXAMPLE_SRC_LIST=${EXAMPLE_SRC_LIST}")
set(CLIENT_LIB_INC_LIST "${INC_DIR}/rdc_client.h")

set(TEST_CLIENT_EXE "rdc_test_client")
add_executable(${TEST_CLIENT_EXE} "${EXAMPLE_SRC_LIST}")
target_link_libraries(${TEST_CLIENT_EXE} rdc_client)

message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
message(" Finished Cmake Example ")
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
+90
ファイルの表示
@@ -0,0 +1,90 @@
/*
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <unistd.h>
#include <grpcpp/grpcpp.h>
#include <iostream>
#include "rdc/rdc_client.h"
#include "rocm_smi/rocm_smi.h"
// Prints a diagnostic when RET is not RDC_STATUS_SUCCESS. Control flow is not
// altered: execution continues with the next statement either way.
// The macro expands to a bare `if` block and is used without a trailing
// semicolon at its call sites; RET is evaluated more than once.
#define CHK_RET_STATUS(RET) \
if ((RET) != RDC_STATUS_SUCCESS) { \
std::cout << "rdc call returned error: " << (RET) << std::endl; \
}
// Like CHK_RET_STATUS, but additionally executes `continue` on failure, so it
// may only be used directly inside a loop body. It must not be wrapped in a
// do/while(0): the `continue` has to bind to the caller's loop.
#define CHK_RET_STATUS_CONT(RET) \
if ((RET) != RDC_STATUS_SUCCESS) { \
std::cout << "rdc call returned error: " << (RET) << std::endl; \
continue; \
}
// Example client: connects to an RDC server, prints the number of GPUs it
// reports and the current junction temperature of each one.
//
// Usage: rdc_test_client [host [port]]   (defaults: localhost 50051)
// Returns 0 on success and 1 if the channel cannot be created or destroyed,
// or if the GPU count cannot be retrieved.
int main(int argc, char** argv) {
  rdc_status_t ret;
  rdc_channel_t server;
  // Given defined start values so that nothing indeterminate is ever printed
  // or used as a loop bound.
  uint64_t num_gpu = 0;
  int64_t temperature = 0;

  std::string serv_host("localhost");
  std::string serv_port("50051");

  if (argc > 1) {
    serv_host = argv[1];
  }
  if (argc > 2) {
    serv_port = argv[2];
  }

  std::cout << "Attempting to create channel to " << serv_host << ":" <<
                                                     serv_port << std::endl;
  ret = rdc_channel_create(&server, serv_host.c_str(), serv_port.c_str(),
                                                                     false);
  CHK_RET_STATUS(ret)
  if (ret != RDC_STATUS_SUCCESS) {
    // Without a channel none of the following calls can work.
    return 1;
  }
  std::cout << "Successfully created channel" << std::endl;

  std::cout << "Getting number of gpus at server..." << std::endl;
  ret = rdc_num_gpus_get(server, &num_gpu);
  CHK_RET_STATUS(ret)
  if (ret != RDC_STATUS_SUCCESS) {
    // Release the channel before giving up; its result no longer matters.
    (void)rdc_channel_destroy(server);
    return 1;
  }
  std::cout << "Number of GPUs at server is " << num_gpu << std::endl;

  for (uint32_t dv_ind = 0; dv_ind < num_gpu; ++dv_ind) {
    std::cout << "Info for Device " << dv_ind << ":" << std::endl;
    std::cout << "\tGetting temperature..." << std::endl;
    ret = rdc_dev_temp_metric_get(server, dv_ind, RSMI_TEMP_TYPE_JUNCTION,
                                          RSMI_TEMP_CURRENT, &temperature);
    // A failure for one device is reported and the next device is tried.
    CHK_RET_STATUS_CONT(ret)
    std::cout << "\t GPU " << dv_ind << " has a temperature of " <<
                                                  temperature << std::endl;
  }

  ret = rdc_channel_destroy(server);
  CHK_RET_STATUS(ret)
  if (ret != RDC_STATUS_SUCCESS) {
    return 1;
  }
  std::cout << "Successfully destroyed channel to " << serv_host << ":" <<
                                                     serv_port << std::endl;
  return 0;
}