Break srvs. into rsmi & admin srvs. Add VerifyConnection api.
Change-Id: I67567264c37e31f3409062a14e56eba4801cd944
[ROCm/rdc commit: dc6f6f3e9a]
This commit is contained in:
@@ -25,8 +25,8 @@ cmake_minimum_required(VERSION 3.5.0)
|
||||
|
||||
# ROCM_DIR should be passed in via command line; these will be used
|
||||
# in sub-projects
|
||||
set(RSMI_INC_DIR ${ROCM_DIR}/include)
|
||||
set(RSMI_LIB_DIR ${ROCM_DIR}/lib)
|
||||
set(RSMI_INC_DIR ${ROCM_DIR}/rocm_smi/include)
|
||||
set(RSMI_LIB_DIR ${ROCM_DIR}/rocm_smi/lib)
|
||||
|
||||
## Set default module path
|
||||
if(NOT DEFINED CMAKE_MODULE_PATH)
|
||||
|
||||
@@ -67,7 +67,7 @@ set(CLIENT_LIB "rdc_client")
|
||||
set(RDC "rdc")
|
||||
set(CLIENT_LIB_COMPONENT "lib${CLIENT_LIB}")
|
||||
set(SRC_DIR "${PROJECT_SOURCE_DIR}/client/src")
|
||||
set(INC_DIR "${PROJECT_SOURCE_DIR}/client/include/rdc")
|
||||
set(RDC_CLIENT_INC_DIR "${PROJECT_SOURCE_DIR}/client/include/rdc")
|
||||
|
||||
################# Determine the library version #########################
|
||||
## Setup the SO version based on git tags.
|
||||
@@ -119,27 +119,30 @@ set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h")
|
||||
file(GLOB PROTOBUF_GENERATED_SRCS "${PROTOB_OUT_DIR}/*.cc")
|
||||
|
||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}")
|
||||
set(CLIENT_LIB_SRC_LIST "${SRC_DIR}/rdc_client.cc")
|
||||
set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${SRC_DIR}/rdc_main.cc")
|
||||
set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${SRC_DIR}/rdc_client_main.cc")
|
||||
set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${PROTOBUF_GENERATED_SRCS}")
|
||||
set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST}
|
||||
"${PROJECT_SOURCE_DIR}/common/rdc_utils.cc")
|
||||
message("CLIENT_LIB_SRC_LIST=${CLIENT_LIB_SRC_LIST}")
|
||||
|
||||
set(CLIENT_LIB_INC_LIST "${INC_DIR}/rdc_client.h")
|
||||
set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST} "${INC_DIR}/rdc_exception.h")
|
||||
set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST} "${INC_DIR}/rdc_main.h")
|
||||
set(CLIENT_LIB_INC_LIST "${RDC_CLIENT_INC_DIR}/rdc_client.h")
|
||||
set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST}
|
||||
"${PROJECT_SOURCE_DIR}/common/rdc_utils.h")
|
||||
"${RDC_CLIENT_INC_DIR}/rdc_exception.h")
|
||||
set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST}
|
||||
"${RDC_CLIENT_INC_DIR}/rdc_client_main.h")
|
||||
set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST}
|
||||
"${PROJECT_SOURCE_DIR}/common/rdc_utils.h")
|
||||
|
||||
add_library(${CLIENT_LIB} SHARED ${CLIENT_LIB_SRC_LIST} ${CLIENT_LIB_INC_LIST})
|
||||
target_link_libraries(${CLIENT_LIB} pthread rt grpc grpc++ grpc++_reflection
|
||||
dl protobuf)
|
||||
target_include_directories(${CLIENT_LIB} PUBLIC ${INC_DIR})
|
||||
|
||||
target_include_directories(${CLIENT_LIB} PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${PROJECT_SOURCE_DIR}/include"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/include"
|
||||
"${PROTOB_OUT_DIR}"
|
||||
"${RSMI_INC_DIR}")
|
||||
# TODO: set the properties for the library once we have one
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property(TARGET ${CLIENT_LIB} PROPERTY
|
||||
|
||||
@@ -24,6 +24,8 @@ THE SOFTWARE.
|
||||
#ifndef CLIENT_INCLUDE_RDC_RDC_CLIENT_H_
|
||||
#define CLIENT_INCLUDE_RDC_RDC_CLIENT_H_
|
||||
|
||||
#include <grpcpp/grpcpp.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
@@ -192,6 +194,64 @@ typedef uintptr_t rdc_channel_t;
|
||||
#define RDC_DEFAULT_SERVER_PORT 50051
|
||||
#define RDC_DEFAULT_SERVER_IP "localhost"
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup RDCAdmin RDC Administration Functions
|
||||
* These administrative functions are used to monitor and control, for
|
||||
* example RDC connectivity.
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Check the connection status of a channel
|
||||
*
|
||||
* @details Given an ::rdc_channel_t @p channel and a boolean @p
|
||||
* try_to_connect, this function will return the grpc_connectivity_state for
|
||||
* that channel
|
||||
*
|
||||
* @param[in] channel The channel for which the status will be given
|
||||
*
|
||||
* @param[in] try_to_connect If the channel is currently IDLE and this argument
|
||||
* is true, transition to CONNECTING.
|
||||
*
|
||||
* @param[inout] state A pointer to caller-provided memory to which
|
||||
* the grpc_connectivity_state will be written. grpc_connectivity_state has
|
||||
* the following possible values:
|
||||
* GRPC_CHANNEL_IDLE channel is idle
|
||||
* GRPC_CHANNEL_CONNECTING channel is connecting
|
||||
* GRPC_CHANNEL_READY channel is ready for work
|
||||
* GRPC_CHANNEL_TRANSIENT_FAILURE channel has seen a failure but expects to
|
||||
* recover
|
||||
* GRPC_CHANNEL_SHUTDOWN channel has seen a failure that it cannot
|
||||
* recover from
|
||||
*
|
||||
* @retval ::RDC_STATUS_SUCCESS is returned upon successful call.
|
||||
*
|
||||
*/
|
||||
rdc_status_t
|
||||
rdc_channel_state_get(rdc_channel_t channel, bool try_to_connect,
|
||||
grpc_connectivity_state *state);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Verify a channel's connection to the server
|
||||
*
|
||||
* @details Given an ::rdc_channel_t @p channel, this function will send a
|
||||
* random number to the server associated with @p channel. The server will send
|
||||
* the number back. Upon receiving the returned message from the server, the
|
||||
* number sent to the server is compared to the number received from the
|
||||
* server. If the 2 numbers are the same, the connection is verified.
|
||||
* Otherwise, an appropriate error code is returned.
|
||||
*
|
||||
* @param[in] channel The channel for which the connection will be verified
|
||||
*
|
||||
* @retval ::RDC_STATUS_SUCCESS is returned upon successful call.
|
||||
*
|
||||
*/
|
||||
rdc_status_t
|
||||
rdc_channel_connection_verify(rdc_channel_t channel);
|
||||
|
||||
/** @} */ // end of RDCAdmin
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup InitShutAdmin Initialization and Shutdown
|
||||
* These functions are used for initialization of RDC and clean up when
|
||||
@@ -216,7 +276,7 @@ typedef uintptr_t rdc_channel_t;
|
||||
*
|
||||
* @param[in] port A pointer to string containing the port on which the
|
||||
* RDC server is listening
|
||||
*
|
||||
*
|
||||
* @param[in] secure A bool indicating whether SSL should be used for
|
||||
* communications (not currently supported)
|
||||
*
|
||||
|
||||
+11
-5
@@ -21,8 +21,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef CLIENT_INCLUDE_RDC_RDC_MAIN_H_
|
||||
#define CLIENT_INCLUDE_RDC_RDC_MAIN_H_
|
||||
#ifndef CLIENT_INCLUDE_RDC_RDC_CLIENT_MAIN_H_
|
||||
#define CLIENT_INCLUDE_RDC_RDC_CLIENT_MAIN_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
@@ -48,15 +48,21 @@ class RDCChannel {
|
||||
std::string server_ip(void) const {return server_ip_;}
|
||||
std::string server_port(void) const {return server_port_;}
|
||||
bool secure_channel(void) const {return secure_channel_;}
|
||||
std::shared_ptr<::rdc::Rsmi::Stub> stub(void) const {return stub_;}
|
||||
std::shared_ptr<::rdc::Rsmi::Stub> rsmi_stub(void) const {return rsmi_stub_;}
|
||||
std::shared_ptr<::rdc::RdcAdmin::Stub> rdc_admin_stub(void) const {
|
||||
return rdc_admin_stub_;}
|
||||
std::shared_ptr<grpc::Channel> const channel(void) {return channel_;}
|
||||
|
||||
private:
|
||||
std::string server_ip_;
|
||||
std::string server_port_;
|
||||
bool secure_channel_;
|
||||
std::shared_ptr<::rdc::Rsmi::Stub> stub_;
|
||||
std::shared_ptr<::rdc::Rsmi::Stub> rsmi_stub_;
|
||||
std::shared_ptr<::rdc::RdcAdmin::Stub> rdc_admin_stub_;
|
||||
std::shared_ptr<grpc::Channel> channel_;
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // CLIENT_INCLUDE_RDC_RDC_MAIN_H_
|
||||
#endif // CLIENT_INCLUDE_RDC_RDC_CLIENT_MAIN_H_
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2019 - Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -22,10 +22,11 @@ THE SOFTWARE.
|
||||
|
||||
#include <grpcpp/grpcpp.h>
|
||||
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <iostream>
|
||||
|
||||
#include "rdc/rdc_main.h"
|
||||
#include "rdc/rdc_client_main.h"
|
||||
#include "rdc/rdc_client.h"
|
||||
#include "common/rdc_utils.h"
|
||||
#include "rdc/rdc_exception.h"
|
||||
@@ -113,6 +114,45 @@ rdc_channel_create(rdc_channel_t *channel, const char *ip,
|
||||
|
||||
CATCH
|
||||
}
|
||||
/**
 * Report the current gRPC connectivity state of an RDC channel.
 *
 * @param channel         Opaque channel handle; UINTPTR_TO_RDC_CHAN turns it
 *                        into the local `ch` used below (macro defined
 *                        elsewhere -- presumably it validates the handle;
 *                        confirm against its definition).
 * @param try_to_connect  Forwarded unchanged to grpc::Channel::GetState().
 * @param state           Caller-provided storage that receives the state.
 *
 * @return RDC_STATUS_SUCCESS once the state has been written. Any other
 *         status comes from the TRY/CATCH, CHK_PTR_ARG and
 *         UINTPTR_TO_RDC_CHAN macros, which are defined outside this block.
 */
rdc_status_t
rdc_channel_state_get(rdc_channel_t channel, bool try_to_connect,
                                          grpc_connectivity_state *state) {
  TRY
  CHK_PTR_ARG(state)
  UINTPTR_TO_RDC_CHAN(channel)

  // Hold the shared_ptr for the duration of the query.
  const auto grpc_channel = ch->channel();
  const grpc_connectivity_state current =
                                     grpc_channel->GetState(try_to_connect);

  *state = current;
  return RDC_STATUS_SUCCESS;

  CATCH
}
|
||||
|
||||
/**
 * Verify that the server behind @p channel answers requests correctly.
 *
 * A pseudo-random "magic number" is sent through the RdcAdmin
 * VerifyConnection RPC and the value returned in the response is compared
 * with the value that was sent.
 *
 * @param channel  Opaque channel handle; UINTPTR_TO_RDC_CHAN turns it into
 *                 the local `ch` used below (macro defined elsewhere).
 *
 * @return RDC_STATUS_SUCCESS when the echoed number matches the one sent;
 *         the status produced by GrpcErrorToRdcError() when the RPC itself
 *         fails; RDC_STATUS_GRPC_DATA_LOSS when the RPC succeeds but the
 *         echoed number differs.
 */
rdc_status_t
rdc_channel_connection_verify(rdc_channel_t channel) {
  TRY
  UINTPTR_TO_RDC_CHAN(channel)

  ::rdc::VerifyConnectionResponse resp;
  ::rdc::VerifyConnectionRequest req;
  ::grpc::ClientContext context;

  // Seed from a nanosecond-resolution clock mixed with the process id.
  // Seeding from time() alone has one-second resolution, so every
  // verification issued within the same second (from any process) would
  // send the same magic number.
  unsigned int seed;
  struct timespec now;
  if (clock_gettime(CLOCK_MONOTONIC, &now) == 0) {
    seed = static_cast<unsigned int>(now.tv_sec) ^
                                     static_cast<unsigned int>(now.tv_nsec);
  } else {
    // Fall back to wall-clock seconds if the monotonic clock is unavailable.
    seed = static_cast<unsigned int>(time(nullptr));
  }
  seed ^= static_cast<unsigned int>(getpid());

  req.set_magic_num(static_cast<uint64_t>(rand_r(&seed)));

  const ::grpc::Status status =
                 ch->rdc_admin_stub()->VerifyConnection(&context, req, &resp);

  // Transport/RPC level failure: translate the gRPC code to an RDC status.
  if (!status.ok()) {
    return amd::rdc::GrpcErrorToRdcError(status.error_code());
  }

  // The RPC completed, but the server did not echo what was sent.
  if (resp.echo_magic_num() != req.magic_num()) {
    return RDC_STATUS_GRPC_DATA_LOSS;
  }

  return RDC_STATUS_SUCCESS;

  CATCH
}
|
||||
|
||||
rdc_status_t
|
||||
rdc_channel_destroy(rdc_channel_t channel) {
|
||||
@@ -135,7 +175,8 @@ rdc_num_gpus_get(rdc_channel_t channel, uint64_t *num_gpu) {
|
||||
::rdc::GetNumDevicesResponse resp;
|
||||
::rdc::GetNumDevicesRequest empty;
|
||||
::grpc::ClientContext context;
|
||||
::grpc::Status status = ch->stub()->GetNumDevices(&context, empty, &resp);
|
||||
::grpc::Status status =
|
||||
ch->rsmi_stub()->GetNumDevices(&context, empty, &resp);
|
||||
|
||||
if (!status.ok()) {
|
||||
return amd::rdc::GrpcErrorToRdcError(status.error_code());
|
||||
@@ -171,7 +212,8 @@ rdc_dev_temp_metric_get(rdc_channel_t channel, uint32_t dv_ind,
|
||||
in_args.set_dv_ind(dv_ind);
|
||||
in_args.set_sensor_type(sensor_type);
|
||||
|
||||
::grpc::Status status = ch->stub()->GetTemperature(&context, in_args, &resp);
|
||||
::grpc::Status status =
|
||||
ch->rsmi_stub()->GetTemperature(&context, in_args, &resp);
|
||||
|
||||
if (!status.ok()) {
|
||||
return ::amd::rdc::GrpcErrorToRdcError(status.error_code());
|
||||
|
||||
@@ -27,7 +27,7 @@ THE SOFTWARE.
|
||||
#include <string>
|
||||
|
||||
#include "rdc.grpc.pb.h" // NOLINT
|
||||
#include "rdc/rdc_main.h"
|
||||
#include "rdc/rdc_client_main.h"
|
||||
#include "rdc/rdc_client.h"
|
||||
|
||||
namespace amd {
|
||||
@@ -48,21 +48,25 @@ RDCChannel::Initialize(void) {
|
||||
std::string addr_str = server_ip() + ":";
|
||||
addr_str += server_port();
|
||||
|
||||
std::shared_ptr<grpc::Channel> channel;
|
||||
|
||||
if (secure_channel_) {
|
||||
// Not yet supported
|
||||
return RDC_STATUS_GRPC_UNIMPLEMENTED;
|
||||
} else {
|
||||
channel = ::grpc::CreateChannel(addr_str,
|
||||
channel_ = ::grpc::CreateChannel(addr_str,
|
||||
grpc::InsecureChannelCredentials());
|
||||
}
|
||||
|
||||
stub_ = ::rdc::Rsmi::NewStub(channel);
|
||||
|
||||
if (stub_ == nullptr) {
|
||||
rsmi_stub_ = ::rdc::Rsmi::NewStub(channel_);
|
||||
if (rsmi_stub_ == nullptr) {
|
||||
return RDC_STATUS_GRPC_RESOURCE_EXHAUSTED;
|
||||
}
|
||||
|
||||
rdc_admin_stub_ = ::rdc::RdcAdmin::NewStub(channel_);
|
||||
if (rdc_admin_stub_ == nullptr) {
|
||||
return RDC_STATUS_GRPC_RESOURCE_EXHAUSTED;
|
||||
}
|
||||
|
||||
// Test to see if we can connect to server; if not, return err.
|
||||
return RDC_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ rdc_status_t GrpcErrorToRdcError(grpc::StatusCode grpc_err) {
|
||||
uint32_t rdc_grpc_base_int =
|
||||
static_cast<uint32_t>(RDC_STATUS_GRPC_ERR_FIRST);
|
||||
uint32_t rdc_err_int = grpc_err_int + rdc_grpc_base_int;
|
||||
|
||||
|
||||
return static_cast<rdc_status_t>(rdc_err_int);
|
||||
}
|
||||
|
||||
|
||||
File binario non mostrato.
@@ -27,7 +27,18 @@ syntax = "proto3";
|
||||
// option objc_class_prefix = "HLW";
|
||||
|
||||
package rdc;
|
||||
|
||||
|
||||
/****************************************************************************/
|
||||
/********************************** Rsmi Service ****************************/
|
||||
/****************************************************************************/
|
||||
// Rsmi service: RPCs for the number of devices and for temperature readings.
service Rsmi {
  // RSMI ID services
  rpc GetNumDevices(GetNumDevicesRequest) returns (GetNumDevicesResponse) {}

  // RSMI Physical Queries
  rpc GetTemperature(GetTemperatureRequest)
      returns (GetTemperatureResponse) {}
}
|
||||
|
||||
// rsmi_num_monitor_devices()
|
||||
message GetNumDevicesRequest {
|
||||
}
|
||||
@@ -36,14 +47,6 @@ message GetNumDevicesResponse {
|
||||
uint64 ret_val = 2;
|
||||
}
|
||||
|
||||
/* GetNumDevices */
|
||||
message VerifyConnectionRequest {
|
||||
string name = 1;
|
||||
}
|
||||
message VerifyConnectionResponse {
|
||||
string message = 1;
|
||||
}
|
||||
|
||||
/* GetTemperature */
|
||||
message GetTemperatureRequest {
|
||||
uint32 dv_ind = 1;
|
||||
@@ -71,15 +74,20 @@ message GetTemperatureResponse {
|
||||
uint64 ret_val = 2;
|
||||
}
|
||||
|
||||
// The greeting service definition.
|
||||
service Rsmi {
|
||||
/****************************************************************************/
|
||||
/********************************** RdcAdmin Service ************************/
|
||||
/****************************************************************************/
|
||||
service RdcAdmin {
|
||||
// RDC admin services
|
||||
rpc VerifyConnection (VerifyConnectionRequest) returns (VerifyConnectionResponse) {}
|
||||
|
||||
// RSMI ID services
|
||||
rpc GetNumDevices (GetNumDevicesRequest) returns(GetNumDevicesResponse) {}
|
||||
|
||||
// RSMI Physical Queries
|
||||
rpc GetTemperature(GetTemperatureRequest) returns(GetTemperatureResponse) {}
|
||||
rpc VerifyConnection (VerifyConnectionRequest)
|
||||
returns (VerifyConnectionResponse) {}
|
||||
}
|
||||
|
||||
/* VerifyConnection */
|
||||
// Request message for the VerifyConnection RPC.
message VerifyConnectionRequest {
  // Number chosen by the sender of the request.
  uint64 magic_num = 1;
}
|
||||
// Response message for the VerifyConnection RPC.
message VerifyConnectionResponse {
  // Number carried back to the requester.
  uint64 echo_magic_num = 1;
}
|
||||
|
||||
|
||||
@@ -71,7 +71,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
"${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}")
|
||||
|
||||
set(SERVER_SRC_LIST "${SRC_DIR}/rdc_rsmi_service.cc")
|
||||
set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${SRC_DIR}/rdc_main.cc")
|
||||
set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${SRC_DIR}/rdc_admin_service.cc")
|
||||
set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${SRC_DIR}/rdc_server_main.cc")
|
||||
set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${PROTOBUF_GENERATED_SRCS}")
|
||||
message("SERVER_SRC_LIST=${SERVER_SRC_LIST}")
|
||||
|
||||
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_
|
||||
#define SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_
|
||||
|
||||
#include "rdc.grpc.pb.h" // NOLINT
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rdc/rdc_admin_service.h"
|
||||
|
||||
class RDCAdminServiceImpl final : public ::rdc::RdcAdmin::Service {
|
||||
public:
|
||||
RDCAdminServiceImpl();
|
||||
~RDCAdminServiceImpl();
|
||||
::grpc::Status VerifyConnection(::grpc::ServerContext* context,
|
||||
const rdc::VerifyConnectionRequest* request,
|
||||
rdc::VerifyConnectionResponse* reply) override;
|
||||
private:
|
||||
};
|
||||
|
||||
#endif // SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_
|
||||
@@ -33,10 +33,6 @@ class RsmiServiceImpl final : public ::rdc::Rsmi::Service {
|
||||
|
||||
rsmi_status_t Initialize(uint64_t rsmi_init_flags = 0);
|
||||
|
||||
::grpc::Status VerifyConnection(::grpc::ServerContext* context,
|
||||
const rdc::VerifyConnectionRequest* request,
|
||||
rdc::VerifyConnectionResponse* reply) override;
|
||||
|
||||
::grpc::Status
|
||||
GetNumDevices(::grpc::ServerContext* context,
|
||||
const ::rdc::GetNumDevicesRequest* request,
|
||||
|
||||
+12
-4
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef SERVER_INCLUDE_RDC_RDC_MAIN_H_
|
||||
#define SERVER_INCLUDE_RDC_RDC_MAIN_H_
|
||||
#ifndef SERVER_INCLUDE_RDC_RDC_SERVER_MAIN_H_
|
||||
#define SERVER_INCLUDE_RDC_RDC_SERVER_MAIN_H_
|
||||
|
||||
#include <grpcpp/grpcpp.h>
|
||||
|
||||
@@ -28,6 +28,7 @@ THE SOFTWARE.
|
||||
#include <memory>
|
||||
|
||||
#include "rdc/rdc_rsmi_service.h"
|
||||
#include "rdc/rdc_admin_service.h"
|
||||
|
||||
class RDCServer {
|
||||
public:
|
||||
@@ -41,15 +42,22 @@ class RDCServer {
|
||||
bool start_rsmi_service(void) const {return start_rsmi_service_;}
|
||||
void set_start_rsmi_service(bool s) {start_rsmi_service_ = s;}
|
||||
|
||||
bool start_rdc_admin_service(void) const {return start_rdc_admin_service_;}
|
||||
void set_start_rdc_admin_service(bool s) {start_rdc_admin_service_ = s;}
|
||||
|
||||
void ShutDown(void);
|
||||
|
||||
private:
|
||||
void HandleSignal(int sig);
|
||||
std::string server_address_;
|
||||
bool start_rsmi_service_;
|
||||
std::unique_ptr<::grpc::Server> server_;
|
||||
|
||||
bool start_rsmi_service_;
|
||||
RsmiServiceImpl *rsmi_service_;
|
||||
|
||||
bool start_rdc_admin_service_;
|
||||
RDCAdminServiceImpl *rdc_admin_service_;
|
||||
};
|
||||
|
||||
#endif // SERVER_INCLUDE_RDC_RDC_MAIN_H_
|
||||
#endif // SERVER_INCLUDE_RDC_RDC_SERVER_MAIN_H_
|
||||
|
||||
@@ -7,7 +7,7 @@ Description=Radeon Data Center Daemon (rdcd)
|
||||
After=network.target
|
||||
|
||||
# Add any services that must be started before rdcd here
|
||||
#After=
|
||||
#After=
|
||||
|
||||
# Add any non-service units required by rdcd here
|
||||
#Requires=
|
||||
|
||||
Executable
+49
@@ -0,0 +1,49 @@
|
||||
|
||||
/*
|
||||
Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <grpcpp/grpcpp.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <csignal>
|
||||
|
||||
#include "rdc.grpc.pb.h" // NOLINT
|
||||
#include "rdc/rdc_admin_service.h"
|
||||
|
||||
// Constructor: performs no work of its own.
RDCAdminServiceImpl::RDCAdminServiceImpl() = default;
|
||||
|
||||
// Destructor: performs no work of its own.
RDCAdminServiceImpl::~RDCAdminServiceImpl() = default;
|
||||
// VerifyConnection RPC handler: copies the magic number carried by the
// request into the reply and reports success.
//   context: unused
//   request: supplies magic_num
//   reply:   receives echo_magic_num
// Always returns ::grpc::Status::OK.
::grpc::Status
RDCAdminServiceImpl::VerifyConnection(::grpc::ServerContext* context,
                                  const rdc::VerifyConnectionRequest* request,
                                      rdc::VerifyConnectionResponse* reply) {
  static_cast<void>(context);  // Quiet warning for now

  const auto received_magic = request->magic_num();
  reply->set_echo_magic_num(received_magic);

  return ::grpc::Status::OK;
}
|
||||
@@ -61,15 +61,6 @@ RsmiServiceImpl::Initialize(uint64_t rsmi_init_flags) {
|
||||
}
|
||||
return rsmi_ret;
|
||||
}
|
||||
::grpc::Status
|
||||
RsmiServiceImpl::VerifyConnection(::grpc::ServerContext* context,
|
||||
const rdc::VerifyConnectionRequest* request,
|
||||
rdc::VerifyConnectionResponse* reply) {
|
||||
(void)context; // Quiet warning for now
|
||||
std::string prefix("Hello ");
|
||||
reply->set_message(prefix + request->name());
|
||||
return ::grpc::Status::OK;
|
||||
}
|
||||
|
||||
::grpc::Status
|
||||
RsmiServiceImpl::GetNumDevices(::grpc::ServerContext* context,
|
||||
|
||||
@@ -36,7 +36,7 @@ THE SOFTWARE.
|
||||
|
||||
#include "rdc.grpc.pb.h" // NOLINT
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rdc/rdc_main.h"
|
||||
#include "rdc/rdc_server_main.h"
|
||||
#include "rdc/rdc_rsmi_service.h"
|
||||
|
||||
static bool sShutDownServer = false;
|
||||
@@ -46,7 +46,7 @@ static const char *kRDCDHomeDir = "/";
|
||||
static const char *kDaemonLockFile = "/var/run/rdcd.lock";
|
||||
|
||||
RDCServer::RDCServer() : server_address_("0.0.0.0:50051"),
|
||||
rsmi_service_(nullptr) {
|
||||
rsmi_service_(nullptr), rdc_admin_service_(nullptr) {
|
||||
}
|
||||
|
||||
RDCServer::~RDCServer() {
|
||||
@@ -66,6 +66,10 @@ RDCServer::Run() {
|
||||
|
||||
// Register services as the instances through which we'll communicate with
|
||||
// clients. These are synchronous services.
|
||||
if (start_rdc_admin_service()) {
|
||||
rdc_admin_service_ = new RDCAdminServiceImpl();
|
||||
builder.RegisterService(rdc_admin_service_);
|
||||
}
|
||||
|
||||
if (start_rsmi_service()) {
|
||||
rsmi_service_ = new RsmiServiceImpl();
|
||||
@@ -125,6 +129,11 @@ RDCServer::ShutDown(void) {
|
||||
delete rsmi_service_;
|
||||
rsmi_service_ = nullptr;
|
||||
}
|
||||
|
||||
if (rdc_admin_service_) {
|
||||
delete rdc_admin_service_;
|
||||
rdc_admin_service_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
static void * ProcessSignalLoop(void *server_ptr) {
|
||||
@@ -268,6 +277,8 @@ int main(int argc, char** argv) {
|
||||
|
||||
// TODO(cfreehil): Eventually, set these by reading a config file
|
||||
rdc_server.set_start_rsmi_service(true);
|
||||
rdc_server.set_start_rdc_admin_service(true);
|
||||
|
||||
// rdc_server.set_secure_communications(false);
|
||||
// rdc_server.set_address("0.0.0.0:50051")
|
||||
|
||||
@@ -41,9 +41,6 @@ endif ()
|
||||
|
||||
# Required Defines first:
|
||||
|
||||
set(RSMI_INC_DIR ${ROCM_DIR}/include)
|
||||
set(RSMI_LIB_DIR ${ROCM_DIR}/lib)
|
||||
|
||||
message("")
|
||||
message("Build Configuration:")
|
||||
message("-----------BuildType: " ${CMAKE_BUILD_TYPE})
|
||||
@@ -53,7 +50,6 @@ message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR})
|
||||
message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR})
|
||||
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib)
|
||||
message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin)
|
||||
message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR})
|
||||
message("--------RSMI Inc Dir: " ${RSMI_INC_DIR})
|
||||
message("")
|
||||
|
||||
@@ -66,8 +62,6 @@ include(utils)
|
||||
## Verbose output.
|
||||
set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
|
||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../client/include"
|
||||
"${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}")
|
||||
set(EXAMPLE_SRC_LIST "${SRC_DIR}/rdc_client_test.cc")
|
||||
|
||||
message("EXAMPLE_SRC_LIST=${EXAMPLE_SRC_LIST}")
|
||||
@@ -77,6 +71,11 @@ set(CLIENT_LIB_INC_LIST "${INC_DIR}/rdc_client.h")
|
||||
set(TEST_CLIENT_EXE "rdc_test_client")
|
||||
|
||||
add_executable(${TEST_CLIENT_EXE} "${EXAMPLE_SRC_LIST}")
|
||||
|
||||
target_include_directories(${TEST_CLIENT_EXE} PRIVATE
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/../../client/include"
|
||||
"${RSMI_INC_DIR}")
|
||||
|
||||
target_link_libraries(${TEST_CLIENT_EXE} rdc_client)
|
||||
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
|
||||
@@ -31,7 +31,10 @@ THE SOFTWARE.
|
||||
|
||||
#define CHK_RET_STATUS(RET) \
|
||||
if ((RET) != RDC_STATUS_SUCCESS) { \
|
||||
std::cout << "rdc call returned error: " << (RET) << std::endl; \
|
||||
const char *err_msg_str; \
|
||||
(void)rdc_status_string((RET), &err_msg_str); \
|
||||
std::cout << "rdc call returned error: " << (RET) << ":\"" << \
|
||||
err_msg_str << "\"" << std::endl; \
|
||||
}
|
||||
|
||||
#define CHK_RET_STATUS_CONT(RET) \
|
||||
@@ -41,11 +44,8 @@ THE SOFTWARE.
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
(void)argc; // ignore for now
|
||||
(void)argv; // ignore for now
|
||||
|
||||
rdc_status_t ret;
|
||||
rdc_channel_t server;
|
||||
rdc_channel_t server_ch;
|
||||
uint64_t num_gpu;
|
||||
int64_t temperature;
|
||||
std::string serv_host("localhost");
|
||||
@@ -61,27 +61,39 @@ int main(int argc, char** argv) {
|
||||
std::cout << "Attempting to create channel to " << serv_host << ":" <<
|
||||
serv_port << std::endl;
|
||||
|
||||
ret = rdc_channel_create(&server, serv_host.c_str(), serv_port.c_str(),
|
||||
ret = rdc_channel_create(&server_ch, serv_host.c_str(), serv_port.c_str(),
|
||||
false);
|
||||
CHK_RET_STATUS(ret)
|
||||
std::cout << "Successfully created channel" << std::endl;
|
||||
|
||||
std::cout << "Getting number of gpus at server..." << std::endl;
|
||||
ret = rdc_num_gpus_get(server, &num_gpu);
|
||||
grpc_connectivity_state ch_state;
|
||||
ret = rdc_channel_state_get(server_ch, true, &ch_state);
|
||||
CHK_RET_STATUS(ret)
|
||||
std::cout << "Number of GPUs at server is " << num_gpu << std::endl;
|
||||
std::cout << "Current channel state is " << ch_state << std::endl;
|
||||
|
||||
std::cout << "Verifying connection to server..." << std::endl;
|
||||
ret = rdc_channel_connection_verify(server_ch);
|
||||
CHK_RET_STATUS(ret)
|
||||
if (ret == RDC_STATUS_SUCCESS) {
|
||||
std::cout << "Verified connection to server." << std::endl;
|
||||
}
|
||||
std::cout << "Getting number of gpus at server..." << std::endl;
|
||||
ret = rdc_num_gpus_get(server_ch, &num_gpu);
|
||||
CHK_RET_STATUS(ret)
|
||||
std::cout << "Number of GPUs at server is " << server_ch <<
|
||||
num_gpu << std::endl;
|
||||
|
||||
for (uint32_t dv_ind = 0; dv_ind < num_gpu; ++dv_ind) {
|
||||
std::cout << "Info for Device " << dv_ind << ":" << std::endl;
|
||||
std::cout << "\tGetting temperature..." << std::endl;
|
||||
ret = rdc_dev_temp_metric_get(server, dv_ind, RSMI_TEMP_TYPE_JUNCTION,
|
||||
ret = rdc_dev_temp_metric_get(server_ch, dv_ind, RSMI_TEMP_TYPE_JUNCTION,
|
||||
RSMI_TEMP_CURRENT, &temperature);
|
||||
CHK_RET_STATUS_CONT(ret)
|
||||
std::cout << "\t GPU " << dv_ind << " has a temperature of " <<
|
||||
temperature << std::endl;
|
||||
}
|
||||
|
||||
ret = rdc_channel_destroy(server);
|
||||
ret = rdc_channel_destroy(server_ch);
|
||||
CHK_RET_STATUS(ret)
|
||||
std::cout << "Successfully destroyed channel to " << serv_host << ":" <<
|
||||
serv_port << std::endl;
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user