From ba14edbb4dbddaa29f195e39e879fa03bc8d9420 Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Sun, 22 Dec 2019 20:30:58 -0600 Subject: [PATCH] Break srvs. into rsmi & admin srvs. Add VerifyConnection api. Change-Id: I67567264c37e31f3409062a14e56eba4801cd944 [ROCm/rdc commit: dc6f6f3e9a30f699e12d6ecb3649384a5912a767] --- projects/rdc/CMakeLists.txt | 4 +- projects/rdc/client/CMakeLists.txt | 25 +++---- projects/rdc/client/include/rdc/rdc_client.h | 62 +++++++++++++++++- .../rdc/{rdc_main.h => rdc_client_main.h} | 16 +++-- projects/rdc/client/src/rdc_client.cc | 50 ++++++++++++-- .../src/{rdc_main.cc => rdc_client_main.cc} | 18 +++-- projects/rdc/common/rdc_utils.cc | 2 +- projects/rdc/docs/RDC_Manual.pdf | Bin 68099 -> 68078 bytes projects/rdc/protos/rdc.proto | 44 ++++++++----- projects/rdc/server/CMakeLists.txt | 3 +- .../server/include/rdc/rdc_admin_service.h | 39 +++++++++++ .../rdc/server/include/rdc/rdc_rsmi_service.h | 4 -- .../rdc/{rdc_main.h => rdc_server_main.h} | 16 +++-- projects/rdc/server/rdc.service | 2 +- projects/rdc/server/src/rdc_admin_service.cc | 49 ++++++++++++++ projects/rdc/server/src/rdc_rsmi_service.cc | 9 --- .../src/{rdc_main.cc => rdc_server_main.cc} | 15 ++++- projects/rdc/tests/example/CMakeLists.txt | 11 ++-- projects/rdc/tests/example/rdc_client_test.cc | 34 ++++++---- 19 files changed, 316 insertions(+), 87 deletions(-) rename projects/rdc/client/include/rdc/{rdc_main.h => rdc_client_main.h} (76%) rename projects/rdc/client/src/{rdc_main.cc => rdc_client_main.cc} (84%) create mode 100755 projects/rdc/server/include/rdc/rdc_admin_service.h rename projects/rdc/server/include/rdc/{rdc_main.h => rdc_server_main.h} (80%) create mode 100755 projects/rdc/server/src/rdc_admin_service.cc rename projects/rdc/server/src/{rdc_main.cc => rdc_server_main.cc} (95%) diff --git a/projects/rdc/CMakeLists.txt b/projects/rdc/CMakeLists.txt index 362f34e00b..b496826896 100755 --- a/projects/rdc/CMakeLists.txt +++ b/projects/rdc/CMakeLists.txt @@ 
-25,8 +25,8 @@ cmake_minimum_required(VERSION 3.5.0) # ROCM_DIR should be passed in via command line; these will be used # in sub-projects -set(RSMI_INC_DIR ${ROCM_DIR}/include) -set(RSMI_LIB_DIR ${ROCM_DIR}/lib) +set(RSMI_INC_DIR ${ROCM_DIR}/rocm_smi/include) +set(RSMI_LIB_DIR ${ROCM_DIR}/rocm_smi/lib) ## Set default module path if(NOT DEFINED CMAKE_MODULE_PATH) diff --git a/projects/rdc/client/CMakeLists.txt b/projects/rdc/client/CMakeLists.txt index 41f855f2d2..44db3b46ca 100755 --- a/projects/rdc/client/CMakeLists.txt +++ b/projects/rdc/client/CMakeLists.txt @@ -67,7 +67,7 @@ set(CLIENT_LIB "rdc_client") set(RDC "rdc") set(CLIENT_LIB_COMPONENT "lib${CLIENT_LIB}") set(SRC_DIR "${PROJECT_SOURCE_DIR}/client/src") -set(INC_DIR "${PROJECT_SOURCE_DIR}/client/include/rdc") +set(RDC_CLIENT_INC_DIR "${PROJECT_SOURCE_DIR}/client/include/rdc") ################# Determine the library version ######################### ## Setup the SO version based on git tags. @@ -119,27 +119,30 @@ set(CMAKE_VERBOSE_MAKEFILE on) file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h") file(GLOB PROTOBUF_GENERATED_SRCS "${PROTOB_OUT_DIR}/*.cc") -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include" - "${PROJECT_SOURCE_DIR}" - "${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}") set(CLIENT_LIB_SRC_LIST "${SRC_DIR}/rdc_client.cc") -set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${SRC_DIR}/rdc_main.cc") +set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${SRC_DIR}/rdc_client_main.cc") set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${PROTOBUF_GENERATED_SRCS}") set(CLIENT_LIB_SRC_LIST ${CLIENT_LIB_SRC_LIST} "${PROJECT_SOURCE_DIR}/common/rdc_utils.cc") message("CLIENT_LIB_SRC_LIST=${CLIENT_LIB_SRC_LIST}") -set(CLIENT_LIB_INC_LIST "${INC_DIR}/rdc_client.h") -set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST} "${INC_DIR}/rdc_exception.h") -set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST} "${INC_DIR}/rdc_main.h") +set(CLIENT_LIB_INC_LIST "${RDC_CLIENT_INC_DIR}/rdc_client.h") set(CLIENT_LIB_INC_LIST 
${CLIENT_LIB_INC_LIST} - "${PROJECT_SOURCE_DIR}/common/rdc_utils.h") + "${RDC_CLIENT_INC_DIR}/rdc_exception.h") +set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST} + "${RDC_CLIENT_INC_DIR}/rdc_client_main.h") +set(CLIENT_LIB_INC_LIST ${CLIENT_LIB_INC_LIST} + "${PROJECT_SOURCE_DIR}/common/rdc_utils.h") add_library(${CLIENT_LIB} SHARED ${CLIENT_LIB_SRC_LIST} ${CLIENT_LIB_INC_LIST}) target_link_libraries(${CLIENT_LIB} pthread rt grpc grpc++ grpc++_reflection dl protobuf) -target_include_directories(${CLIENT_LIB} PUBLIC ${INC_DIR}) - +target_include_directories(${CLIENT_LIB} PRIVATE + "${PROJECT_SOURCE_DIR}" + "${PROJECT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${PROTOB_OUT_DIR}" + "${RSMI_INC_DIR}") # TODO: set the properties for the library once we have one ## Set the VERSION and SOVERSION values set_property(TARGET ${CLIENT_LIB} PROPERTY diff --git a/projects/rdc/client/include/rdc/rdc_client.h b/projects/rdc/client/include/rdc/rdc_client.h index d05f73d99a..981c249a29 100755 --- a/projects/rdc/client/include/rdc/rdc_client.h +++ b/projects/rdc/client/include/rdc/rdc_client.h @@ -24,6 +24,8 @@ THE SOFTWARE. #ifndef CLIENT_INCLUDE_RDC_RDC_CLIENT_H_ #define CLIENT_INCLUDE_RDC_RDC_CLIENT_H_ +#include + #include #include #include "rocm_smi/rocm_smi.h" @@ -192,6 +194,64 @@ typedef uintptr_t rdc_channel_t; #define RDC_DEFAULT_SERVER_PORT 50051 #define RDC_DEFAULT_SERVER_IP "localhost" +/*****************************************************************************/ +/** @defgroup RDCAdmin RDC Administration Functions + * These administrative functions are used to monitor and control, for + * example RDC connectivity. 
+ * @{ + */ + +/** + * @brief Check the connection status of a channel + * + * @details Given an ::rdc_channel_t @p channel and a boolean @p + * try_to_connect, this function will return the grpc_connectivity_state for + * that channel. + * + * @param[in] channel The channel for which the status will be given + * + * @param[in] try_to_connect If the channel is currently IDLE and this argument + * is true, transition to CONNECTING. + * + * @param[out] state A pointer to caller-provided memory to which + * the grpc_connectivity_state will be written. grpc_connectivity_state has + * the following possible values: + * GRPC_CHANNEL_IDLE channel is idle + * GRPC_CHANNEL_CONNECTING channel is connecting + * GRPC_CHANNEL_READY channel is ready for work + * GRPC_CHANNEL_TRANSIENT_FAILURE channel has seen a failure but expects to + * recover + * GRPC_CHANNEL_SHUTDOWN channel has seen a failure that it cannot + * recover from + * + * @retval ::RDC_STATUS_SUCCESS is returned upon successful call. 
+ * + */ +rdc_status_t +rdc_channel_state_get(rdc_channel_t channel, bool try_to_connect, + grpc_connectivity_state *state); + + +/** + * @brief Verify a channel's connection to the server + * + * @details Given an ::rdc_channel_t @p channel, this function will send a + * random number to the server associated with @p channel. The server will send + * the number back. Upon receiving the returned message from the server, the + * number sent to the server is compared to the number received from the + * server. If the two numbers are the same, the connection is verified. + * Otherwise, an appropriate error code is returned. + * + * @param[in] channel The channel for which the connection will be verified + * + * @retval ::RDC_STATUS_SUCCESS is returned upon successful call. 
*/ -#ifndef CLIENT_INCLUDE_RDC_RDC_MAIN_H_ -#define CLIENT_INCLUDE_RDC_RDC_MAIN_H_ +#ifndef CLIENT_INCLUDE_RDC_RDC_CLIENT_MAIN_H_ +#define CLIENT_INCLUDE_RDC_RDC_CLIENT_MAIN_H_ #include #include @@ -48,15 +48,21 @@ class RDCChannel { std::string server_ip(void) const {return server_ip_;} std::string server_port(void) const {return server_port_;} bool secure_channel(void) const {return secure_channel_;} - std::shared_ptr<::rdc::Rsmi::Stub> stub(void) const {return stub_;} + std::shared_ptr<::rdc::Rsmi::Stub> rsmi_stub(void) const {return rsmi_stub_;} + std::shared_ptr<::rdc::RdcAdmin::Stub> rdc_admin_stub(void) const { + return rdc_admin_stub_;} + std::shared_ptr const channel(void) {return channel_;} + private: std::string server_ip_; std::string server_port_; bool secure_channel_; - std::shared_ptr<::rdc::Rsmi::Stub> stub_; + std::shared_ptr<::rdc::Rsmi::Stub> rsmi_stub_; + std::shared_ptr<::rdc::RdcAdmin::Stub> rdc_admin_stub_; + std::shared_ptr channel_; }; } // namespace rdc } // namespace amd -#endif // CLIENT_INCLUDE_RDC_RDC_MAIN_H_ +#endif // CLIENT_INCLUDE_RDC_RDC_CLIENT_MAIN_H_ diff --git a/projects/rdc/client/src/rdc_client.cc b/projects/rdc/client/src/rdc_client.cc index 643534922c..b3ab41efd2 100755 --- a/projects/rdc/client/src/rdc_client.cc +++ b/projects/rdc/client/src/rdc_client.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2019 - Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -22,10 +22,11 @@ THE SOFTWARE. 
#include +#include #include #include -#include "rdc/rdc_main.h" +#include "rdc/rdc_client_main.h" #include "rdc/rdc_client.h" #include "common/rdc_utils.h" #include "rdc/rdc_exception.h" @@ -113,6 +114,45 @@ rdc_channel_create(rdc_channel_t *channel, const char *ip, CATCH } +rdc_status_t +rdc_channel_state_get(rdc_channel_t channel, bool try_to_connect, + grpc_connectivity_state *state) { + TRY + CHK_PTR_ARG(state) + UINTPTR_TO_RDC_CHAN(channel) + + *state = ch->channel()->GetState(try_to_connect); + return RDC_STATUS_SUCCESS; + + CATCH +} + +rdc_status_t +rdc_channel_connection_verify(rdc_channel_t channel) { + TRY + UINTPTR_TO_RDC_CHAN(channel) + + ::rdc::VerifyConnectionResponse resp; + ::rdc::VerifyConnectionRequest req; + ::grpc::ClientContext context; + unsigned int seed = time(NULL); + + req.set_magic_num(static_cast(rand_r(&seed))); + ::grpc::Status status = + ch->rdc_admin_stub()->VerifyConnection(&context, req, &resp); + + if (!status.ok()) { + return amd::rdc::GrpcErrorToRdcError(status.error_code()); + } + + if (resp.echo_magic_num() != req.magic_num()) { + return RDC_STATUS_GRPC_DATA_LOSS; + } + + return RDC_STATUS_SUCCESS; + + CATCH +} rdc_status_t rdc_channel_destroy(rdc_channel_t channel) { @@ -135,7 +175,8 @@ rdc_num_gpus_get(rdc_channel_t channel, uint64_t *num_gpu) { ::rdc::GetNumDevicesResponse resp; ::rdc::GetNumDevicesRequest empty; ::grpc::ClientContext context; - ::grpc::Status status = ch->stub()->GetNumDevices(&context, empty, &resp); + ::grpc::Status status = + ch->rsmi_stub()->GetNumDevices(&context, empty, &resp); if (!status.ok()) { return amd::rdc::GrpcErrorToRdcError(status.error_code()); @@ -171,7 +212,8 @@ rdc_dev_temp_metric_get(rdc_channel_t channel, uint32_t dv_ind, in_args.set_dv_ind(dv_ind); in_args.set_sensor_type(sensor_type); - ::grpc::Status status = ch->stub()->GetTemperature(&context, in_args, &resp); + ::grpc::Status status = + ch->rsmi_stub()->GetTemperature(&context, in_args, &resp); if (!status.ok()) { return 
::amd::rdc::GrpcErrorToRdcError(status.error_code()); diff --git a/projects/rdc/client/src/rdc_main.cc b/projects/rdc/client/src/rdc_client_main.cc similarity index 84% rename from projects/rdc/client/src/rdc_main.cc rename to projects/rdc/client/src/rdc_client_main.cc index 0a4164a616..a40277f478 100755 --- a/projects/rdc/client/src/rdc_main.cc +++ b/projects/rdc/client/src/rdc_client_main.cc @@ -27,7 +27,7 @@ THE SOFTWARE. #include #include "rdc.grpc.pb.h" // NOLINT -#include "rdc/rdc_main.h" +#include "rdc/rdc_client_main.h" #include "rdc/rdc_client.h" namespace amd { @@ -48,21 +48,25 @@ RDCChannel::Initialize(void) { std::string addr_str = server_ip() + ":"; addr_str += server_port(); - std::shared_ptr channel; - if (secure_channel_) { // Not yet supported return RDC_STATUS_GRPC_UNIMPLEMENTED; } else { - channel = ::grpc::CreateChannel(addr_str, + channel_ = ::grpc::CreateChannel(addr_str, grpc::InsecureChannelCredentials()); } - stub_ = ::rdc::Rsmi::NewStub(channel); - - if (stub_ == nullptr) { + rsmi_stub_ = ::rdc::Rsmi::NewStub(channel_); + if (rsmi_stub_ == nullptr) { return RDC_STATUS_GRPC_RESOURCE_EXHAUSTED; } + + rdc_admin_stub_ = ::rdc::RdcAdmin::NewStub(channel_); + if (rdc_admin_stub_ == nullptr) { + return RDC_STATUS_GRPC_RESOURCE_EXHAUSTED; + } + + // Test to see if we can connect to server; if not, return err. 
return RDC_STATUS_SUCCESS; } diff --git a/projects/rdc/common/rdc_utils.cc b/projects/rdc/common/rdc_utils.cc index aeb3dc0679..051e61dd34 100755 --- a/projects/rdc/common/rdc_utils.cc +++ b/projects/rdc/common/rdc_utils.cc @@ -34,7 +34,7 @@ rdc_status_t GrpcErrorToRdcError(grpc::StatusCode grpc_err) { uint32_t rdc_grpc_base_int = static_cast(RDC_STATUS_GRPC_ERR_FIRST); uint32_t rdc_err_int = grpc_err_int + rdc_grpc_base_int; - + return static_cast(rdc_err_int); } diff --git a/projects/rdc/docs/RDC_Manual.pdf b/projects/rdc/docs/RDC_Manual.pdf index 656627a628a95b5b2945ce42bd3a84c6fd664134..c86885a51d11e52739a4fddf1b884245d171f0b3 100644 GIT binary patch delta 3339 zcmV+m4fOJZlmzaR1h8lO2r@G;HZwRfF_Ulm8wfHpFg7zdGBJ~q`^N$_Fp~iRD1Yr* z+j84R5`E`aOx3=an-b;@%xtM#rHhq#BPos~JN7Cn6-7c4W0GJ9Fmd+l_w?XG5E2#2 z%I?#a1OgoNbe}%mJr{tI31*oVEN0SamM|p^lY(g}nPb{&hVMosOloEY2{8+48MD~9 zo)Ro^lF5WQWf=aQ7ffm9ohO}ApnsP_CuuBG(4jn}98@NXX$38@VX%;hq^Id@VHoBU zFPRR2a2wrR0$iR+4||%<-Z3MY5& z(I2Liff5Tn#u;Xz#{wffRyF}C&=Zr>Kpxw;XQ40lo>}NiU^GVm#90=jzkh>IG5Wg% zxM9bmU#!6Zz>IAI9q>%sxW=tX!OF+8^u}KVw;3^^F z;iLn!7!=2vlF5~Bn zf16!oj32UB!~4O!NEeapv)8YCuYT4^j3fD32qEd?XJG`L&*)=7A0zrm=_8|$SzGF` zkt&+2`rbj)Z`KHeuZy0{U)ZIfg7Y>~M6QvM^H)7+S)e<`fTSw} zE3+Rz{R=^j9KxC;G7?Ce*>Wd$U;vb(b_q5fdG&;dYOoFK>$Q{dhe4+~fP% zVw5g|esRUW<8S%Fr3_yP&=4-fK0*;#uc8EFg7hwu82Vx%BO@fF_Sn<8zKW`J&kd^% z^;)JN4KV5@AyyAUto~Oa*7AXA|Dq79GQ_GZ6y=@>_+t^q6o18J6JhOav-y0U=Y{t*|`?I%nyt*%ndHxU1^K|+f!k;bfcz=}u z&jjU7l+g4iMU)^xFb767^(hg>Nd**2n9 zN7lP-#*x)j!H=-`(e@={iV@wy)-Rj+wBJ5p{*i#`NPj%W*dwAQjwDFgMpXI;Yszgz zV<#dkZMG6c$c)a)Eg`lNHK=JR)mEZ0A}zPmDlvzutwgo4 zQS7z?prwlvx0O~b*pZD}X;qjT{C2z|RU+(kwh@&YSf$xU)MMB%@x}sg76 zV;?E}_J4ptn;lnQQ>kt-AY5t;W0)E*CAoltzQoWU(bS2Z3|*lm#`XV?|H=V&E%uau zc4$n$bXeNwR!58+@Ci1Bv>|LAF*c}lFTsYBzAM^zLPxkq4+Wqn$fBKOYq_Rhb?vbc z4{cRD*(Y5M4%$#xy6JxVOfqy$2g%fObavQz)_?cv0~6$0zks*ug99v?;DM**I|_s} z>TqaD%R9enqqCx2biCD3>meQWfn_7+DiLh09T*GfUW_8GagdI1kJ_=Vrt5H@o)}be zb-(&LNle*RwKuqH9E7XZE#%snWP&d0*YcIlfuHJLeFT15{pgx-H5`OTMf6BTG+c(I 
z;eU!nNH`_d7^ClmzP@h%L7uXj_!IP;DJe51U{p;eizbgm8F3++3=cX5R@G6v6xd|frSW9hQIVrP0Q}0bt3l9VV&4$i041VZ zfSYT=KypHav#Wy89ah5qx*rYtRiFCMntz_7mFF^CWpb!SgA^?YO+#D-K78p$F&q|o zvRaSQ6nL2C!^L=BWQ%f2JRMAN=ls*ryZsN}9GslIl>#>=gFCEOONsrkFx}CL?I=sn zE3xZKNHO2bhqNw@JtPkX^Y7B}-F-nNig`IL@9jvs`L-BL#>2hY-6TZ|8qSM!`hNjD z^!UfBm(dbR?gtB+LtkM7lMZO#GD-8iU*b3#qjw_PYv4a~>%)%+7w10(x01)bl9f{u zivJp?oZRTtV6n)4zt6JI3%a}JmVS;~pH6=I{{27U*7>JRZdtbDEjFCW(xXeY@(WIW zonOX3?bjr|U3bYo$EE!@KYn+1_)3hronharT< z2e;}+9^Vfvyx~XT0a2M>7yNybVXfRMvuc%2 zB~5ewEB|$w73t_^Le*D(30Eb9OaA*hf9_eM=lQ48kEe%i{+_5wthaWQvy?tqGh5vv zPp*?E*U8fx$aA*Eozk1Q`!=vg6?)p#D6#l4uz0f=3_qtu;H0_^V}CEd!PYP>LLF1> z)zyqL4!3Xl?HK>Rf%#T7ui~{9+?x~%!b)$Ji7yfJmf`4m-vcwy>2&}*hX5)&iKtnZ#)uS2KRpp3?QTk!LSQ4*7hH`77 zhOj&Sk$KsU9P@fDcu24I5V^|wx^7kl^GmB_=2ZlW73cJlb*+TZ;H%uYdRQi{7b4V% z;cb7Ar{TfEgB8?TRnZH}@SyK#yvPemWNb`({CH4*K}q~H9)A`0IlTgXv_zk>-C5>z zOq;H0V_T-_OKn;|-*jgw(lH%(P16|InAR`6H07y$(x=LA)tiL!abst6_+eR0Fbd`sI#xj?boF&< zNH+b(7~5F&mT`|&@BFGK{cdd#pxGGEUL9t`o%3R_DEjo{6T(uzJ^eeqK2_^&)TGOn z>FE0UKAWc3!&?mF_v6WR`M&6SF&bWvvSEH*z7|4w->Z%JoPs4jFpcH-jjY&|sYJ?r*yNxhrMYmD+>8B{2M$@O?rF0r?pMIp$!McWH zP524Oai4zX(ovuE={GN(Mu*0@+vxlBYnb&$_@=um6(Gf8Gvc_vF1#7B0-tf0B?X&t zmnG1tyMKVertabenWozpc+6&k2}4_UGg>fdx?F%r)5QfIO|sEm(q9v8%#}s$N@cF} z%asY}%9Qissf>P1Cx#^Y5z8Y>>4O0lve{j)99xW2x&Hun0ofRr5eNZn zQ7|$>G&MFuHZU+lL^d@>GDJ2sLq;<)LozitG(|EnL_Q!qFfu|kH8w*wFfc!Q6h+ZcU%z6_K`Ue}*W#@OPN4$r&}Jp&SX1bLPUwexeSL|w3%a2PPM{b1Ub@r1G6v(^KcADP=W;VEj@dj5{uv)&$~_N|XX)B#+;C0xNZ+`ui=;12GspHJD@RR5Q& V8ZMXH2>~7oH90v7B_%~qMhYU`bA$i@ delta 3386 zcmV-A4aM^AlLUj51h8lO2r)D=GBGkVH_C zR(7AZBoN@Br~CBj?zsSzOt6q?!6GKDW-(LJFe#Xpl3Auh&G6lbm`Tk-K|(Bqw2W9} zY)=UmTghb1tTGJ$)(NIGbJmegDSyyQp_4QrQ_!Itq%2g%ifIKckzue9i@2xh9KtZn z$4)Zs0pT{f*%-JSlMePYotoLlvVREuEqsd5 z-^RcVI}ZIK4F&*~JQM&C5z9~##W=^N7lB;^#sE>rU==2f1Sl|LVxd?J+8v0EBR~XK zF%b_ZEuclfYZVI}^y4y8q3cCZ9a-=aLKVJ*j>Qtu2%%a7k4C%EL<);TOL1m#46ulE zY#qo~P)+(nW~CB6Sdg%7@qe!*%pk-DF@Oad34Bl!Zo-0f-~thI0`5D7OK=8_Kp*z@ 
z_Zh!r{G045WBi!C8cqg_B3TBq&tAXoz4}?l5svt0A%vulpM?>0zMzi*eT?WMp^uC{ z=549tMyhD8>U&2`zj-6zzb@NSSB+x-F{O{Z--Uf0qf9OsKS_%l!hgNL-{V(*EE2}w z4aP~2A7%3*nHM>8B-i5?NuI5i!z4!y`G&J(lnxHFFYH=S!MQL{M6Qv6bvHd|S)x0| zfTSw}YqK9e{R=@2EW#ScG7w0c`D!}7sU=Tfo)P2|>aJ;!A|^Uk{p}vVT-_D^`)NA= z+~bGYa+EB+esROUbU_ZM&Jcr_`CMgC9D^JMlL!k;b2 zyuVI>XM%Djh-vzhB8ZV7m;(cv`jiMFrJj#zoEDSST`l!^fQzvyqzz%~h_FGWdoebg^j*=$6FU4odME%rUKZ^nTgx^5 zs%wXhxNoc4$v)|7aL|Uj(oOfQTsznTHd*J8=V#HqNAOTS`X={4=fuoR|#)x{VzxMa( zi9sb-_p7ht*pzKmdxN{iLAYw<Q%#_OVfEnn&E`Kj*Jhv%o&kFE(+AL(<-& zo(k!CC3bx=Ddq?Hkk+M@Z-_dmXgF06x2W&=Lrqii}I zEP29{Usr=Ezvp8<;gdfWlVr|Q{+UntjL-ReHM>iec{-l+j4${iS*F>DV}Dyt&p*;( zHpQ+!pAPZ~FZhbD<|7CXvt`1+@IN+4jh{p6+4<%1`%e|AmjjS`m`z7pNJR?nX?lkt zgvJQzS}CJcn^ym|p%vSgEwsuuuc205UcUjKSDwug=o^D6A}5ce<#aqBUq2j32)}ZY?RK&U@*PEPhf{0BV8+C#bnzQq#;|%c!@8;*5iv?Xj2DZeb;wX| zP1F!}%Re$F+mT~VZv+qN)gB_(Szp)9x?p~3mCT}wK(XeWUb1eK5E@*S8#fQjr1e6C z8Zo>b4)Vl5Sa`64+NdgeVd)?AoutdWphU*Tw8u{e^%s=HPk-sCnB?>d^wAQ1%64a& z(=lzjrcKx~Ow>Mi2|tKRulPx|e~AV9M*puIZIhI^OAU|IC($0vlPetY_NdVQ)k+o(yG zEwj<>?IfEex5Il3rTHjTlere)a;iu^=Ejnmxshh4xdE#Ga0lw(dk59bHbtCK;*KLHcP;?u0 zpMIm#X*7NM!Ahqw?9;DTI#@SwYzRLAIqK8zUpngJKK&S`(`eBcbsJruekQZo2;X#9 zr2?c#Y=1`_^*4pLBUa!u>awI@GwQMgI&~K?*wkITAk%dF0*~2FFkxuRZbu6yO_vK0 zX}Y+;qlvfLi~Ad*jk&VgZK=$be%UesZJBaTJeARp>BLw?zixSCjeRu03O5_~%0Z`j zLtl>?b-h<4I8_e@GBz_tHZ(ahMK(e?K3xhgOl59obZ8(lGcb|? 
zDSsW#S2Dw47=`h3{z?)hLG%_xl<2)hi4raBfhDm3GZtVCHsRW}dsoH`dET4fJoBCR z;*+GmL`r1+mPF;D28INhpv6)yPMYBi&LIV@jrA+m475Yqay{PK;1sIR2_05ai8TjZ z&<%r-ZLBY`_CPQ6!3p%k04%|fm8r(@Fn^4|D2%}&j6(q?U=pUF2-8r48JLARn1^FH zf-)??A}qrStU?9WU>!DK)5^Zb*S4(kYt*(?OGWKi4^L6M*7JANp7r(^wQqgoqYmH# zF5wEU;RbG@4tH>G{d~$!7xfR9(;7kwWo~41baG{3Z3<;>WN%_>3UhQ}a&&ldWtZRy Q0R##*F*yn)B}Gq03Qyc})c^nh diff --git a/projects/rdc/protos/rdc.proto b/projects/rdc/protos/rdc.proto index a857ecfd93..ca9a2ea70d 100755 --- a/projects/rdc/protos/rdc.proto +++ b/projects/rdc/protos/rdc.proto @@ -27,7 +27,18 @@ syntax = "proto3"; // option objc_class_prefix = "HLW"; package rdc; - + +/****************************************************************************/ +/********************************** Rsmi Service ****************************/ +/****************************************************************************/ +service Rsmi { + // RSMI ID services + rpc GetNumDevices (GetNumDevicesRequest) returns(GetNumDevicesResponse) {} + + // RSMI Physical Queries + rpc GetTemperature(GetTemperatureRequest) returns(GetTemperatureResponse){} +} + // rsmi_num_monitor_devices() message GetNumDevicesRequest { } @@ -36,14 +47,6 @@ message GetNumDevicesResponse { uint64 ret_val = 2; } -/* GetNumDevices */ -message VerifyConnectionRequest { - string name = 1; -} -message VerifyConnectionResponse { - string message = 1; -} - /* GetTemperature */ message GetTemperatureRequest { uint32 dv_ind = 1; @@ -71,15 +74,20 @@ message GetTemperatureResponse { uint64 ret_val = 2; } -// The greeting service definition. 
-service Rsmi { +/****************************************************************************/ +/********************************** RdcAdmin Service ************************/ +/****************************************************************************/ +service RdcAdmin { // RDC admin services - rpc VerifyConnection (VerifyConnectionRequest) returns (VerifyConnectionResponse) {} - - // RSMI ID services - rpc GetNumDevices (GetNumDevicesRequest) returns(GetNumDevicesResponse) {} - - // RSMI Physical Queries - rpc GetTemperature(GetTemperatureRequest) returns(GetTemperatureResponse) {} + rpc VerifyConnection (VerifyConnectionRequest) + returns (VerifyConnectionResponse) {} +} + +/* VerifyConnection */ +message VerifyConnectionRequest { + uint64 magic_num = 1; +} +message VerifyConnectionResponse { + uint64 echo_magic_num = 1; } diff --git a/projects/rdc/server/CMakeLists.txt b/projects/rdc/server/CMakeLists.txt index 10c94fecce..221bdfe840 100755 --- a/projects/rdc/server/CMakeLists.txt +++ b/projects/rdc/server/CMakeLists.txt @@ -71,7 +71,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include "${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}") set(SERVER_SRC_LIST "${SRC_DIR}/rdc_rsmi_service.cc") -set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${SRC_DIR}/rdc_main.cc") +set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${SRC_DIR}/rdc_admin_service.cc") +set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${SRC_DIR}/rdc_server_main.cc") set(SERVER_SRC_LIST ${SERVER_SRC_LIST} "${PROTOBUF_GENERATED_SRCS}") message("SERVER_SRC_LIST=${SERVER_SRC_LIST}") diff --git a/projects/rdc/server/include/rdc/rdc_admin_service.h b/projects/rdc/server/include/rdc/rdc_admin_service.h new file mode 100755 index 0000000000..c0f8d0c1ce --- /dev/null +++ b/projects/rdc/server/include/rdc/rdc_admin_service.h @@ -0,0 +1,39 @@ +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#ifndef SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_ +#define SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_ + +#include "rdc.grpc.pb.h" // NOLINT +#include "rocm_smi/rocm_smi.h" +#include "rdc/rdc_admin_service.h" + +class RDCAdminServiceImpl final : public ::rdc::RdcAdmin::Service { + public: + RDCAdminServiceImpl(); + ~RDCAdminServiceImpl(); + ::grpc::Status VerifyConnection(::grpc::ServerContext* context, + const rdc::VerifyConnectionRequest* request, + rdc::VerifyConnectionResponse* reply) override; + private: +}; + +#endif // SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_ diff --git a/projects/rdc/server/include/rdc/rdc_rsmi_service.h b/projects/rdc/server/include/rdc/rdc_rsmi_service.h index c5b8a2215d..a420cfe311 100755 --- a/projects/rdc/server/include/rdc/rdc_rsmi_service.h +++ b/projects/rdc/server/include/rdc/rdc_rsmi_service.h @@ -33,10 +33,6 @@ class RsmiServiceImpl final : public ::rdc::Rsmi::Service { rsmi_status_t Initialize(uint64_t rsmi_init_flags = 0); - ::grpc::Status VerifyConnection(::grpc::ServerContext* context, - const rdc::VerifyConnectionRequest* request, - rdc::VerifyConnectionResponse* reply) override; - ::grpc::Status GetNumDevices(::grpc::ServerContext* context, const ::rdc::GetNumDevicesRequest* request, diff --git a/projects/rdc/server/include/rdc/rdc_main.h b/projects/rdc/server/include/rdc/rdc_server_main.h similarity index 80% rename from projects/rdc/server/include/rdc/rdc_main.h rename to projects/rdc/server/include/rdc/rdc_server_main.h index 98c4d347c7..5af7a32e0b 100755 --- a/projects/rdc/server/include/rdc/rdc_main.h +++ b/projects/rdc/server/include/rdc/rdc_server_main.h @@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -#ifndef SERVER_INCLUDE_RDC_RDC_MAIN_H_ -#define SERVER_INCLUDE_RDC_RDC_MAIN_H_ +#ifndef SERVER_INCLUDE_RDC_RDC_SERVER_MAIN_H_ +#define SERVER_INCLUDE_RDC_RDC_SERVER_MAIN_H_ #include @@ -28,6 +28,7 @@ THE SOFTWARE. #include #include "rdc/rdc_rsmi_service.h" +#include "rdc/rdc_admin_service.h" class RDCServer { public: @@ -41,15 +42,22 @@ class RDCServer { bool start_rsmi_service(void) const {return start_rsmi_service_;} void set_start_rsmi_service(bool s) {start_rsmi_service_ = s;} + bool start_rdc_admin_service(void) const {return start_rdc_admin_service_;} + void set_start_rdc_admin_service(bool s) {start_rdc_admin_service_ = s;} + void ShutDown(void); private: void HandleSignal(int sig); std::string server_address_; - bool start_rsmi_service_; std::unique_ptr<::grpc::Server> server_; + bool start_rsmi_service_; RsmiServiceImpl *rsmi_service_; + + bool start_rdc_admin_service_; + RDCAdminServiceImpl *rdc_admin_service_; }; -#endif // SERVER_INCLUDE_RDC_RDC_MAIN_H_ +#endif // SERVER_INCLUDE_RDC_RDC_SERVER_MAIN_H_ + diff --git a/projects/rdc/server/rdc.service b/projects/rdc/server/rdc.service index 218a7c4356..713f8d10c8 100755 --- a/projects/rdc/server/rdc.service +++ b/projects/rdc/server/rdc.service @@ -7,7 +7,7 @@ Description=Radeon Data Center Daemon (rdcd) After=network.target # Add any services that must be started before rdcd here -#After= +#After= # Add any non-service units required by rdcd here #Requires= diff --git a/projects/rdc/server/src/rdc_admin_service.cc b/projects/rdc/server/src/rdc_admin_service.cc new file mode 100755 index 0000000000..af4e3aa917 --- /dev/null +++ b/projects/rdc/server/src/rdc_admin_service.cc @@ -0,0 +1,49 @@ + +/* +Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "rdc.grpc.pb.h" // NOLINT +#include "rdc/rdc_admin_service.h" + +RDCAdminServiceImpl::RDCAdminServiceImpl() { +} + +RDCAdminServiceImpl::~RDCAdminServiceImpl() { +} +::grpc::Status +RDCAdminServiceImpl::VerifyConnection(::grpc::ServerContext* context, + const rdc::VerifyConnectionRequest* request, + rdc::VerifyConnectionResponse* reply) { + (void)context; // Quiet warning for now + + reply->set_echo_magic_num(request->magic_num()); + return ::grpc::Status::OK; +} diff --git a/projects/rdc/server/src/rdc_rsmi_service.cc b/projects/rdc/server/src/rdc_rsmi_service.cc index 58c2200daa..1c75a22ee5 100755 --- a/projects/rdc/server/src/rdc_rsmi_service.cc +++ b/projects/rdc/server/src/rdc_rsmi_service.cc @@ -61,15 +61,6 @@ RsmiServiceImpl::Initialize(uint64_t rsmi_init_flags) { } return rsmi_ret; } -::grpc::Status -RsmiServiceImpl::VerifyConnection(::grpc::ServerContext* context, - const rdc::VerifyConnectionRequest* request, - rdc::VerifyConnectionResponse* reply) { - (void)context; // Quiet warning for now - std::string prefix("Hello "); - reply->set_message(prefix + request->name()); - return ::grpc::Status::OK; -} ::grpc::Status RsmiServiceImpl::GetNumDevices(::grpc::ServerContext* context, diff --git a/projects/rdc/server/src/rdc_main.cc b/projects/rdc/server/src/rdc_server_main.cc similarity index 95% rename from projects/rdc/server/src/rdc_main.cc rename to projects/rdc/server/src/rdc_server_main.cc index 00fe2c56cb..0f4240fe23 100755 --- a/projects/rdc/server/src/rdc_main.cc +++ b/projects/rdc/server/src/rdc_server_main.cc @@ -36,7 +36,7 @@ THE SOFTWARE. 
#include "rdc.grpc.pb.h" // NOLINT #include "rocm_smi/rocm_smi.h" -#include "rdc/rdc_main.h" +#include "rdc/rdc_server_main.h" #include "rdc/rdc_rsmi_service.h" static bool sShutDownServer = false; @@ -46,7 +46,7 @@ static const char *kRDCDHomeDir = "/"; static const char *kDaemonLockFile = "/var/run/rdcd.lock"; RDCServer::RDCServer() : server_address_("0.0.0.0:50051"), - rsmi_service_(nullptr) { + rsmi_service_(nullptr), rdc_admin_service_(nullptr) { } RDCServer::~RDCServer() { @@ -66,6 +66,10 @@ RDCServer::Run() { // Register services as the instances through which we'll communicate with // clients. These are synchronous services. + if (start_rdc_admin_service()) { + rdc_admin_service_ = new RDCAdminServiceImpl(); + builder.RegisterService(rdc_admin_service_); + } if (start_rsmi_service()) { rsmi_service_ = new RsmiServiceImpl(); @@ -125,6 +129,11 @@ RDCServer::ShutDown(void) { delete rsmi_service_; rsmi_service_ = nullptr; } + + if (rdc_admin_service_) { + delete rdc_admin_service_; + rdc_admin_service_ = nullptr; + } } static void * ProcessSignalLoop(void *server_ptr) { @@ -268,6 +277,8 @@ int main(int argc, char** argv) { // TODO(cfreehil): Eventually, set these by reading a config file rdc_server.set_start_rsmi_service(true); + rdc_server.set_start_rdc_admin_service(true); + // rdc_server.set_secure_communications(false); // rdc_server.set_address("0.0.0.0:50051") diff --git a/projects/rdc/tests/example/CMakeLists.txt b/projects/rdc/tests/example/CMakeLists.txt index 149a0718b7..104be28fa2 100755 --- a/projects/rdc/tests/example/CMakeLists.txt +++ b/projects/rdc/tests/example/CMakeLists.txt @@ -41,9 +41,6 @@ endif () # Required Defines first: -set(RSMI_INC_DIR ${ROCM_DIR}/include) -set(RSMI_LIB_DIR ${ROCM_DIR}/lib) - message("") message("Build Configuration:") message("-----------BuildType: " ${CMAKE_BUILD_TYPE}) @@ -53,7 +50,6 @@ message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR}) message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR}) 
message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib) message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin) -message("--------RSMI Lib Dir: " ${RSMI_LIB_DIR}) message("--------RSMI Inc Dir: " ${RSMI_INC_DIR}) message("") @@ -66,8 +62,6 @@ include(utils) ## Verbose output. set(CMAKE_VERBOSE_MAKEFILE on) -include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../../client/include" - "${PROTOB_OUT_DIR}" "${RSMI_INC_DIR}") set(EXAMPLE_SRC_LIST "${SRC_DIR}/rdc_client_test.cc") message("EXAMPLE_SRC_LIST=${EXAMPLE_SRC_LIST}") @@ -77,6 +71,11 @@ set(CLIENT_LIB_INC_LIST "${INC_DIR}/rdc_client.h") set(TEST_CLIENT_EXE "rdc_test_client") add_executable(${TEST_CLIENT_EXE} "${EXAMPLE_SRC_LIST}") + +target_include_directories(${TEST_CLIENT_EXE} PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../../client/include" + "${RSMI_INC_DIR}") + target_link_libraries(${TEST_CLIENT_EXE} rdc_client) message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") diff --git a/projects/rdc/tests/example/rdc_client_test.cc b/projects/rdc/tests/example/rdc_client_test.cc index b4eca57d0f..c81dc0d853 100755 --- a/projects/rdc/tests/example/rdc_client_test.cc +++ b/projects/rdc/tests/example/rdc_client_test.cc @@ -31,7 +31,10 @@ THE SOFTWARE. #define CHK_RET_STATUS(RET) \ if ((RET) != RDC_STATUS_SUCCESS) { \ - std::cout << "rdc call returned error: " << (RET) << std::endl; \ + const char *err_msg_str; \ + (void)rdc_status_string((RET), &err_msg_str); \ + std::cout << "rdc call returned error: " << (RET) << ":\"" << \ + err_msg_str << "\"" << std::endl; \ } #define CHK_RET_STATUS_CONT(RET) \ @@ -41,11 +44,8 @@ THE SOFTWARE. 
} int main(int argc, char** argv) { - (void)argc; // ignore for now - (void)argv; // ignore for now - rdc_status_t ret; - rdc_channel_t server; + rdc_channel_t server_ch; uint64_t num_gpu; int64_t temperature; std::string serv_host("localhost"); @@ -61,27 +61,39 @@ int main(int argc, char** argv) { std::cout << "Attempting to create channel to " << serv_host << ":" << serv_port << std::endl; - ret = rdc_channel_create(&server, serv_host.c_str(), serv_port.c_str(), + ret = rdc_channel_create(&server_ch, serv_host.c_str(), serv_port.c_str(), false); CHK_RET_STATUS(ret) std::cout << "Successfully created channel" << std::endl; - std::cout << "Getting number of gpus at server..." << std::endl; - ret = rdc_num_gpus_get(server, &num_gpu); + grpc_connectivity_state ch_state; + ret = rdc_channel_state_get(server_ch, true, &ch_state); CHK_RET_STATUS(ret) - std::cout << "Number of GPUs at server is " << num_gpu << std::endl; + std::cout << "Current channel state is " << ch_state << std::endl; + + std::cout << "Verifying connection to server..." << std::endl; + ret = rdc_channel_connection_verify(server_ch); + CHK_RET_STATUS(ret) + if (ret == RDC_STATUS_SUCCESS) { + std::cout << "Verified connection to server." << std::endl; + } + std::cout << "Getting number of gpus at server..." << std::endl; + ret = rdc_num_gpus_get(server_ch, &num_gpu); + CHK_RET_STATUS(ret) + std::cout << "Number of GPUs at server is " << server_ch << + num_gpu << std::endl; for (uint32_t dv_ind = 0; dv_ind < num_gpu; ++dv_ind) { std::cout << "Info for Device " << dv_ind << ":" << std::endl; std::cout << "\tGetting temperature..." 
<< std::endl; - ret = rdc_dev_temp_metric_get(server, dv_ind, RSMI_TEMP_TYPE_JUNCTION, + ret = rdc_dev_temp_metric_get(server_ch, dv_ind, RSMI_TEMP_TYPE_JUNCTION, RSMI_TEMP_CURRENT, &temperature); CHK_RET_STATUS_CONT(ret) std::cout << "\t GPU " << dv_ind << " has a temperature of " << temperature << std::endl; } - ret = rdc_channel_destroy(server); + ret = rdc_channel_destroy(server_ch); CHK_RET_STATUS(ret) std::cout << "Successfully destroyed channel to " << serv_host << ":" << serv_port << std::endl;