Enable RDC link Status feature
1.add link status APIs
2.Add link status example for link status API usage
[ROCm/rdc commit: 29b6699b62]
This commit is contained in:
@@ -132,6 +132,12 @@ set(CONFIG_EXAMPLE_EXE "config")
|
||||
add_executable(${CONFIG_EXAMPLE_EXE} "${CONFIG_EXAMPLE_SRC_LIST}")
|
||||
target_link_libraries(${CONFIG_EXAMPLE_EXE} pthread dl rdc_bootstrap)
|
||||
|
||||
set(TOPOLOGYLINK_EXAMPLE_SRC_LIST "topologylink_example.cc")
|
||||
cmake_print_variables(TOPOLOGYLINK_EXAMPLE_SRC_LIST)
|
||||
set(TOPOLOGYLINK_EXAMPLE_EXE "topologylink")
|
||||
add_executable(${TOPOLOGYLINK_EXAMPLE_EXE} "${TOPOLOGYLINK_EXAMPLE_SRC_LIST}")
|
||||
target_link_libraries(${TOPOLOGYLINK_EXAMPLE_EXE} pthread dl rdc_bootstrap)
|
||||
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
message(" Finished Cmake Example ")
|
||||
message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&")
|
||||
|
||||
@@ -119,10 +119,18 @@ int main() {
|
||||
<< "max_bandwidth: " << std::to_string(topo.link_infos[i].max_bandwidth) << "\n"
|
||||
<< "hops: " << std::to_string(topo.link_infos[i].hops) << "\n"
|
||||
<< "link_type: " << topology_link_type_to_str(topo.link_infos[i].link_type) << "\n"
|
||||
<< "is_p2p_accessible: " << std::to_string(topo.link_infos[i].is_p2p_accessible) << "\n"
|
||||
<< "is_p2p_accessible: " << std::to_string(topo.link_infos[i].is_p2p_accessible)
|
||||
<< "\n"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
rdc_link_status_t link_status;
|
||||
result = rdc_link_status_get(rdc_handle, &link_status);
|
||||
if (result != RDC_ST_OK) {
|
||||
std::cout << "Error clear topology, Return: " << rdc_status_string(result) << std::endl;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
//... clean up
|
||||
cleanup:
|
||||
std::cout << "Cleaning up.\n";
|
||||
|
||||
@@ -654,7 +654,7 @@ typedef enum {
|
||||
#define RDC_MAX_NUM_OF_LINKS 16
|
||||
|
||||
typedef struct {
|
||||
int32_t gpu_index;
|
||||
uint32_t gpu_index;
|
||||
uint32_t num_of_links; // The size of the array link_states
|
||||
rdc_topology_link_type_t link_types; // XGMI, PCIe, and so on
|
||||
rdc_link_state_t link_states[RDC_MAX_NUM_OF_LINKS];
|
||||
|
||||
@@ -147,9 +147,10 @@ class RdcEmbeddedHandler final : public RdcHandler {
|
||||
RdcWatchTablePtr watch_table_;
|
||||
RdcMetricsUpdaterPtr metrics_updater_;
|
||||
RdcPolicyPtr policy_;
|
||||
RdcTopologyLinkPtr topologylink_;
|
||||
RdcConfigSettingsPtr config_handler_;
|
||||
std::future<void> updater_;
|
||||
RdcTopologyLinkPtr topologylink_;
|
||||
|
||||
};
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -217,6 +217,8 @@ service RdcAPI {
|
||||
//Clear the setting
|
||||
rpc ClearConfig(ClearConfigRequest) returns (ClearConfigResponse) {}
|
||||
|
||||
// rdc_status_t GetLinkStatus()
|
||||
rpc GetLinkStatus(Empty) returns (GetLinkStatusResponse) {}
|
||||
}
|
||||
|
||||
message Empty {
|
||||
@@ -729,6 +731,32 @@ message GetTopologyResponse {
|
||||
uint32 status = 1;
|
||||
Topology toppology = 2;
|
||||
}
|
||||
message GpuLinkStatus{
|
||||
uint32 gpu_index = 1;
|
||||
uint32 num_of_links = 2;
|
||||
enum LinkTypes {
|
||||
RDC_IOLINK_TYPE_UNDEFINED = 0;
|
||||
RDC_IOLINK_TYPE_PCIEXPRESS = 1;
|
||||
RDC_IOLINK_TYPE_XGMI = 2;
|
||||
RDCI_IOLINK_TYPE_NUMIOLINKTYPES = 3;
|
||||
};
|
||||
LinkTypes link_types = 3;
|
||||
enum LinkState{
|
||||
RDC_LINK_STATE_NOT_SUPPORTED = 0;
|
||||
RDC_LINK_STATE_DISABLED = 1;
|
||||
RDC_LINK_STATE_DOWN = 2;
|
||||
RDC_LINK_STATE_UP = 3;
|
||||
};
|
||||
repeated LinkState link_states = 4;
|
||||
}
|
||||
message LinkStatus{
|
||||
uint32 num_of_gpus = 1;
|
||||
repeated GpuLinkStatus gpus = 2;
|
||||
}
|
||||
message GetLinkStatusResponse{
|
||||
uint32 status = 1;
|
||||
LinkStatus linkstatus = 2;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ THE SOFTWARE.
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
#include <map>
|
||||
@@ -75,6 +76,7 @@ rdc_status_t RdcTopologyLinkImpl::rdc_device_topology_get(uint32_t gpu_index,
|
||||
|
||||
// Assign the index to the index list
|
||||
count = device_count.value.l_int;
|
||||
assert(count <= RDC_MAX_NUM_DEVICES);
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
gpu_index_list[i] = i;
|
||||
}
|
||||
@@ -134,6 +136,46 @@ rdc_status_t RdcTopologyLinkImpl::rdc_device_topology_get(uint32_t gpu_index,
|
||||
|
||||
rdc_status_t RdcTopologyLinkImpl::rdc_link_status_get(rdc_link_status_t* results) {
|
||||
rdc_status_t status = RDC_ST_NOT_FOUND;
|
||||
amdsmi_status_t err = AMDSMI_STATUS_SUCCESS;
|
||||
uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES];
|
||||
|
||||
uint32_t count = 0;
|
||||
rdc_field_value device_count;
|
||||
status = metric_fetcher_->fetch_smi_field(0, RDC_FI_GPU_COUNT, &device_count);
|
||||
if (status != RDC_ST_OK) {
|
||||
return status;
|
||||
}
|
||||
|
||||
// Assign the index to the index list
|
||||
count = device_count.value.l_int;
|
||||
assert(count <= RDC_MAX_NUM_DEVICES);
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
gpu_index_list[i] = i;
|
||||
}
|
||||
results->num_of_gpus = count;
|
||||
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
amdsmi_processor_handle processor_handle;
|
||||
err = get_processor_handle_from_id(gpu_index_list[i], &processor_handle);
|
||||
if (err != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_INFO, "Fail to get process GPUs processor handle information: " << err);
|
||||
return status;
|
||||
}
|
||||
|
||||
amdsmi_xgmi_link_status_t link_status;
|
||||
err = amdsmi_get_gpu_xgmi_link_status(processor_handle, &link_status);
|
||||
if (err != AMDSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_INFO, "Fail to get process GPUs xgmi link information: " << err);
|
||||
}
|
||||
results->gpus[i].gpu_index = gpu_index_list[i];
|
||||
results->gpus[i].num_of_links = link_status.total_links;
|
||||
for (uint32_t n = 0; n < link_status.total_links; n++) {
|
||||
results->gpus[i].link_states[n] = static_cast<rdc_link_state_t>(link_status.status[n]);
|
||||
}
|
||||
|
||||
results->gpus[i].link_types = RDC_IOLINK_TYPE_XGMI;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
@@ -1027,27 +1027,46 @@ rdc_status_t RdcStandaloneHandler::rdc_device_topology_get(uint32_t gpu_index,
|
||||
if (err_status != RDC_ST_OK) return err_status;
|
||||
|
||||
::rdc::Topology Topology = reply.toppology();
|
||||
results->num_of_gpus= Topology.num_of_gpus();
|
||||
results->numa_node= Topology.numa_node();
|
||||
results->num_of_gpus = Topology.num_of_gpus();
|
||||
results->numa_node = Topology.numa_node();
|
||||
|
||||
for (uint32_t i = 0; i < Topology.num_of_gpus(); ++i) {
|
||||
::rdc::TopologyLinkInfo linkinfo = Topology.link_infos(i);
|
||||
results->link_infos[i].gpu_index=linkinfo.gpu_index();
|
||||
results->link_infos[i].weight=linkinfo.weight();
|
||||
results->link_infos[i].min_bandwidth=linkinfo.min_bandwidth();
|
||||
results->link_infos[i].max_bandwidth=linkinfo.max_bandwidth();
|
||||
results->link_infos[i].hops=linkinfo.hops();
|
||||
results->link_infos[i].link_type=static_cast<rdc_topology_link_type_t>(linkinfo.link_type());
|
||||
results->link_infos[i].is_p2p_accessible=linkinfo.p2p_accessible();
|
||||
::rdc::TopologyLinkInfo linkinfo = Topology.link_infos(i);
|
||||
results->link_infos[i].gpu_index = linkinfo.gpu_index();
|
||||
results->link_infos[i].weight = linkinfo.weight();
|
||||
results->link_infos[i].min_bandwidth = linkinfo.min_bandwidth();
|
||||
results->link_infos[i].max_bandwidth = linkinfo.max_bandwidth();
|
||||
results->link_infos[i].hops = linkinfo.hops();
|
||||
results->link_infos[i].link_type = static_cast<rdc_topology_link_type_t>(linkinfo.link_type());
|
||||
results->link_infos[i].is_p2p_accessible = linkinfo.p2p_accessible();
|
||||
}
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
rdc_status_t RdcStandaloneHandler::rdc_link_status_get(rdc_link_status_t* results) {
|
||||
::rdc::UpdateAllFieldsResponse reply;
|
||||
::grpc::Status status = grpc::Status::OK;
|
||||
return error_handle(status, reply.status());
|
||||
::rdc::Empty request;
|
||||
::rdc::GetLinkStatusResponse reply;
|
||||
::grpc::ClientContext context;
|
||||
|
||||
::grpc::Status status = stub_->GetLinkStatus(&context, request, &reply);
|
||||
rdc_status_t err_status = error_handle(status, reply.status());
|
||||
if (err_status != RDC_ST_OK) return err_status;
|
||||
|
||||
::rdc::LinkStatus LinkStatus = reply.linkstatus();
|
||||
results->num_of_gpus = LinkStatus.num_of_gpus();
|
||||
|
||||
for (uint32_t i = 0; i < LinkStatus.num_of_gpus(); ++i) {
|
||||
::rdc::GpuLinkStatus gpulinkstatus = LinkStatus.gpus(i);
|
||||
results->gpus[i].gpu_index = gpulinkstatus.gpu_index();
|
||||
results->gpus[i].num_of_links = gpulinkstatus.num_of_links();
|
||||
results->gpus[i].link_types = static_cast<rdc_topology_link_type_t>(gpulinkstatus.link_types());
|
||||
for (uint32_t n = 0; n < gpulinkstatus.num_of_links(); n++) {
|
||||
results->gpus[i].link_states[n] = static_cast<rdc_link_state_t>(gpulinkstatus.link_states(n));
|
||||
}
|
||||
}
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -72,6 +72,7 @@ set(RDCI_SRC_LIST
|
||||
"${SRC_DIR}/RdciConfigSubSystem.cc"
|
||||
"${SRC_DIR}/RdciSubSystem.cc"
|
||||
"${SRC_DIR}/RdciTopologyLinkSubSystem.cc"
|
||||
"${SRC_DIR}/RdciXgmiLinkStatusSubSystem.cc"
|
||||
"${SRC_DIR}/rdci.cc")
|
||||
message("RDCI_SRC_LIST=${RDCI_SRC_LIST}")
|
||||
set(RDCI_EXE "rdci")
|
||||
|
||||
@@ -34,7 +34,7 @@ class RdciTopologyLinkSubSystem : public RdciSubSystem {
|
||||
private:
|
||||
void show_help() const;
|
||||
enum OPERATIONS {
|
||||
POLICY_UNKNOWN = 0,
|
||||
TOPOLOGY_UNKNOWN = 0,
|
||||
TOPOLOGY_INDEX,
|
||||
} topology_ops_;
|
||||
uint32_t group_index_;
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDCI_INCLUDE_RDCIXMGILINKSTATUSSYSTEM_H_
|
||||
#define RDCI_INCLUDE_RDCIXMGILINKSTATUSSYSTEM_H_
|
||||
#include <signal.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "RdciSubSystem.h"
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
class RdciXgmiLinkStatusSubSystem : public RdciSubSystem {
|
||||
public:
|
||||
RdciXgmiLinkStatusSubSystem();
|
||||
void parse_cmd_opts(int argc, char** argv) override;
|
||||
void process() override;
|
||||
|
||||
private:
|
||||
void show_help() const;
|
||||
enum OPERATIONS {
|
||||
XMGI_LINK_UNKNOWN = 0,
|
||||
XMGI_LINK_STATUS,
|
||||
} link_status_ops_;
|
||||
};
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
#endif // RDCI_INCLUDE_RDCIXMGILINKSTATUSSYSTEM_H_
|
||||
@@ -28,7 +28,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
RdciTopologyLinkSubSystem::RdciTopologyLinkSubSystem() {}
|
||||
RdciTopologyLinkSubSystem::RdciTopologyLinkSubSystem() : topology_ops_(TOPOLOGY_UNKNOWN) {}
|
||||
|
||||
void RdciTopologyLinkSubSystem::parse_cmd_opts(int argc, char** argv) {
|
||||
const int HOST_OPTIONS = 1000;
|
||||
@@ -109,8 +109,7 @@ void RdciTopologyLinkSubSystem::process() {
|
||||
<< "------------------+\n";
|
||||
for (uint32_t i = 0; i < topology.num_of_gpus; i++) {
|
||||
std::cout << "| To GPU " << i + 1 << "\t\t"
|
||||
<< "| " << topology_link_type_to_str(RDC_IOLINK_TYPE_XGMI)
|
||||
<< "\t\t\t|\n";
|
||||
<< "| " << topology_link_type_to_str(RDC_IOLINK_TYPE_XGMI) << "\t\t\t|\n";
|
||||
}
|
||||
std::cout << "+-----------------------+"
|
||||
<< "-----------------------------"
|
||||
@@ -118,6 +117,9 @@ void RdciTopologyLinkSubSystem::process() {
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER, "Unknown command");
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // namespace rdc
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "RdciXgmiLinkStatusSubSystem.h"
|
||||
|
||||
#include <getopt.h>
|
||||
#include <signal.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "common/rdc_utils.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
RdciXgmiLinkStatusSubSystem::RdciXgmiLinkStatusSubSystem() : link_status_ops_(XMGI_LINK_UNKNOWN) {}
|
||||
|
||||
void RdciXgmiLinkStatusSubSystem::parse_cmd_opts(int argc, char** argv) {
|
||||
const int HOST_OPTIONS = 1000;
|
||||
const struct option long_options[] = {{"host", required_argument, nullptr, HOST_OPTIONS},
|
||||
{"help", optional_argument, nullptr, 'h'},
|
||||
{"unauth", optional_argument, nullptr, 'u'},
|
||||
{"xgmi-link-status", optional_argument, nullptr, 'l'},
|
||||
{nullptr, 0, nullptr, 0}};
|
||||
int option_index = 0;
|
||||
int opt = 0;
|
||||
while ((opt = getopt_long(argc, argv, "hul", long_options, &option_index)) != -1) {
|
||||
{
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
ip_port_ = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
show_help();
|
||||
break;
|
||||
case 'u':
|
||||
use_auth_ = false;
|
||||
break;
|
||||
case 'l':
|
||||
link_status_ops_ = XMGI_LINK_STATUS;
|
||||
break;
|
||||
default:
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER, "Unknown command line options");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
void RdciXgmiLinkStatusSubSystem::show_help() const {
|
||||
std::cout << " link -- Used to link Get the link status the link is up or down.\n\n";
|
||||
std::cout << "Usage\n";
|
||||
std::cout << " rdci link [--host <IP/FQDN>:port] [--xgmi-link-status] -l \n";
|
||||
std::cout << "\nFlags:\n";
|
||||
show_common_usage();
|
||||
}
|
||||
|
||||
static const char* xgmi_link_status_to_str(rdc_link_state_t state) {
|
||||
if (state == RDC_LINK_STATE_DISABLED) return "X ";
|
||||
if (state == RDC_LINK_STATE_DOWN) return "D ";
|
||||
if (state == RDC_LINK_STATE_UP) return "U ";
|
||||
return "N/A ";
|
||||
}
|
||||
|
||||
void RdciXgmiLinkStatusSubSystem::process() {
|
||||
rdc_status_t result = RDC_ST_OK;
|
||||
switch (link_status_ops_) {
|
||||
case XMGI_LINK_STATUS: {
|
||||
rdc_link_status_t link_status;
|
||||
result = rdc_link_status_get(rdc_handle_, &link_status);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "GPUs:\n";
|
||||
for (int32_t i = 0; i < link_status.num_of_gpus; i++) {
|
||||
std::cout << " GPU:" << link_status.gpus[i].gpu_index << "\n";
|
||||
std::cout << " ";
|
||||
for (uint32_t n = 0; n < link_status.gpus[i].num_of_links; n++) {
|
||||
std::cout << xgmi_link_status_to_str(link_status.gpus[i].link_states[n]);
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER, "Unknown command");
|
||||
break;
|
||||
}
|
||||
}
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
@@ -35,6 +35,7 @@ THE SOFTWARE.
|
||||
#include "RdciTopologyLinkSubSystem.h"
|
||||
#include "RdciPolicySubSystem.h"
|
||||
#include "RdciStatsSubSystem.h"
|
||||
#include "RdciXgmiLinkStatusSubSystem.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
@@ -57,7 +58,7 @@ int main(int argc, char** argv) {
|
||||
const std::string usage_help =
|
||||
"Usage:\trdci <subsystem>|<options>\n"
|
||||
"subsystem: \n"
|
||||
" discovery, dmon, group, fieldgroup, stats, diag, config, policy, health, topo\n"
|
||||
" discovery, dmon, group, fieldgroup, stats, diag, config, policy, health, topo, link\n"
|
||||
"options: \n"
|
||||
" -v(--version) : Print client version information only\n";
|
||||
|
||||
@@ -93,6 +94,8 @@ int main(int argc, char** argv) {
|
||||
subsystem.reset(new amd::rdc::RdciHealthSubSystem());
|
||||
} else if (subsystem_name == "topo") {
|
||||
subsystem.reset(new amd::rdc::RdciTopologyLinkSubSystem());
|
||||
} else if (subsystem_name == "link") {
|
||||
subsystem.reset(new amd::rdc::RdciXgmiLinkStatusSubSystem());
|
||||
} else if (subsystem_name == "stats") {
|
||||
subsystem.reset(new amd::rdc::RdciStatsSubSystem());
|
||||
} else if (subsystem_name == "policy") {
|
||||
|
||||
@@ -152,11 +152,14 @@ class RdcAPIServiceImpl final : public ::rdc::RdcAPI::Service {
|
||||
::grpc::Status UnRegisterPolicy(::grpc::ServerContext* context,
|
||||
const ::rdc::UnRegisterPolicyRequest* request,
|
||||
::rdc::UnRegisterPolicyResponse* reply) override;
|
||||
|
||||
|
||||
::grpc::Status GetTopology(::grpc::ServerContext* context,
|
||||
const ::rdc::GetTopologyRequest* request,
|
||||
::rdc::GetTopologyResponse* reply) override;
|
||||
|
||||
::grpc::Status GetLinkStatus(::grpc::ServerContext* context, const ::rdc::Empty* request,
|
||||
::rdc::GetLinkStatusResponse* reply) override;
|
||||
|
||||
::grpc::Status SetHealth(::grpc::ServerContext* context, const ::rdc::SetHealthRequest* request,
|
||||
::rdc::SetHealthResponse* reply) override;
|
||||
|
||||
|
||||
@@ -1106,5 +1106,38 @@ int RdcAPIServiceImpl::PolicyCallback(rdc_policy_callback_response_t* userData)
|
||||
return ::grpc::Status::OK;
|
||||
}
|
||||
|
||||
::grpc::Status RdcAPIServiceImpl::GetLinkStatus(::grpc::ServerContext* context,
|
||||
const ::rdc::Empty* request,
|
||||
::rdc::GetLinkStatusResponse* reply) {
|
||||
(void)(context);
|
||||
if (!reply || !request) {
|
||||
return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents");
|
||||
}
|
||||
|
||||
rdc_link_status_t link_status_results;
|
||||
// call RDC link status API
|
||||
rdc_status_t result = rdc_link_status_get(rdc_handle_, &link_status_results);
|
||||
reply->set_status(result);
|
||||
if (result != RDC_ST_OK) {
|
||||
return ::grpc::Status::OK;
|
||||
}
|
||||
|
||||
::rdc::LinkStatus* linkstatus = reply->mutable_linkstatus();
|
||||
linkstatus->set_num_of_gpus(link_status_results.num_of_gpus);
|
||||
for (int32_t i = 0; i < link_status_results.num_of_gpus; ++i) {
|
||||
::rdc::GpuLinkStatus* gpulinkstatus = linkstatus->add_gpus();
|
||||
gpulinkstatus->set_gpu_index(link_status_results.gpus[i].gpu_index);
|
||||
gpulinkstatus->set_num_of_links(link_status_results.gpus[i].num_of_links);
|
||||
gpulinkstatus->set_link_types(
|
||||
static_cast<::rdc::GpuLinkStatus_LinkTypes>(link_status_results.gpus[i].link_types));
|
||||
for (uint32_t n = 0; n < link_status_results.gpus[i].num_of_links; n++) {
|
||||
gpulinkstatus->add_link_states(static_cast<::rdc::GpuLinkStatus_LinkState>(
|
||||
link_status_results.gpus[i].link_states[n]));
|
||||
}
|
||||
}
|
||||
|
||||
return ::grpc::Status::OK;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
مرجع در شماره جدید
Block a user