diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h
index 5dae24155b..3370e49bce 100644
--- a/projects/rdc/include/rdc/rdc.h
+++ b/projects/rdc/include/rdc/rdc.h
@@ -141,6 +141,11 @@ typedef enum { INTEGER = 0, DOUBLE, STRING, BLOB } rdc_field_type_t;
  */
 #define RDC_MAX_NUM_FIELD_GROUPS 64
 
+/**
+ * @brief Size in bytes of the buffer that holds a component version string
+ */
+#define RDC_MAX_VERSION_STR_LENGTH 60
+
 /**
  * These enums are used to specify a particular field to be retrieved.
  */
@@ -343,6 +348,13 @@ typedef struct {
   char device_name[RDC_MAX_STR_LENGTH];  //!< Name of the device.
 } rdc_device_attributes_t;
 
+/**
+ * @brief Store version information for each component
+ */
+typedef struct {
+  char version[RDC_MAX_VERSION_STR_LENGTH];  //!< Version string of the component.
+} rdc_component_version_t;
+
 /**
  * @brief The structure to store the group info
  */
@@ -476,6 +488,14 @@ typedef enum {
   RDC_DIAG_TEST_LAST = RDC_DIAG_GPU_PARAMETERS
 } rdc_diag_test_cases_t;
 
+/**
+ * @brief Type of Components
+ */
+typedef enum {
+  RDC_AMDMSI_COMPONENT  //!< The amd-smi library
+  // If needed later, add them one by one
+} rdc_component_t;
+
 /**
  * @brief The maximum test cases to run
  */
@@ -750,6 +770,22 @@ rdc_status_t rdc_device_get_all(rdc_handle_t p_rdc_handle,
 rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, uint32_t gpu_index,
                                        rdc_device_attributes_t* p_rdc_attr);
 
+/**
+ *  @brief Get version information of components used by rdc.
+ *
+ *  @details Given a component type, return its version information.
+ *
+ *  @param[in] p_rdc_handle The RDC handler.
+ *
+ *  @param[in] component Type of Components. See rdc_component_t definition for details.
+ *
+ *  @param[out] p_rdc_compv Version information of the corresponding component.
+ *
+ *  @retval ::RDC_ST_OK is returned upon successful call.
+ */
+rdc_status_t rdc_device_get_component_version(rdc_handle_t p_rdc_handle, rdc_component_t component,
+                                              rdc_component_version_t* p_rdc_compv);
+
 /**
  *  @brief Create a group contains multiple GPUs
  *
diff --git a/projects/rdc/include/rdc_lib/RdcHandler.h b/projects/rdc/include/rdc_lib/RdcHandler.h
index 01fa29eae5..f5b92efcb9 100644
--- a/projects/rdc/include/rdc_lib/RdcHandler.h
+++ b/projects/rdc/include/rdc_lib/RdcHandler.h
@@ -44,6 +44,8 @@ class RdcHandler {
                                           uint32_t* count) = 0;
   virtual rdc_status_t rdc_device_get_attributes(uint32_t gpu_index,
                                                  rdc_device_attributes_t* p_rdc_attr) = 0;
+  virtual rdc_status_t rdc_device_get_component_version(rdc_component_t component,
+                                                        rdc_component_version_t* p_rdc_compv) = 0;
 
   // Group API
   virtual rdc_status_t rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
diff --git a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h
index 5c68ec9654..8d08cbefb4 100644
--- a/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h
+++ b/projects/rdc/include/rdc_lib/impl/RdcEmbeddedHandler.h
@@ -51,6 +51,8 @@ class RdcEmbeddedHandler final : public RdcHandler {
                                   uint32_t* count) override;
   rdc_status_t rdc_device_get_attributes(uint32_t gpu_index,
                                          rdc_device_attributes_t* p_rdc_attr) override;
+  rdc_status_t rdc_device_get_component_version(rdc_component_t component,
+                                                rdc_component_version_t* p_rdc_compv) override;
 
   // Group API
   rdc_status_t rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
diff --git a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h
index 1ba8793984..af81355779 100644
--- a/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h
+++ b/projects/rdc/include/rdc_lib/impl/RdcStandaloneHandler.h
@@ -46,6 +46,8 @@ class RdcStandaloneHandler : public RdcHandler {
                                   uint32_t* count) override;
   rdc_status_t rdc_device_get_attributes(uint32_t gpu_index,
                                          rdc_device_attributes_t* p_rdc_attr) override;
+  rdc_status_t rdc_device_get_component_version(rdc_component_t component,
+                                                rdc_component_version_t* p_rdc_compv) override;
 
   // Group RdcAPI
   rdc_status_t rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
diff --git a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc
index 469455bce9..9f5268654d 100644
--- a/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc
+++ b/projects/rdc/rdc_libs/bootstrap/src/RdcBootStrap.cc
@@ -182,6 +182,17 @@ rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, uint32_t gpu_i
       ->rdc_device_get_attributes(gpu_index, p_rdc_attr);
 }
 
+rdc_status_t rdc_device_get_component_version(rdc_handle_t p_rdc_handle, rdc_component_t component,
+                                              rdc_component_version_t* p_rdc_compv) {
+  // Reject a missing handle or output buffer before dispatching to the handler.
+  if (!p_rdc_handle || !p_rdc_compv) {
+    return RDC_ST_INVALID_HANDLER;
+  }
+
+  return static_cast<amd::rdc::RdcHandler*>(p_rdc_handle)
+      ->rdc_device_get_component_version(component, p_rdc_compv);
+}
+
 rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, uint32_t num_field_ids,
                                     rdc_field_t* field_ids, const char* field_group_name,
                                     rdc_field_grp_t* rdc_field_group_id) {
diff --git a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc
index 473795507d..c70b0780f8 100644
--- a/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc
+++ b/projects/rdc/rdc_libs/rdc/src/RdcEmbeddedHandler.cc
@@ -194,6 +194,38 @@ rdc_status_t RdcEmbeddedHandler::rdc_device_get_attributes(uint32_t gpu_index,
   return status;
 }
 
+// Report the version string of a component RDC depends on. Only the amd-smi
+// library (RDC_AMDMSI_COMPONENT) is handled; any other component value yields
+// RDC_ST_BAD_PARAMETER.
+rdc_status_t RdcEmbeddedHandler::rdc_device_get_component_version(
+    rdc_component_t component, rdc_component_version_t* p_rdc_compv) {
+  if (!p_rdc_compv) {
+    return RDC_ST_BAD_PARAMETER;
+  }
+
+  if (component != RDC_AMDMSI_COMPONENT) {
+    return RDC_ST_BAD_PARAMETER;
+  }
+
+  amdsmi_version_t ver = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, nullptr};
+  const amdsmi_status_t ret = amdsmi_get_lib_version(&ver);
+  if (ret != AMDSMI_STATUS_SUCCESS) {
+    RDC_LOG(RDC_ERROR,
+            "Failed to obtain the version of the server's amd-smi library. reason: "
+                << (ret == AMDSMI_STATUS_INVAL ? "Invalid parameters" : "unknown"));
+    return RDC_ST_MSI_ERROR;
+  }
+
+  // ver.build starts out as nullptr; never hand a null source to the copy helper.
+  if (ver.build == nullptr) {
+    RDC_LOG(RDC_ERROR, "amd-smi library reported success but no build version string");
+    return RDC_ST_MSI_ERROR;
+  }
+
+  strncpy_with_null(p_rdc_compv->version, ver.build, RDC_MAX_VERSION_STR_LENGTH);
+  return RDC_ST_OK;
+}
+
 // Group API
 rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
                                                       rdc_gpu_group_t* p_rdc_group_id) {
diff --git a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc
index 2def05c41f..52a88843ff 100644
--- a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc
+++ b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc
@@ -243,6 +243,27 @@ rdc_status_t RdcStandaloneHandler::rdc_device_get_attributes(uint32_t gpu_index,
   return RDC_ST_OK;
 }
 
+// Ask the RDC server for a component's version via the GetComponentVersion RPC
+// and copy the returned string into the caller's buffer.
+rdc_status_t RdcStandaloneHandler::rdc_device_get_component_version(
+    rdc_component_t component, rdc_component_version_t* p_rdc_compv) {
+  if (!p_rdc_compv) {
+    return RDC_ST_BAD_PARAMETER;
+  }
+
+  ::rdc::GetComponentVersionRequest request;
+  ::rdc::GetComponentVersionResponse reply;
+  ::grpc::ClientContext context;
+
+  request.set_component_index(component);
+  ::grpc::Status status = stub_->GetComponentVersion(&context, request, &reply);
+  rdc_status_t err_status = error_handle(status, reply.status());
+  if (err_status != RDC_ST_OK) return err_status;
+
+  strncpy_with_null(p_rdc_compv->version, reply.version().c_str(), RDC_MAX_VERSION_STR_LENGTH);
+  return RDC_ST_OK;
+}
+
 // Group RdcAPI
 rdc_status_t RdcStandaloneHandler::rdc_group_gpu_create(rdc_group_type_t type,
                                                         const char* group_name,
diff --git a/projects/rdc/server/include/rdc/rdc_api_service.h b/projects/rdc/server/include/rdc/rdc_api_service.h
index 8572335afb..b8011de9a6 100644
--- a/projects/rdc/server/include/rdc/rdc_api_service.h
+++ b/projects/rdc/server/include/rdc/rdc_api_service.h
@@ -42,6 +42,10 @@ class RdcAPIServiceImpl final : public ::rdc::RdcAPI::Service {
                                      const ::rdc::GetDeviceAttributesRequest* request,
                                      ::rdc::GetDeviceAttributesResponse* reply) override;
 
+  ::grpc::Status GetComponentVersion(::grpc::ServerContext* context,
+                                     const ::rdc::GetComponentVersionRequest* request,
+                                     ::rdc::GetComponentVersionResponse* reply) override;
+
   ::grpc::Status CreateGpuGroup(::grpc::ServerContext* context,
                                 const ::rdc::CreateGpuGroupRequest* request,
                                 ::rdc::CreateGpuGroupResponse* reply) override;
diff --git a/projects/rdc/server/src/rdc_api_service.cc b/projects/rdc/server/src/rdc_api_service.cc
index 8ee687b237..30ebc6421e 100644
--- a/projects/rdc/server/src/rdc_api_service.cc
+++ b/projects/rdc/server/src/rdc_api_service.cc
@@ -103,6 +103,25 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() {
   return ::grpc::Status::OK;
 }
 
+::grpc::Status RdcAPIServiceImpl::GetComponentVersion(
+    ::grpc::ServerContext* context, const ::rdc::GetComponentVersionRequest* request,
+    ::rdc::GetComponentVersionResponse* reply) {
+  (void)(context);
+  if (!request || !reply) {
+    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty request or reply");
+  }
+
+  const rdc_component_t component = static_cast<rdc_component_t>(request->component_index());
+  // Zero-initialised so an empty string, not indeterminate stack bytes, is sent
+  // back when the lookup fails and never writes the buffer.
+  rdc_component_version_t compv = {};
+  const rdc_status_t result = rdc_device_get_component_version(rdc_handle_, component, &compv);
+
+  reply->set_version(compv.version);
+  reply->set_status(result);
+  return ::grpc::Status::OK;
+}
+
 ::grpc::Status RdcAPIServiceImpl::CreateGpuGroup(::grpc::ServerContext* context,
                                                  const ::rdc::CreateGpuGroupRequest* request,
                                                  ::rdc::CreateGpuGroupResponse* reply) {