Implement the rdc_device_get_component_version API

Implement an API to obtain the version information of the components that RDC calls into.
See rdc_component_t for details on available components.
It can be expanded later if necessary.

Change-Id: I03b48f774179c52c57b606704283add74ca39a02
Signed-off-by: Chen Gong <curry.gong@amd.com>


[ROCm/rdc commit: 5a3fd9fbc1]
This commit is contained in:
Chen Gong
2024-08-18 19:29:04 +08:00
committed by Galantsev, Dmitrii
orang tua 2ae8557614
melakukan 891039280f
9 mengubah file dengan 109 tambahan dan 0 penghapusan
+35
Melihat File
@@ -141,6 +141,11 @@ typedef enum { INTEGER = 0, DOUBLE, STRING, BLOB } rdc_field_type_t;
*/
#define RDC_MAX_NUM_FIELD_GROUPS 64
/**
* @brief Size, in bytes, of the character buffer that holds a component
* version string (see rdc_component_version_t::version).
*/
#define RDC_MAX_VERSION_STR_LENGTH 60
/**
* These enums are used to specify a particular field to be retrieved.
*/
@@ -343,6 +348,13 @@ typedef struct {
char device_name[RDC_MAX_STR_LENGTH]; //!< Name of the device.
} rdc_device_attributes_t;
/**
* @brief Version information reported for a single component.
*
* Filled in by rdc_device_get_component_version().
*/
typedef struct {
char version[RDC_MAX_VERSION_STR_LENGTH];  //!< Version string of the component.
} rdc_component_version_t;
/**
* @brief The structure to store the group info
*/
@@ -476,6 +488,14 @@ typedef enum {
RDC_DIAG_TEST_LAST = RDC_DIAG_GPU_PARAMETERS
} rdc_diag_test_cases_t;
/**
* @brief Components whose version can be queried with
* rdc_device_get_component_version().
*/
typedef enum {
RDC_AMDMSI_COMPONENT  //!< The amd-smi library.
// Further components can be appended here as they become necessary.
} rdc_component_t;
/**
* @brief The maximum test cases to run
*/
@@ -750,6 +770,21 @@ rdc_status_t rdc_device_get_all(rdc_handle_t p_rdc_handle,
rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, uint32_t gpu_index,
rdc_device_attributes_t* p_rdc_attr);
/**
* @brief Get version information of a component used by RDC.
*
* @details Given a component type, write that component's version information
* into the caller-provided structure.
*
* @param[in] p_rdc_handle The RDC handler. Must not be null.
*
* @param[in] component The component to query. See the rdc_component_t
* definition for the available values.
*
* @param[out] p_rdc_compv Receives the version information of the requested
* component. Must not be null.
*
* @retval ::RDC_ST_OK is returned upon successful call. Any other status
* indicates failure, for example a null argument or an unsupported component.
*/
rdc_status_t rdc_device_get_component_version(rdc_handle_t p_rdc_handle, rdc_component_t component, rdc_component_version_t* p_rdc_compv);
/**
* @brief Create a group contains multiple GPUs
*
@@ -44,6 +44,7 @@ class RdcHandler {
uint32_t* count) = 0;
virtual rdc_status_t rdc_device_get_attributes(uint32_t gpu_index,
rdc_device_attributes_t* p_rdc_attr) = 0;
// Report the version information of `component` through `p_rdc_compv`.
virtual rdc_status_t rdc_device_get_component_version(rdc_component_t component, rdc_component_version_t* p_rdc_compv) = 0;
// Group API
virtual rdc_status_t rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
@@ -51,6 +51,7 @@ class RdcEmbeddedHandler final : public RdcHandler {
uint32_t* count) override;
rdc_status_t rdc_device_get_attributes(uint32_t gpu_index,
rdc_device_attributes_t* p_rdc_attr) override;
// Report the version information of `component` through `p_rdc_compv`,
// obtained directly from the amd-smi library.
rdc_status_t rdc_device_get_component_version(rdc_component_t component, rdc_component_version_t* p_rdc_compv) override;
// Group API
rdc_status_t rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
@@ -46,6 +46,7 @@ class RdcStandaloneHandler : public RdcHandler {
uint32_t* count) override;
rdc_status_t rdc_device_get_attributes(uint32_t gpu_index,
rdc_device_attributes_t* p_rdc_attr) override;
// Report the version information of `component` through `p_rdc_compv`,
// obtained by issuing the GetComponentVersion gRPC call.
rdc_status_t rdc_device_get_component_version(rdc_component_t component, rdc_component_version_t* p_rdc_compv) override;
// Group RdcAPI
rdc_status_t rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
@@ -182,6 +182,15 @@ rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, uint32_t gpu_i
->rdc_device_get_attributes(gpu_index, p_rdc_attr);
}
/**
 * C API entry point for querying a component's version.
 *
 * Validates the arguments and then forwards the request to the RdcHandler
 * instance behind the opaque handle.
 *
 * @param p_rdc_handle Opaque handle; cast to amd::rdc::RdcHandler*.
 * @param component    Component whose version is requested.
 * @param p_rdc_compv  Output structure receiving the version information.
 *
 * @return RDC_ST_INVALID_HANDLER if the handle is null, RDC_ST_BAD_PARAMETER
 *         if the output pointer is null, otherwise whatever the handler
 *         implementation returns.
 */
rdc_status_t rdc_device_get_component_version(rdc_handle_t p_rdc_handle, rdc_component_t component,
                                              rdc_component_version_t* p_rdc_compv) {
  if (!p_rdc_handle) {
    return RDC_ST_INVALID_HANDLER;
  }
  // A null output pointer is a bad argument, not a bad handle. Report it with
  // the same status the handler implementations use for this condition.
  if (!p_rdc_compv) {
    return RDC_ST_BAD_PARAMETER;
  }

  return static_cast<amd::rdc::RdcHandler*>(p_rdc_handle)
      ->rdc_device_get_component_version(component, p_rdc_compv);
}
rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, uint32_t num_field_ids,
rdc_field_t* field_ids, const char* field_group_name,
rdc_field_grp_t* rdc_field_group_id) {
@@ -194,6 +194,29 @@ rdc_status_t RdcEmbeddedHandler::rdc_device_get_attributes(uint32_t gpu_index,
return status;
}
// Embedded-mode version lookup: asks the amd-smi library for its version and
// copies the reported build string into the caller's structure.
//
// Returns RDC_ST_BAD_PARAMETER for a null output pointer or any component
// other than RDC_AMDMSI_COMPONENT, RDC_ST_MSI_ERROR when the amd-smi query
// fails, and RDC_ST_OK otherwise.
rdc_status_t RdcEmbeddedHandler::rdc_device_get_component_version(
    rdc_component_t component, rdc_component_version_t* p_rdc_compv) {
  // Both argument errors map to the same status, so they share one guard.
  if (p_rdc_compv == nullptr || component != RDC_AMDMSI_COMPONENT) {
    return RDC_ST_BAD_PARAMETER;
  }

  // Numeric fields start as all-ones and the build pointer as null before the
  // library fills the structure in.
  amdsmi_version_t lib_version = {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, nullptr};
  const amdsmi_status_t smi_status = amdsmi_get_lib_version(&lib_version);
  if (smi_status != AMDSMI_STATUS_SUCCESS) {
    const char* reason = (smi_status == AMDSMI_STATUS_INVAL) ? "Invalid parameters" : "unknown";
    RDC_LOG(RDC_ERROR, "Failed to obtain the version of the server's amd-smi library. reason: " << reason);
    return RDC_ST_MSI_ERROR;
  }

  strncpy_with_null(p_rdc_compv->version, lib_version.build, RDC_MAX_VERSION_STR_LENGTH);
  return RDC_ST_OK;
}
// Group API
rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_create(rdc_group_type_t type, const char* group_name,
rdc_gpu_group_t* p_rdc_group_id) {
@@ -243,6 +243,25 @@ rdc_status_t RdcStandaloneHandler::rdc_device_get_attributes(uint32_t gpu_index,
return RDC_ST_OK;
}
// Standalone-mode version lookup: sends a GetComponentVersion gRPC request for
// the given component and copies the version string from the reply into the
// caller's structure.
//
// Returns RDC_ST_BAD_PARAMETER for a null output pointer, the status produced
// by error_handle() when the call did not succeed, and RDC_ST_OK otherwise.
rdc_status_t RdcStandaloneHandler::rdc_device_get_component_version(
    rdc_component_t component, rdc_component_version_t* p_rdc_compv) {
  if (p_rdc_compv == nullptr) {
    return RDC_ST_BAD_PARAMETER;
  }

  ::rdc::GetComponentVersionRequest rpc_request;
  ::rdc::GetComponentVersionResponse rpc_reply;
  ::grpc::ClientContext rpc_context;

  rpc_request.set_component_index(component);
  ::grpc::Status rpc_status = stub_->GetComponentVersion(&rpc_context, rpc_request, &rpc_reply);

  // The output is only written when the call succeeded.
  const rdc_status_t result = error_handle(rpc_status, rpc_reply.status());
  if (result == RDC_ST_OK) {
    strncpy_with_null(p_rdc_compv->version, rpc_reply.version().c_str(),
                      RDC_MAX_VERSION_STR_LENGTH);
  }
  return result;
}
// Group RdcAPI
rdc_status_t RdcStandaloneHandler::rdc_group_gpu_create(rdc_group_type_t type,
const char* group_name,
@@ -42,6 +42,9 @@ class RdcAPIServiceImpl final : public ::rdc::RdcAPI::Service {
const ::rdc::GetDeviceAttributesRequest* request,
::rdc::GetDeviceAttributesResponse* reply) override;
// gRPC handler: reports the version of the component named in the request,
// together with the RDC status of the lookup, through the reply.
::grpc::Status GetComponentVersion(::grpc::ServerContext* context, const ::rdc::GetComponentVersionRequest* request,
::rdc::GetComponentVersionResponse* reply) override;
::grpc::Status CreateGpuGroup(::grpc::ServerContext* context,
const ::rdc::CreateGpuGroupRequest* request,
::rdc::CreateGpuGroupResponse* reply) override;
@@ -103,6 +103,23 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() {
return ::grpc::Status::OK;
}
// gRPC handler for GetComponentVersion.
//
// Reads the component index from the request, queries its version through
// rdc_device_get_component_version(), and stores both the version string and
// the resulting RDC status in the reply.
//
// Returns a gRPC INTERNAL status when the request or reply pointer is null.
// Otherwise returns gRPC OK; the outcome of the lookup itself is carried in
// the reply's status field.
::grpc::Status RdcAPIServiceImpl::GetComponentVersion(
    ::grpc::ServerContext* context, const ::rdc::GetComponentVersionRequest* request,
    ::rdc::GetComponentVersionResponse* reply) {
  (void)(context);
  if (!reply) {
    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty reply");
  }
  if (!request) {
    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty request");
  }

  const rdc_component_t component = static_cast<rdc_component_t>(request->component_index());

  // Zero-initialise the buffer: the lookup may fail without writing to it, and
  // the version is copied into the reply regardless of the outcome. Without
  // this the reply would be built from indeterminate bytes that may lack a
  // terminator.
  rdc_component_version_t compv = {};
  const rdc_status_t result = rdc_device_get_component_version(rdc_handle_, component, &compv);

  reply->set_version(compv.version);
  reply->set_status(result);
  return ::grpc::Status::OK;
}
::grpc::Status RdcAPIServiceImpl::CreateGpuGroup(::grpc::ServerContext* context,
const ::rdc::CreateGpuGroupRequest* request,
::rdc::CreateGpuGroupResponse* reply) {