From 7ee29b6cdd187415c2ad4fb59c16a7e26c728fcd Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Wed, 18 Mar 2020 08:58:48 -0400 Subject: [PATCH] Implement the APIs for gRPC calls in client/server Implement the APIs defined in the RdcStandaloneHandler to make gRPC calls to the daemon Implement the APIs defined in the RdcAPIServiceImpl to handle the gRPC calls in the daemon Add two APIs to get all GPU groups and field groups: rdc_group_get_all_ids() and rdc_group_field_get_all_ids(). Those two APIs are required by the rdci group and fieldgroup sub-modules. Change-Id: I066091423146dea180c16af212688ed43dc44611 --- example/field_value_example.cc | 14 +- example/job_stats_example.cc | 4 +- include/rdc/rdc.h | 71 +++- include/rdc_lib/RdcCacheManager.h | 4 +- include/rdc_lib/RdcGroupSettings.h | 5 + include/rdc_lib/RdcHandler.h | 18 +- include/rdc_lib/RdcWatchTable.h | 6 +- include/rdc_lib/impl/RdcCacheManagerImpl.h | 4 +- include/rdc_lib/impl/RdcEmbeddedHandler.h | 18 +- include/rdc_lib/impl/RdcGroupSettingsImpl.h | 6 +- include/rdc_lib/impl/RdcStandaloneHandler.h | 18 +- include/rdc_lib/impl/RdcWatchTableImpl.h | 10 +- protos/rdc.proto | 20 +- rdc_libs/bootstrap/src/RdcBootStrap.cc | 48 ++- rdc_libs/rdc/src/RdcCacheManagerImpl.cc | 4 +- rdc_libs/rdc/src/RdcEmbeddedHandler.cc | 46 ++- rdc_libs/rdc/src/RdcGroupSettingsImpl.cc | 46 +++ rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc | 2 +- rdc_libs/rdc/src/RdcWatchTableImpl.cc | 6 +- .../rdc_client/src/RdcStandaloneHandler.cc | 313 ++++++++++++++--- rdci/src/RdciDiscoverySubSystem.cc | 20 +- server/include/rdc/rdc_api_service.h | 57 ++++ server/src/rdc_api_service.cc | 321 +++++++++++++++++- 23 files changed, 907 insertions(+), 154 deletions(-) diff --git a/example/field_value_example.cc b/example/field_value_example.cc index e935f8af8a..b742c6f958 100644 --- a/example/field_value_example.cc +++ b/example/field_value_example.cc @@ -82,7 +82,7 @@ int main(int, char **) { // Now we can use the same API for both standalone and embedded // 
Get the list of devices in the system uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES]; - result = rdc_get_all_devices(rdc_handle, gpu_index_list, &count); + result = rdc_device_get_all(rdc_handle, gpu_index_list, &count); if (result != RDC_ST_OK) { std::cout << "Error to find devices on the system. Return: " << rdc_status_string(result); @@ -116,7 +116,7 @@ int main(int, char **) { goto cleanup; } rdc_device_attributes_t attribute; - result = rdc_get_device_attributes(rdc_handle, + result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], &attribute); if (result != RDC_ST_OK) { std::cout << "Error get GPU attribute. Return: " @@ -146,7 +146,7 @@ int main(int, char **) { // Let the RDC to watch the fields and groups. The fields will be updated // once per second, the max keep age is 1 minutes and only keep 10 samples. - result = rdc_watch_fields(rdc_handle, group_id, + result = rdc_field_watch(rdc_handle, group_id, field_group_id, 1000000, 60, 10); if (result != RDC_ST_OK) { std::cout << "Error watch group fields. Return: " @@ -159,7 +159,7 @@ int main(int, char **) { // Since we are running the RDC_OPERATION_MODE_AUTO mode, the rdc_update_ // all_fields() will be called periodically at background. If running as - // RDC_OPERATION_MODE_MANUAL mode, we must call rdc_update_all_fields() + // RDC_OPERATION_MODE_MANUAL mode, we must call rdc_field_update_all() // periodically to take samples. 
usleep(5000000); // sleep 5 seconds before fetch the stats @@ -188,7 +188,7 @@ int main(int, char **) { for (uint32_t gindex = 0; gindex < group_info.count; gindex++) { for (uint32_t findex = 0; findex < field_info.count; findex++) { rdc_field_value value; - result = rdc_get_latest_value_for_field(rdc_handle, + result = rdc_field_get_latest_value(rdc_handle, group_info.entity_ids[gindex], field_info.field_ids[findex], &value); if (result == RDC_ST_NOT_FOUND) { continue; @@ -207,7 +207,7 @@ int main(int, char **) { } // Stop watching the field group - result = rdc_unwatch_fields(rdc_handle, group_id, field_group_id); + result = rdc_field_unwatch(rdc_handle, group_id, field_group_id); if (result != RDC_ST_OK) { std::cout << "Error stop watch fields. Return: " << rdc_status_string(result); @@ -227,7 +227,7 @@ int main(int, char **) { since_timestamp = start_timestamp; while (true) { rdc_field_value value; - result = rdc_get_field_value_since(rdc_handle, + result = rdc_field_get_value_since(rdc_handle, group_info.entity_ids[gindex] , field_info.field_ids[findex], since_timestamp, &next_timestamp, &value); if (result == RDC_ST_NOT_FOUND) { diff --git a/example/job_stats_example.cc b/example/job_stats_example.cc index c9b51b0997..11c99ea127 100644 --- a/example/job_stats_example.cc +++ b/example/job_stats_example.cc @@ -104,11 +104,11 @@ int main(int, char **) { } // For standalone mode, the daemon will update and cache the samples - // In manual mode, we must call rdc_update_all_fields periodically to + // In manual mode, we must call rdc_field_update_all periodically to // take samples. if (!standalone) { // embedded manual mode for (int i=5; i > 0 ; i--) { // As an example, we will take 5 samples - result = rdc_update_all_fields(rdc_handle, 0); + result = rdc_field_update_all(rdc_handle, 0); if (result != RDC_ST_OK) { std::cout << "Error update all fields. 
Return: " << rdc_status_string(result); diff --git a/include/rdc/rdc.h b/include/rdc/rdc.h index 89f2ed915d..53a5eb8dd1 100644 --- a/include/rdc/rdc.h +++ b/include/rdc/rdc.h @@ -59,7 +59,7 @@ typedef enum { * @brief rdc operation mode * rdc can run in auto mode where background threads will collect metrics. * When run in manual mode, the user needs to periodically call - * rdc_update_all_fields for data collection. + * rdc_field_update_all for data collection. */ typedef enum { RDC_OPERATION_MODE_AUTO = 0, @@ -109,6 +109,15 @@ typedef enum { */ #define RDC_MAX_FIELD_IDS_PER_FIELD_GROUP 128 +/** + * @brief The max number of groups + */ +#define RDC_MAX_NUM_GROUPS 64 + +/** + * @brief The max number of the field groups + */ +#define RDC_MAX_NUM_FIELD_GROUPS 64 /** * @biref The fields @@ -279,12 +288,12 @@ rdc_status_t rdc_shutdown(); * * @details The RDC is loaded as library so that it does not require rdcd * daemon. In this mode, the user has to periodically call - * rdc_update_all_fields() when op_mode is RDC_OPERATION_MODE_MANUAL, which + * rdc_field_update_all() when op_mode is RDC_OPERATION_MODE_MANUAL, which * tells RDC to collect the stats. This function is not thread safe. * * @param[in] op_mode Operation modes. When RDC_OPERATION_MODE_AUTO, RDC schedules * background task to collect the stats. When RDC_OPERATION_MODE_MANUAL, the user - * needs to call rdc_update_all_fields() periodically. + * needs to call rdc_field_update_all() periodically. * * @param[inout] p_rdc_handle Caller provided pointer to rdc_handle_t. Upon * successful call, the value will contain the handler for following API calls. @@ -351,7 +360,7 @@ rdc_status_t rdc_disconnect(rdc_handle_t p_rdc_handle); * * @details This should be executed as part of job prologue. The summary * job stats can be retrieved using rdc_job_get_stats(). 
- * In RDC_OPERATION_MODE_MANUAL, user must call rdc_update_all_fields(1) + * In RDC_OPERATION_MODE_MANUAL, user must call rdc_field_update_all(1) * at least once, before call rdc_job_get_stats() * * @param[in] p_rdc_handle The RDC handler. @@ -419,7 +428,7 @@ rdc_status_t rdc_job_stop_stats(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. */ -rdc_status_t rdc_update_all_fields(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_update_all(rdc_handle_t p_rdc_handle, uint32_t wait_for_update); /** @@ -438,7 +447,7 @@ rdc_status_t rdc_update_all_fields(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. */ -rdc_status_t rdc_get_all_devices(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_device_get_all(rdc_handle_t p_rdc_handle, uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count); /** @@ -455,7 +464,7 @@ rdc_status_t rdc_get_all_devices(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. */ -rdc_status_t rdc_get_device_attributes(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr); /** @@ -518,6 +527,23 @@ rdc_status_t rdc_group_gpu_add(rdc_handle_t p_rdc_handle, rdc_status_t rdc_group_gpu_get_info(rdc_handle_t p_rdc_handle, rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info); +/** + * @brief Used to get information about all GPU groups in the system. + * + * @details Get the list of GPU group ids in the system. + * + * @param[in] p_rdc_handle The RDC handler. + * + * @param[out] group_id_list Array reference to fill GPU group + * ids in the system. + * + * @param[out] count Number of GPU group returned in group_id_list. + * + * @retval ::RDC_ST_OK is returned upon successful call. 
+ */ +rdc_status_t rdc_group_get_all_ids(rdc_handle_t p_rdc_handle, + rdc_gpu_group_t group_id_list[], uint32_t* count); + /** * @brief Destroy GPU group represented by p_rdc_group_id * @@ -576,6 +602,23 @@ rdc_status_t rdc_group_field_get_info(rdc_handle_t p_rdc_handle, rdc_field_grp_t rdc_field_group_id, rdc_field_group_info_t* field_group_info); +/** + * @brief Used to get information about all field groups in the system. + * + * @details Get the list of field group ids in the system. + * + * @param[in] p_rdc_handle The RDC handler. + * + * @param[out] field_group_id_list Array reference to fill field group + * ids in the system. + * + * @param[out] count Number of field group returned in field_group_id_list. + * + * @retval ::RDC_ST_OK is returned upon successful call. + */ +rdc_status_t rdc_group_field_get_all_ids(rdc_handle_t p_rdc_handle, + rdc_field_grp_t field_group_id_list[], uint32_t* count); + /** * @brief Destroy field group represented by rdc_field_group_id * @@ -596,7 +639,7 @@ rdc_status_t rdc_group_field_destroy(rdc_handle_t p_rdc_handle, * * @details Note that the first update of the field will not occur * until the next field update cycle. To force a field update cycle, - * user must call rdc_update_all_fields(1) + * user must call rdc_field_update_all(1) * * @param[in] p_rdc_handle The RDC handler. * @@ -612,14 +655,14 @@ rdc_status_t rdc_group_field_destroy(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. */ -rdc_status_t rdc_watch_fields(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples); /** * @brief Request a latest cached field of a GPU * - * @details Note that the field can be cached after called rdc_watch_fields + * @details Note that the field can be cached after called rdc_field_watch * * @param[in] p_rdc_handle The RDC handler. 
* @@ -631,13 +674,13 @@ rdc_status_t rdc_watch_fields(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. */ -rdc_status_t rdc_get_latest_value_for_field(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, uint32_t gpu_index, uint32_t field, rdc_field_value* value); /** * @brief Request a history cached field of a GPU * - * @details Note that the field can be cached after called rdc_watch_fields + * @details Note that the field can be cached after called rdc_field_watch * * @param[in] p_rdc_handle The RDC handler. * @@ -655,7 +698,7 @@ rdc_status_t rdc_get_latest_value_for_field(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. */ -rdc_status_t rdc_get_field_value_since(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle, uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value); @@ -672,7 +715,7 @@ rdc_status_t rdc_get_field_value_since(rdc_handle_t p_rdc_handle, * * @retval ::RDC_ST_OK is returned upon successful call. 
*/ -rdc_status_t rdc_unwatch_fields(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_unwatch(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id); /** diff --git a/include/rdc_lib/RdcCacheManager.h b/include/rdc_lib/RdcCacheManager.h index 34f41461de..9948dd2c5e 100644 --- a/include/rdc_lib/RdcCacheManager.h +++ b/include/rdc_lib/RdcCacheManager.h @@ -34,9 +34,9 @@ namespace rdc { class RdcCacheManager { public: - virtual rdc_status_t rdc_get_latest_value_for_field(uint32_t gpu_index, + virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, uint32_t field, rdc_field_value* value) = 0; - virtual rdc_status_t rdc_get_field_value_since(uint32_t gpu_index, + virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) = 0; virtual rdc_status_t rdc_update_cache(uint32_t gpu_index, diff --git a/include/rdc_lib/RdcGroupSettings.h b/include/rdc_lib/RdcGroupSettings.h index 12d45e9530..2dd0d321ee 100644 --- a/include/rdc_lib/RdcGroupSettings.h +++ b/include/rdc_lib/RdcGroupSettings.h @@ -40,6 +40,9 @@ class RdcGroupSettings { virtual rdc_status_t rdc_group_gpu_get_info( rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info) = 0; + virtual rdc_status_t rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) = 0; + virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids, uint32_t* field_ids, const char* field_group_name, @@ -49,6 +52,8 @@ class RdcGroupSettings { virtual rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, rdc_field_group_info_t* field_group_info) = 0; + virtual rdc_status_t rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) = 0; virtual ~RdcGroupSettings() {} }; diff --git a/include/rdc_lib/RdcHandler.h b/include/rdc_lib/RdcHandler.h index 74db05a53f..7c5d927641 100644 --- a/include/rdc_lib/RdcHandler.h +++ 
b/include/rdc_lib/RdcHandler.h @@ -41,9 +41,9 @@ class RdcHandler { // Discovery API - virtual rdc_status_t rdc_get_all_devices( + virtual rdc_status_t rdc_device_get_all( uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) = 0; - virtual rdc_status_t rdc_get_device_attributes(uint32_t gpu_index, + virtual rdc_status_t rdc_device_get_attributes(uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr) = 0; // Group API @@ -59,25 +59,29 @@ class RdcHandler { rdc_field_group_info_t* field_group_info) = 0; virtual rdc_status_t rdc_group_gpu_get_info( rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info) = 0; + virtual rdc_status_t rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) = 0; + virtual rdc_status_t rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) = 0; virtual rdc_status_t rdc_group_gpu_destroy( rdc_gpu_group_t p_rdc_group_id) = 0; virtual rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) = 0; // Field API - virtual rdc_status_t rdc_watch_fields(rdc_gpu_group_t group_id, + virtual rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) = 0; - virtual rdc_status_t rdc_get_latest_value_for_field(uint32_t gpu_index, + virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, uint32_t field, rdc_field_value* value) = 0; - virtual rdc_status_t rdc_get_field_value_since(uint32_t gpu_index, + virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) = 0; - virtual rdc_status_t rdc_unwatch_fields(rdc_gpu_group_t group_id, + virtual rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) = 0; // Control API - virtual rdc_status_t rdc_update_all_fields(uint32_t wait_for_update) = 0; + virtual rdc_status_t 
rdc_field_update_all(uint32_t wait_for_update) = 0; virtual ~RdcHandler(){} }; diff --git a/include/rdc_lib/RdcWatchTable.h b/include/rdc_lib/RdcWatchTable.h index 595eb07672..67cbe61785 100644 --- a/include/rdc_lib/RdcWatchTable.h +++ b/include/rdc_lib/RdcWatchTable.h @@ -33,7 +33,7 @@ namespace rdc { class RdcWatchTable { public: - virtual rdc_status_t rdc_update_all_fields() = 0; + virtual rdc_status_t rdc_field_update_all() = 0; virtual rdc_status_t rdc_job_start_stats(rdc_gpu_group_t group_id, char job_id[64]) = 0; @@ -41,10 +41,10 @@ class RdcWatchTable { uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) = 0; - virtual rdc_status_t rdc_watch_fields(rdc_gpu_group_t group_id, + virtual rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) = 0; - virtual rdc_status_t rdc_unwatch_fields(rdc_gpu_group_t group_id, + virtual rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) = 0; virtual ~RdcWatchTable() {} diff --git a/include/rdc_lib/impl/RdcCacheManagerImpl.h b/include/rdc_lib/impl/RdcCacheManagerImpl.h index c4445076c9..454554e4aa 100644 --- a/include/rdc_lib/impl/RdcCacheManagerImpl.h +++ b/include/rdc_lib/impl/RdcCacheManagerImpl.h @@ -44,9 +44,9 @@ typedef std::map> RdcCacheSamples; class RdcCacheManagerImpl: public RdcCacheManager { public: - rdc_status_t rdc_get_latest_value_for_field(uint32_t gpu_index, + rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, uint32_t field, rdc_field_value* value) override; - rdc_status_t rdc_get_field_value_since(uint32_t gpu_index, uint32_t field, + rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; rdc_status_t rdc_update_cache(uint32_t gpu_index, diff --git a/include/rdc_lib/impl/RdcEmbeddedHandler.h 
b/include/rdc_lib/impl/RdcEmbeddedHandler.h index 28ca4ea54b..8c5c2cf008 100644 --- a/include/rdc_lib/impl/RdcEmbeddedHandler.h +++ b/include/rdc_lib/impl/RdcEmbeddedHandler.h @@ -43,9 +43,9 @@ class RdcEmbeddedHandler: public RdcHandler { rdc_status_t rdc_job_stop_stats(char job_id[64]) override; // Discovery API - rdc_status_t rdc_get_all_devices( + rdc_status_t rdc_device_get_all( uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) override; - rdc_status_t rdc_get_device_attributes(uint32_t gpu_index, + rdc_status_t rdc_device_get_attributes(uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr) override; // Group API @@ -62,25 +62,29 @@ class RdcEmbeddedHandler: public RdcHandler { rdc_field_group_info_t* field_group_info) override; rdc_status_t rdc_group_gpu_get_info(rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info) override; + rdc_status_t rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) override; + rdc_status_t rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) override; rdc_status_t rdc_group_gpu_destroy( rdc_gpu_group_t p_rdc_group_id) override; rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) override; // Field API - rdc_status_t rdc_watch_fields(rdc_gpu_group_t group_id, + rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; - rdc_status_t rdc_get_latest_value_for_field(uint32_t gpu_index, + rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, uint32_t field, rdc_field_value* value) override; - rdc_status_t rdc_get_field_value_since(uint32_t gpu_index, + rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; - rdc_status_t rdc_unwatch_fields(rdc_gpu_group_t group_id, + rdc_status_t 
rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) override; // Control API - rdc_status_t rdc_update_all_fields(uint32_t wait_for_update) override; + rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override; explicit RdcEmbeddedHandler(rdc_operation_mode_t op_mode); ~RdcEmbeddedHandler(); diff --git a/include/rdc_lib/impl/RdcGroupSettingsImpl.h b/include/rdc_lib/impl/RdcGroupSettingsImpl.h index bd94629e0b..9480d2e7fe 100644 --- a/include/rdc_lib/impl/RdcGroupSettingsImpl.h +++ b/include/rdc_lib/impl/RdcGroupSettingsImpl.h @@ -25,7 +25,7 @@ THE SOFTWARE. #include #include -#include +#include // NOLINT #include #include "rdc_lib/RdcGroupSettings.h" @@ -42,6 +42,8 @@ class RdcGroupSettingsImpl: public RdcGroupSettings { uint32_t gpu_index) override; rdc_status_t rdc_group_gpu_get_info(rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info) override; + rdc_status_t rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) override; rdc_status_t rdc_group_field_create(uint32_t num_field_ids, uint32_t* field_ids, const char* field_group_name, @@ -51,6 +53,8 @@ class RdcGroupSettingsImpl: public RdcGroupSettings { rdc_status_t rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, rdc_field_group_info_t* field_group_info) override; + rdc_status_t rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) override; RdcGroupSettingsImpl(); diff --git a/include/rdc_lib/impl/RdcStandaloneHandler.h b/include/rdc_lib/impl/RdcStandaloneHandler.h index b64a3311a5..6e32a57c02 100644 --- a/include/rdc_lib/impl/RdcStandaloneHandler.h +++ b/include/rdc_lib/impl/RdcStandaloneHandler.h @@ -40,9 +40,9 @@ class RdcStandaloneHandler: public RdcHandler { rdc_status_t rdc_job_stop_stats(char job_id[64]) override; // Discovery RdcAPI - rdc_status_t rdc_get_all_devices( + rdc_status_t rdc_device_get_all( uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) override; - rdc_status_t 
rdc_get_device_attributes(uint32_t gpu_index, + rdc_status_t rdc_device_get_attributes(uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr) override; // Group RdcAPI @@ -59,25 +59,29 @@ class RdcStandaloneHandler: public RdcHandler { rdc_field_group_info_t* field_group_info) override; rdc_status_t rdc_group_gpu_get_info(rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info) override; + rdc_status_t rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) override; + rdc_status_t rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) override; rdc_status_t rdc_group_gpu_destroy( rdc_gpu_group_t p_rdc_group_id) override; rdc_status_t rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) override; // Field RdcAPI - rdc_status_t rdc_watch_fields(rdc_gpu_group_t group_id, + rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; - rdc_status_t rdc_get_latest_value_for_field(uint32_t gpu_index, + rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index, uint32_t field, rdc_field_value* value) override; - rdc_status_t rdc_get_field_value_since(uint32_t gpu_index, + rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) override; - rdc_status_t rdc_unwatch_fields(rdc_gpu_group_t group_id, + rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) override; // Control RdcAPI - rdc_status_t rdc_update_all_fields(uint32_t wait_for_update) override; + rdc_status_t rdc_field_update_all(uint32_t wait_for_update) override; explicit RdcStandaloneHandler(const char* ip_and_port, const char* root_ca, const char* client_cert, const char* client_key); diff --git a/include/rdc_lib/impl/RdcWatchTableImpl.h b/include/rdc_lib/impl/RdcWatchTableImpl.h index 
7a4fb39c64..e8f2568c2f 100644 --- a/include/rdc_lib/impl/RdcWatchTableImpl.h +++ b/include/rdc_lib/impl/RdcWatchTableImpl.h @@ -54,14 +54,14 @@ class RdcWatchTableImpl : public RdcWatchTable { uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; - rdc_status_t rdc_watch_fields(rdc_gpu_group_t group_id, + rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) override; - //!< rdc_unwatch_fields() will not remove the entry from watch_table. + //!< rdc_field_unwatch() will not remove the entry from watch_table. //!< The unwatched entry is still kept until the max_keep_age of the entry //!< is reached, which will be handled in the clean_up() function. - rdc_status_t rdc_unwatch_fields(rdc_gpu_group_t group_id, + rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) override; //!< When the RDC is running as RDC_OPERATION_MODE_MANUAL, the user will @@ -71,7 +71,7 @@ class RdcWatchTableImpl : public RdcWatchTable { //!< This function may be called very frequently, and the cache cleanup //!< is expensive. Internally, this function will throttle the cleanup to //!< once per second. - rdc_status_t rdc_update_all_fields() override; + rdc_status_t rdc_field_update_all() override; RdcWatchTableImpl(const RdcGroupSettingsPtr& group_settings, const RdcCacheManagerPtr& cache_mgr, @@ -97,7 +97,7 @@ class RdcWatchTableImpl : public RdcWatchTable { std::map watch_table_; //!< The settings for each field can be deduced from watch_table. But every - //!< rdc_update_all_fields() call needs to deduce them. To improve the + //!< rdc_field_update_all() call needs to deduce them. To improve the //!< performance, the fields_to_watch_ is used to track the field settings. //!< Those settings will only be updated when watching or unwatching. 
std::map fields_to_watch_; diff --git a/protos/rdc.proto b/protos/rdc.proto index b0a6dc313d..16399a3793 100755 --- a/protos/rdc.proto +++ b/protos/rdc.proto @@ -190,6 +190,12 @@ service RdcAPI { // rdc_status_t rdc_update_all_fields(uint32_t wait_for_update) rpc UpdateAllFields(UpdateAllFieldsRequest) returns (UpdateAllFieldsResponse) {} + + // rdc_status_t rdc_group_get_all_ids(rdc_gpu_group_t group_id_list[], uint32_t* count) + rpc GetGroupAllIds(Empty) returns (GetGroupAllIdsResponse) {} + + // rdc_status_t rdc_group_field_all_ids(rdc_field_grp_t field_group_id_list[], uint32_t* count) + rpc GetFieldGroupAllIds(Empty) returns (GetFieldGroupAllIdsResponse) {} } message Empty { @@ -235,9 +241,10 @@ message AddToGpuGroupRequest { message AddToGpuGroupResponse { uint32 status = 1; } + message CreateFieldGroupRequest { repeated uint32 field_ids = 1; - string filed_group_name = 2; + string field_group_name = 2; } message CreateFieldGroupResponse { @@ -359,3 +366,14 @@ message UpdateAllFieldsRequest { message UpdateAllFieldsResponse { uint32 status = 1; } + +message GetGroupAllIdsResponse { + uint32 status = 1; + repeated uint32 group_ids = 2; +} + + +message GetFieldGroupAllIdsResponse { + uint32 status = 1; + repeated uint32 field_group_ids = 2; +} diff --git a/rdc_libs/bootstrap/src/RdcBootStrap.cc b/rdc_libs/bootstrap/src/RdcBootStrap.cc index 46ef1832a6..96a5c5cd85 100644 --- a/rdc_libs/bootstrap/src/RdcBootStrap.cc +++ b/rdc_libs/bootstrap/src/RdcBootStrap.cc @@ -123,14 +123,14 @@ rdc_status_t rdc_stop_embedded(rdc_handle_t p_rdc_handle) { return RDC_ST_OK; } -rdc_status_t rdc_update_all_fields(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_update_all(rdc_handle_t p_rdc_handle, uint32_t wait_for_update) { if (!p_rdc_handle) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle)-> - rdc_update_all_fields(wait_for_update); + rdc_field_update_all(wait_for_update); } rdc_status_t rdc_job_get_stats(rdc_handle_t p_rdc_handle, char job_id[64] , @@ 
-186,24 +186,24 @@ rdc_status_t rdc_group_gpu_add(rdc_handle_t p_rdc_handle, rdc_group_gpu_add(groupId, gpuIndex); } -rdc_status_t rdc_get_all_devices(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_device_get_all(rdc_handle_t p_rdc_handle, uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) { if (!p_rdc_handle || !count) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle)-> - rdc_get_all_devices(gpu_index_list, count); + rdc_device_get_all(gpu_index_list, count); } -rdc_status_t rdc_get_device_attributes(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle, uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr) { if (!p_rdc_handle || !p_rdc_attr) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle)-> - rdc_get_device_attributes(gpu_index, p_rdc_attr); + rdc_device_get_attributes(gpu_index, p_rdc_attr); } rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle, @@ -240,7 +240,27 @@ rdc_status_t rdc_group_gpu_get_info(rdc_handle_t p_rdc_handle, rdc_group_gpu_get_info(p_rdc_group_id, p_rdc_group_info); } -rdc_status_t rdc_watch_fields(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_group_get_all_ids(rdc_handle_t p_rdc_handle, + rdc_gpu_group_t group_id_list[], uint32_t* count) { + if (!p_rdc_handle || !count) { + return RDC_ST_INVALID_HANDLER; + } + + return static_cast(p_rdc_handle)-> + rdc_group_get_all_ids(group_id_list, count); +} + +rdc_status_t rdc_group_field_get_all_ids(rdc_handle_t p_rdc_handle, + rdc_field_grp_t field_group_id_list[], uint32_t* count) { + if (!p_rdc_handle || !count) { + return RDC_ST_INVALID_HANDLER; + } + + return static_cast(p_rdc_handle)-> + rdc_group_field_get_all_ids(field_group_id_list, count); +} + +rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) { if (!p_rdc_handle) { @@ -248,21 +268,21 @@ rdc_status_t 
rdc_watch_fields(rdc_handle_t p_rdc_handle, } return static_cast(p_rdc_handle)-> - rdc_watch_fields(group_id, field_group_id, update_freq, + rdc_field_watch(group_id, field_group_id, update_freq, max_keep_age, max_keep_samples); } -rdc_status_t rdc_get_latest_value_for_field(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle, uint32_t gpu_index, uint32_t field, rdc_field_value* value) { if (!p_rdc_handle || !value) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle)-> - rdc_get_latest_value_for_field(gpu_index, field, value); + rdc_field_get_latest_value(gpu_index, field, value); } -rdc_status_t rdc_get_field_value_since(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle, uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!p_rdc_handle || !next_since_time_stamp || !value) { @@ -270,18 +290,18 @@ rdc_status_t rdc_get_field_value_since(rdc_handle_t p_rdc_handle, } return static_cast(p_rdc_handle)-> - rdc_get_field_value_since(gpu_index, field, since_time_stamp, + rdc_field_get_value_since(gpu_index, field, since_time_stamp, next_since_time_stamp, value); } -rdc_status_t rdc_unwatch_fields(rdc_handle_t p_rdc_handle, +rdc_status_t rdc_field_unwatch(rdc_handle_t p_rdc_handle, rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) { if (!p_rdc_handle) { return RDC_ST_INVALID_HANDLER; } return static_cast(p_rdc_handle)-> - rdc_unwatch_fields(group_id, field_group_id); + rdc_field_unwatch(group_id, field_group_id); } rdc_status_t rdc_group_gpu_destroy(rdc_handle_t p_rdc_handle, diff --git a/rdc_libs/rdc/src/RdcCacheManagerImpl.cc b/rdc_libs/rdc/src/RdcCacheManagerImpl.cc index f9607fcabd..9834fa4725 100644 --- a/rdc_libs/rdc/src/RdcCacheManagerImpl.cc +++ b/rdc_libs/rdc/src/RdcCacheManagerImpl.cc @@ -28,7 +28,7 @@ THE SOFTWARE. 
namespace amd { namespace rdc { -rdc_status_t RdcCacheManagerImpl::rdc_get_field_value_since( +rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since( uint32_t gpu_index, uint32_t field_id, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!next_since_time_stamp || !value) { @@ -104,7 +104,7 @@ rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index, return RDC_ST_OK; } -rdc_status_t RdcCacheManagerImpl::rdc_get_latest_value_for_field( +rdc_status_t RdcCacheManagerImpl::rdc_field_get_latest_value( uint32_t gpu_index, uint32_t field_id, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; diff --git a/rdc_libs/rdc/src/RdcEmbeddedHandler.cc b/rdc_libs/rdc/src/RdcEmbeddedHandler.cc index 0314d45733..8540fccfc7 100644 --- a/rdc_libs/rdc/src/RdcEmbeddedHandler.cc +++ b/rdc_libs/rdc/src/RdcEmbeddedHandler.cc @@ -111,7 +111,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_job_stop_stats(char job_id[64] ) { // Discovery API -rdc_status_t RdcEmbeddedHandler::rdc_get_all_devices( +rdc_status_t RdcEmbeddedHandler::rdc_device_get_all( uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) { if (!count) { return RDC_ST_BAD_PARAMETER; @@ -132,7 +132,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_get_all_devices( return RDC_ST_OK; } -rdc_status_t RdcEmbeddedHandler::rdc_get_device_attributes(uint32_t gpu_index, +rdc_status_t RdcEmbeddedHandler::rdc_device_get_attributes(uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr) { if (!p_rdc_attr) { return RDC_ST_BAD_PARAMETER; @@ -164,7 +164,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_create(rdc_group_type_t type, // Add All GPUs to the group uint32_t count = 0; uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES]; - status = rdc_get_all_devices( + status = rdc_device_get_all( gpu_index_list, &count); if (status != RDC_ST_OK) { return status; @@ -180,7 +180,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id, uint32_t gpu_index) { 
uint32_t count = 0; uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES]; - rdc_status_t status = rdc_get_all_devices( + rdc_status_t status = rdc_device_get_all( gpu_index_list, &count); if (status != RDC_ST_OK) { return status; @@ -243,6 +243,24 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_get_info( p_rdc_group_id, p_rdc_group_info); } +rdc_status_t RdcEmbeddedHandler::rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) { + if (!count) { + return RDC_ST_BAD_PARAMETER; + } + return group_settings_->rdc_group_get_all_ids(group_id_list, count); +} + +rdc_status_t RdcEmbeddedHandler::rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) { + if (!count) { + return RDC_ST_BAD_PARAMETER; + } + return group_settings_->rdc_group_field_get_all_ids( + field_group_id_list, count); +} + + rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_destroy( rdc_gpu_group_t p_rdc_group_id) { return group_settings_->rdc_group_gpu_destroy(p_rdc_group_id); @@ -254,14 +272,14 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_field_destroy( } // Field API -rdc_status_t RdcEmbeddedHandler::rdc_watch_fields(rdc_gpu_group_t group_id, +rdc_status_t RdcEmbeddedHandler::rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) { - return watch_table_->rdc_watch_fields(group_id, field_group_id, + return watch_table_->rdc_field_watch(group_id, field_group_id, update_freq, max_keep_age, max_keep_samples); } -rdc_status_t RdcEmbeddedHandler::rdc_get_latest_value_for_field( +rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value( uint32_t gpu_index, uint32_t field, rdc_field_value* value) { if (!value) { return RDC_ST_BAD_PARAMETER; @@ -269,10 +287,10 @@ rdc_status_t RdcEmbeddedHandler::rdc_get_latest_value_for_field( if (!metric_fetcher_->is_field_valid(field)) { return RDC_ST_NOT_SUPPORTED; } - return cache_mgr_->rdc_get_latest_value_for_field(gpu_index, field, 
value); + return cache_mgr_->rdc_field_get_latest_value(gpu_index, field, value); } -rdc_status_t RdcEmbeddedHandler::rdc_get_field_value_since(uint32_t gpu_index, +rdc_status_t RdcEmbeddedHandler::rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { if (!next_since_time_stamp || !value) { @@ -281,21 +299,21 @@ rdc_status_t RdcEmbeddedHandler::rdc_get_field_value_since(uint32_t gpu_index, if (!metric_fetcher_->is_field_valid(field)) { return RDC_ST_NOT_SUPPORTED; } - return cache_mgr_->rdc_get_field_value_since(gpu_index, field, + return cache_mgr_->rdc_field_get_value_since(gpu_index, field, since_time_stamp, next_since_time_stamp, value); } -rdc_status_t RdcEmbeddedHandler::rdc_unwatch_fields(rdc_gpu_group_t group_id, +rdc_status_t RdcEmbeddedHandler::rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) { - return watch_table_->rdc_unwatch_fields(group_id, field_group_id); + return watch_table_->rdc_field_unwatch(group_id, field_group_id); } // Control API -rdc_status_t RdcEmbeddedHandler::rdc_update_all_fields( +rdc_status_t RdcEmbeddedHandler::rdc_field_update_all( uint32_t wait_for_update) { // TODO(bill_liu): implement the case wait_for_update==0 (void)(wait_for_update); - return watch_table_->rdc_update_all_fields(); + return watch_table_->rdc_field_update_all(); } } // namespace rdc diff --git a/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc b/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc index 77b9f6ffe9..21d57f4136 100644 --- a/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc +++ b/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc @@ -36,6 +36,9 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create( ginfo.count = 0; std::lock_guard guard(group_mutex_); + if (gpu_group_.size() >= RDC_MAX_NUM_GROUPS) { + return RDC_ST_MAX_LIMIT; + } gpu_group_.emplace(cur_group_id_, ginfo); *p_rdc_group_id = cur_group_id_; cur_group_id_++; @@ -94,6 +97,26 @@ rdc_status_t 
RdcGroupSettingsImpl::rdc_group_gpu_get_info( return RDC_ST_OK; } +rdc_status_t RdcGroupSettingsImpl::rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) { + if (!count) { + return RDC_ST_BAD_PARAMETER; + } + + *count = 0; + std::lock_guard guard(group_mutex_); + auto ite = gpu_group_.begin(); + for (; ite != gpu_group_.end(); ite++) { + if (*count >= RDC_MAX_NUM_GROUPS) { + return RDC_ST_MAX_LIMIT; + } + group_id_list[*count] = ite->first; + (*count)++; + } + + return RDC_ST_OK; +} + rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create( uint32_t num_field_ids, uint32_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { @@ -110,6 +133,9 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create( } std::lock_guard guard(field_group_mutex_); + if (field_group_.size() >= RDC_MAX_NUM_FIELD_GROUPS) { + return RDC_ST_MAX_LIMIT; + } field_group_.emplace(cur_filed_group_id_, finfo); *rdc_field_group_id = cur_filed_group_id_; cur_filed_group_id_++; @@ -144,6 +170,26 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_field_get_info( return RDC_ST_OK; } +rdc_status_t RdcGroupSettingsImpl::rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) { + if (!count) { + return RDC_ST_BAD_PARAMETER; + } + + *count = 0; + std::lock_guard guard(field_group_mutex_); + auto ite = field_group_.begin(); + for (; ite != field_group_.end(); ite++) { + if (*count >= RDC_MAX_NUM_FIELD_GROUPS) { + return RDC_ST_MAX_LIMIT; + } + field_group_id_list[*count] = ite->first; + (*count)++; + } + + return RDC_ST_OK; +} + } // namespace rdc } // namespace amd diff --git a/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc b/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc index ee5169c339..d00310d9c2 100644 --- a/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc +++ b/rdc_libs/rdc/src/RdcMetricsUpdaterImpl.cc @@ -43,7 +43,7 @@ void RdcMetricsUpdaterImpl::start() { started_ = true; updater_ = std::async(std::launch::async, [this](){ while 
(started_) { - watch_table_->rdc_update_all_fields(); + watch_table_->rdc_field_update_all(); std::this_thread::sleep_for( std::chrono::microseconds(_check_frequency)); } diff --git a/rdc_libs/rdc/src/RdcWatchTableImpl.cc b/rdc_libs/rdc/src/RdcWatchTableImpl.cc index ece156a9f6..979e5eb536 100644 --- a/rdc_libs/rdc/src/RdcWatchTableImpl.cc +++ b/rdc_libs/rdc/src/RdcWatchTableImpl.cc @@ -83,7 +83,7 @@ rdc_status_t RdcWatchTableImpl::get_fields_from_group(rdc_gpu_group_t group_id, } -rdc_status_t RdcWatchTableImpl::rdc_watch_fields(rdc_gpu_group_t group_id, +rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) { std::lock_guard guard(watch_mutex_); @@ -209,7 +209,7 @@ rdc_status_t RdcWatchTableImpl::update_field_in_table_when_unwatch( return RDC_ST_OK; } -rdc_status_t RdcWatchTableImpl::rdc_unwatch_fields( +rdc_status_t RdcWatchTableImpl::rdc_field_unwatch( rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) { struct timeval tv; gettimeofday(&tv, NULL); @@ -229,7 +229,7 @@ rdc_status_t RdcWatchTableImpl::rdc_unwatch_fields( } -rdc_status_t RdcWatchTableImpl::rdc_update_all_fields() { +rdc_status_t RdcWatchTableImpl::rdc_field_update_all() { uint32_t items_fetched = 0; rdc_status_t result; diff --git a/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc b/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc index 7d24fe5f94..810cfb8c99 100644 --- a/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc +++ b/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc @@ -90,7 +90,7 @@ rdc_status_t RdcStandaloneHandler::rdc_job_stop_stats(char job_id[64] ) { // Discovery RdcAPI -rdc_status_t RdcStandaloneHandler::rdc_get_all_devices( +rdc_status_t RdcStandaloneHandler::rdc_device_get_all( uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) { if (!count) { return RDC_ST_BAD_PARAMETER; @@ -115,7 +115,7 @@ rdc_status_t RdcStandaloneHandler::rdc_get_all_devices( 
return RDC_ST_OK; } -rdc_status_t RdcStandaloneHandler::rdc_get_device_attributes(uint32_t gpu_index, +rdc_status_t RdcStandaloneHandler::rdc_device_get_attributes(uint32_t gpu_index, rdc_device_attributes_t* p_rdc_attr) { if (!p_rdc_attr) { return RDC_ST_BAD_PARAMETER; @@ -142,118 +142,319 @@ rdc_status_t RdcStandaloneHandler::rdc_get_device_attributes(uint32_t gpu_index, rdc_status_t RdcStandaloneHandler::rdc_group_gpu_create(rdc_group_type_t type, const char* group_name, rdc_gpu_group_t* p_rdc_group_id) { - // TODO(bill_liu): implement - (void)(type); - (void)(group_name); - (void)(p_rdc_group_id); + if (!group_name || !p_rdc_group_id) { + return RDC_ST_BAD_PARAMETER; + } + + ::rdc::CreateGpuGroupRequest request; + ::rdc::CreateGpuGroupResponse reply; + ::grpc::ClientContext context; + + request.set_type( + static_cast<::rdc::CreateGpuGroupRequest_GpuGroupType>(type)); + request.set_group_name(group_name); + ::grpc::Status status = stub_-> + CreateGpuGroup(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + *p_rdc_group_id = reply.group_id(); + return RDC_ST_OK; } rdc_status_t RdcStandaloneHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id, uint32_t gpu_index) { - // TODO(bill_liu): implement - (void)(group_id); - (void)(gpu_index); - return RDC_ST_OK; + ::rdc::AddToGpuGroupRequest request; + ::rdc::AddToGpuGroupResponse reply; + ::grpc::ClientContext context; + + request.set_group_id(group_id); + request.set_gpu_index(gpu_index); + ::grpc::Status status = stub_-> + AddToGpuGroup(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + + return err_status; } rdc_status_t RdcStandaloneHandler::rdc_group_field_create( uint32_t num_field_ids, uint32_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { - // TODO(bill_liu): implement - (void)(num_field_ids); - (void)(field_ids); - 
(void)(field_group_name); - (void)(rdc_field_group_id); + if (!field_ids || !field_group_name || !rdc_field_group_id) { + return RDC_ST_BAD_PARAMETER; + } + + ::rdc::CreateFieldGroupRequest request; + ::rdc::CreateFieldGroupResponse reply; + ::grpc::ClientContext context; + + request.set_field_group_name(field_group_name); + for (uint32_t i = 0; i < num_field_ids; i++){ + request.add_field_ids(field_ids[i]); + } + + ::grpc::Status status = stub_-> + CreateFieldGroup(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + *rdc_field_group_id = reply.field_group_id(); + return RDC_ST_OK; } rdc_status_t RdcStandaloneHandler::rdc_group_field_get_info( rdc_field_grp_t rdc_field_group_id, rdc_field_group_info_t* field_group_info) { - // TODO(bill_liu): implement - (void)(rdc_field_group_id); - (void)(field_group_info); + if (!field_group_info) { + return RDC_ST_BAD_PARAMETER; + } + + ::rdc::GetFieldGroupInfoRequest request; + ::rdc::GetFieldGroupInfoResponse reply; + ::grpc::ClientContext context; + + request.set_field_group_id(rdc_field_group_id); + ::grpc::Status status = stub_-> + GetFieldGroupInfo(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + if (reply.field_ids_size() > RDC_MAX_FIELD_IDS_PER_FIELD_GROUP) { + return RDC_ST_MAX_LIMIT; + } + + field_group_info->count = reply.field_ids_size(); + strncpy_with_null(field_group_info->group_name, + reply.filed_group_name().c_str(), RDC_MAX_STR_LENGTH); + for (int i = 0; i < reply.field_ids_size(); i++) { + field_group_info->field_ids[i] = reply.field_ids(i); + } + return RDC_ST_OK; } rdc_status_t RdcStandaloneHandler::rdc_group_gpu_get_info( rdc_gpu_group_t p_rdc_group_id, rdc_group_info_t* p_rdc_group_info) { - // TODO(bill_liu): implement - (void)(p_rdc_group_id); - (void)(p_rdc_group_info); + if (!p_rdc_group_info) { + return 
RDC_ST_BAD_PARAMETER; + } + + ::rdc::GetGpuGroupInfoRequest request; + ::rdc::GetGpuGroupInfoResponse reply; + ::grpc::ClientContext context; + + request.set_group_id(p_rdc_group_id); + ::grpc::Status status = stub_-> + GetGpuGroupInfo(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + if (reply.entity_ids_size() > RDC_GROUP_MAX_ENTITIES) { + return RDC_ST_MAX_LIMIT; + } + + p_rdc_group_info->count = reply.entity_ids_size(); + strncpy_with_null(p_rdc_group_info->group_name, + reply.group_name().c_str(), RDC_MAX_STR_LENGTH); + for (int i = 0; i < reply.entity_ids_size(); i++) { + p_rdc_group_info->entity_ids[i] = reply.entity_ids(i); + } + + return RDC_ST_OK; +} + +rdc_status_t RdcStandaloneHandler::rdc_group_get_all_ids( + rdc_gpu_group_t group_id_list[], uint32_t* count) { + if (!count) { + return RDC_ST_BAD_PARAMETER; + } + ::rdc::Empty request; + ::rdc::GetGroupAllIdsResponse reply; + ::grpc::ClientContext context; + + ::grpc::Status status = stub_-> + GetGroupAllIds(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + *count = reply.group_ids_size(); + // Exactly RDC_MAX_NUM_GROUPS ids is a valid (full) reply; only a larger + // count is rejected. + if (*count > RDC_MAX_NUM_GROUPS) { + return RDC_ST_MAX_LIMIT; + } + for (uint32_t i =0 ; i < *count; i++) { + group_id_list[i] = reply.group_ids(i); + } + + return RDC_ST_OK; +} + +rdc_status_t RdcStandaloneHandler::rdc_group_field_get_all_ids( + rdc_field_grp_t field_group_id_list[], uint32_t* count) { + if (!count) { + return RDC_ST_BAD_PARAMETER; + } + + ::rdc::Empty request; + ::rdc::GetFieldGroupAllIdsResponse reply; + ::grpc::ClientContext context; + + ::grpc::Status status = stub_-> + GetFieldGroupAllIds(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + *count = reply.field_group_ids_size(); + // Exactly RDC_MAX_NUM_FIELD_GROUPS ids is a valid (full) reply; only a + // larger count is rejected. + if (*count >
RDC_MAX_NUM_FIELD_GROUPS) { + return RDC_ST_MAX_LIMIT; + } + for (uint32_t i =0 ; i < *count; i++) { + field_group_id_list[i] = reply.field_group_ids(i); + } + return RDC_ST_OK; } rdc_status_t RdcStandaloneHandler::rdc_group_gpu_destroy( rdc_gpu_group_t p_rdc_group_id) { - // TODO(bill_liu): implement - (void)(p_rdc_group_id); - return RDC_ST_OK; + ::rdc::DestroyGpuGroupRequest request; + ::rdc::DestroyGpuGroupResponse reply; + ::grpc::ClientContext context; + + request.set_group_id(p_rdc_group_id); + ::grpc::Status status = stub_-> + DestroyGpuGroup(&context, request, &reply); + return error_handle(status, reply.status()); } rdc_status_t RdcStandaloneHandler::rdc_group_field_destroy( rdc_field_grp_t rdc_field_group_id) { - // TODO(bill_liu): implement - (void)(rdc_field_group_id); - return RDC_ST_OK; + ::rdc::DestroyFieldGroupRequest request; + ::rdc::DestroyFieldGroupResponse reply; + ::grpc::ClientContext context; + + request.set_field_group_id(rdc_field_group_id); + ::grpc::Status status = stub_-> + DestroyFieldGroup(&context, request, &reply); + return error_handle(status, reply.status()); } // Field RdcAPI -rdc_status_t RdcStandaloneHandler::rdc_watch_fields(rdc_gpu_group_t group_id, +rdc_status_t RdcStandaloneHandler::rdc_field_watch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id, uint64_t update_freq, double max_keep_age, uint32_t max_keep_samples) { - // TODO(bill_liu): implement - (void)(group_id); - (void)(field_group_id); - (void)(update_freq); - (void)(max_keep_age); - (void)(max_keep_samples); - return RDC_ST_OK; + ::rdc::WatchFieldsRequest request; + ::rdc::WatchFieldsResponse reply; + ::grpc::ClientContext context; + + request.set_group_id(group_id); + request.set_field_group_id(field_group_id); + request.set_update_freq(update_freq); + request.set_max_keep_age(max_keep_age); + request.set_max_keep_samples(max_keep_samples); + ::grpc::Status status = stub_-> + WatchFields(&context, request, &reply); + + return error_handle(status, 
reply.status()); } -rdc_status_t RdcStandaloneHandler::rdc_get_latest_value_for_field( +rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value( uint32_t gpu_index, uint32_t field, rdc_field_value* value) { - // TODO(bill_liu): implement if (!value) { return RDC_ST_BAD_PARAMETER; } - (void)(gpu_index); - (void)(field); - return RDC_ST_NOT_FOUND; + + ::rdc::GetLatestFieldValueRequest request; + ::rdc::GetLatestFieldValueResponse reply; + ::grpc::ClientContext context; + + request.set_gpu_index(gpu_index); + request.set_field_id(field); + ::grpc::Status status = stub_-> + GetLatestFieldValue(&context, request, &reply); + rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + value->field_id = reply.field_id(); + value->status = reply.rdc_status(); + value->ts = reply.ts(); + value->type = static_cast(reply.type()); + if (value->type == INTEGER) { + value->value.l_int = reply.l_int(); + } else if (value->type == DOUBLE) { + value->value.dbl = reply.dbl(); + } else if (value->type == STRING || value->type == BLOB) { + strncpy_with_null(value->value.str, + reply.str().c_str(), RDC_MAX_STR_LENGTH); + } + + return RDC_ST_OK; } -rdc_status_t RdcStandaloneHandler::rdc_get_field_value_since(uint32_t gpu_index, +rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp, uint64_t *next_since_time_stamp, rdc_field_value* value) { - // TODO(bill_liu): implement if (!next_since_time_stamp || !value) { return RDC_ST_BAD_PARAMETER; } - (void)(since_time_stamp); - (void)(gpu_index); - (void)(field); - (void)(value); - return RDC_ST_NOT_FOUND; + ::rdc::GetFieldSinceRequest request; + ::rdc::GetFieldSinceResponse reply; + ::grpc::ClientContext context; + + request.set_gpu_index(gpu_index); + request.set_field_id(field); + request.set_since_time_stamp(since_time_stamp); + ::grpc::Status status = stub_-> + GetFieldSince(&context, request, &reply); + 
rdc_status_t err_status = error_handle(status, reply.status()); + if (err_status != RDC_ST_OK) return err_status; + + value->field_id = reply.field_id(); + value->status = reply.rdc_status(); + value->ts = reply.ts(); + value->type = static_cast(reply.type()); + if (value->type == INTEGER) { + value->value.l_int = reply.l_int(); + } else if (value->type == DOUBLE) { + value->value.dbl = reply.dbl(); + } else if (value->type == STRING || value->type == BLOB) { + strncpy_with_null(value->value.str, + reply.str().c_str(), RDC_MAX_STR_LENGTH); + } + *next_since_time_stamp = reply.next_since_time_stamp(); + + return RDC_ST_OK; } -rdc_status_t RdcStandaloneHandler::rdc_unwatch_fields(rdc_gpu_group_t group_id, +rdc_status_t RdcStandaloneHandler::rdc_field_unwatch(rdc_gpu_group_t group_id, rdc_field_grp_t field_group_id) { - // TODO(bill_liu): implement - (void)(group_id); - (void)(field_group_id); - return RDC_ST_OK; + ::rdc::UnWatchFieldsRequest request; + ::rdc::UnWatchFieldsResponse reply; + ::grpc::ClientContext context; + + request.set_group_id(group_id); + request.set_field_group_id(field_group_id); + ::grpc::Status status = stub_-> + UnWatchFields(&context, request, &reply); + + return error_handle(status, reply.status()); } // Control RdcAPI -rdc_status_t RdcStandaloneHandler::rdc_update_all_fields( +rdc_status_t RdcStandaloneHandler::rdc_field_update_all( uint32_t wait_for_update) { - // TODO(bill_liu): implement - (void)(wait_for_update); - return RDC_ST_OK; + ::rdc::UpdateAllFieldsRequest request; + ::rdc::UpdateAllFieldsResponse reply; + ::grpc::ClientContext context; + + request.set_wait_for_update(wait_for_update); + ::grpc::Status status = stub_-> + UpdateAllFields(&context, request, &reply); + + return error_handle(status, reply.status()); } diff --git a/rdci/src/RdciDiscoverySubSystem.cc b/rdci/src/RdciDiscoverySubSystem.cc index cdfe62fa45..0ae9632bb2 100644 --- a/rdci/src/RdciDiscoverySubSystem.cc +++ b/rdci/src/RdciDiscoverySubSystem.cc @@ -39,13 
+39,15 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) { {"host", required_argument, nullptr, HOST_OPTIONS }, {"help", optional_argument, nullptr, 'h' }, {"unauth", optional_argument, nullptr, 'u' }, + {"list", optional_argument, nullptr, 'l' }, { nullptr, 0 , nullptr, 0 } }; int option_index = 0; int opt = 0; + bool is_list = false; - while ((opt = getopt_long(argc, argv, "hu", + while ((opt = getopt_long(argc, argv, "hlu", long_options, &option_index)) != -1) { switch (opt) { case HOST_OPTIONS: @@ -57,6 +59,9 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) { case 'u': use_auth_ = false; break; + case 'l': + is_list = true; + break; default: show_help(); throw RdcException(RDC_ST_BAD_PARAMETER, @@ -64,15 +69,22 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) { } } + if (!is_list) { + show_help(); + throw RdcException(RDC_ST_BAD_PARAMETER, + "Need to specify operations"); + } } void RdciDiscoverySubSystem::show_help() const { std::cout << " discovery -- Used to discover and identify GPUs " << "and their attributes.\n\n"; std::cout << "Usage\n"; - std::cout << " rdci discovery [--host :port] [-u]\n"; + std::cout << " rdci discovery [--host :port] [-u] -l\n"; std::cout << "\nFlags:\n"; show_common_usage(); + std::cout << " -l --list list GPU discovered" + <<" on the system\n"; } @@ -83,7 +95,7 @@ void RdciDiscoverySubSystem::process() { uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES]; uint32_t count = 0; - rdc_status_t result = rdc_get_all_devices(rdc_handle_, + rdc_status_t result = rdc_device_get_all(rdc_handle_, gpu_index_list, &count); if (result != RDC_ST_OK) { throw RdcException(result, "Fail to get device information"); @@ -99,7 +111,7 @@ void RdciDiscoverySubSystem::process() { std::cout << "GPU Index\t Device Information\n"; for (uint32_t i = 0; i < count; i++) { rdc_device_attributes_t attribute; - result = rdc_get_device_attributes(rdc_handle_, + result = 
rdc_device_get_attributes(rdc_handle_, gpu_index_list[i], &attribute); if (result != RDC_ST_OK) { return; diff --git a/server/include/rdc/rdc_api_service.h b/server/include/rdc/rdc_api_service.h index 9fe05e8b57..50bbe7299c 100755 --- a/server/include/rdc/rdc_api_service.h +++ b/server/include/rdc/rdc_api_service.h @@ -43,6 +43,63 @@ class RdcAPIServiceImpl final : public ::rdc::RdcAPI::Service { const ::rdc::GetDeviceAttributesRequest* request, ::rdc::GetDeviceAttributesResponse* reply) override; + ::grpc::Status CreateGpuGroup(::grpc::ServerContext* context, + const ::rdc::CreateGpuGroupRequest* request, + ::rdc::CreateGpuGroupResponse* reply) override; + + ::grpc::Status AddToGpuGroup(::grpc::ServerContext* context, + const ::rdc::AddToGpuGroupRequest* request, + ::rdc::AddToGpuGroupResponse* reply) override; + + ::grpc::Status GetGpuGroupInfo(::grpc::ServerContext* context, + const ::rdc::GetGpuGroupInfoRequest* request, + ::rdc::GetGpuGroupInfoResponse* reply) override; + + ::grpc::Status GetGroupAllIds(::grpc::ServerContext* context, + const ::rdc::Empty* request, + ::rdc::GetGroupAllIdsResponse* reply) override; + + ::grpc::Status DestroyGpuGroup(::grpc::ServerContext* context, + const ::rdc::DestroyGpuGroupRequest* request, + ::rdc::DestroyGpuGroupResponse* reply) override; + + ::grpc::Status CreateFieldGroup(::grpc::ServerContext* context, + const ::rdc::CreateFieldGroupRequest* request, + ::rdc::CreateFieldGroupResponse* reply) override; + + ::grpc::Status GetFieldGroupInfo(::grpc::ServerContext* context, + const ::rdc::GetFieldGroupInfoRequest* request, + ::rdc::GetFieldGroupInfoResponse* reply) override; + + ::grpc::Status GetFieldGroupAllIds(::grpc::ServerContext* context, + const ::rdc::Empty* request, + ::rdc::GetFieldGroupAllIdsResponse* reply) override; + + ::grpc::Status DestroyFieldGroup(::grpc::ServerContext* context, + const ::rdc::DestroyFieldGroupRequest* request, + ::rdc::DestroyFieldGroupResponse* reply) override; + + ::grpc::Status 
WatchFields(::grpc::ServerContext* context, + const ::rdc::WatchFieldsRequest* request, + ::rdc::WatchFieldsResponse* reply) override; + + ::grpc::Status GetLatestFieldValue(::grpc::ServerContext* context, + const ::rdc::GetLatestFieldValueRequest* request, + ::rdc::GetLatestFieldValueResponse* reply) override; + + ::grpc::Status GetFieldSince(::grpc::ServerContext* context, + const ::rdc::GetFieldSinceRequest* request, + ::rdc::GetFieldSinceResponse* reply) override; + + ::grpc::Status UnWatchFields(::grpc::ServerContext* context, + const ::rdc::UnWatchFieldsRequest* request, + ::rdc::UnWatchFieldsResponse* reply) override; + + ::grpc::Status UpdateAllFields(::grpc::ServerContext* context, + const ::rdc::UpdateAllFieldsRequest* request, + ::rdc::UpdateAllFieldsResponse* reply) override; + + private: rdc_handle_t rdc_handle_; }; diff --git a/server/src/rdc_api_service.cc b/server/src/rdc_api_service.cc index b6734c8f86..3a184118e5 100755 --- a/server/src/rdc_api_service.cc +++ b/server/src/rdc_api_service.cc @@ -70,7 +70,7 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { } uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES]; uint32_t count = 0; - rdc_status_t result = rdc_get_all_devices(rdc_handle_, + rdc_status_t result = rdc_device_get_all(rdc_handle_, gpu_index_list, &count); reply->set_status(result); if (result != RDC_ST_OK) { @@ -93,7 +93,7 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { } uint32_t gpu_index = request->gpu_index(); rdc_device_attributes_t attribute; - rdc_status_t result = rdc_get_device_attributes(rdc_handle_, + rdc_status_t result = rdc_device_get_attributes(rdc_handle_, gpu_index, &attribute); ::rdc::DeviceAttributes* attr = reply->mutable_attributes(); @@ -104,6 +104,323 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() { return ::grpc::Status::OK; } +::grpc::Status RdcAPIServiceImpl::CreateGpuGroup( + ::grpc::ServerContext* context, + const ::rdc::CreateGpuGroupRequest* request, + ::rdc::CreateGpuGroupResponse* reply) { + (void)(context); + if (!reply || 
!request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_gpu_group_t group_id = 0; + rdc_status_t result = rdc_group_gpu_create(rdc_handle_, + static_cast(request->type()), + request->group_name().c_str(), &group_id); + reply->set_status(result); + if (result != RDC_ST_OK) { + return ::grpc::Status::OK; + } + + reply->set_group_id(group_id); + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::AddToGpuGroup( + ::grpc::ServerContext* context, + const ::rdc::AddToGpuGroupRequest* request, + ::rdc::AddToGpuGroupResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_status_t result = rdc_group_gpu_add(rdc_handle_, + request->group_id(), request->gpu_index()); + reply->set_status(result); + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::GetGpuGroupInfo( + ::grpc::ServerContext* context, + const ::rdc::GetGpuGroupInfoRequest* request, + ::rdc::GetGpuGroupInfoResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_group_info_t group_info; + rdc_status_t result = rdc_group_gpu_get_info( + rdc_handle_, request->group_id(), &group_info); + reply->set_status(result); + if (result != RDC_ST_OK) { + return ::grpc::Status::OK; + } + + reply->set_group_name(group_info.group_name); + for (uint32_t i = 0; i < group_info.count; i++) { + reply->add_entity_ids(group_info.entity_ids[i]); + } + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::GetGroupAllIds( + ::grpc::ServerContext* context, + const ::rdc::Empty* request, + ::rdc::GetGroupAllIdsResponse* reply) { + if (!reply || !request || !context) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_gpu_group_t group_id_list[RDC_MAX_NUM_GROUPS]; + uint32_t count = 0; + rdc_status_t result = rdc_group_get_all_ids( + 
rdc_handle_, group_id_list, &count); + reply->set_status(result); + if (result != RDC_ST_OK) { + return ::grpc::Status::OK; + } + + for (uint32_t i = 0; i < count; i++) { + reply->add_group_ids(group_id_list[i]); + } + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::GetFieldGroupAllIds( + ::grpc::ServerContext* context, + const ::rdc::Empty* request, + ::rdc::GetFieldGroupAllIdsResponse* reply) { + if (!reply || !request || !context) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_field_grp_t field_group_id_list[RDC_MAX_NUM_FIELD_GROUPS]; + uint32_t count = 0; + rdc_status_t result = rdc_group_field_get_all_ids( + rdc_handle_, field_group_id_list, &count); + reply->set_status(result); + if (result != RDC_ST_OK) { + return ::grpc::Status::OK; + } + + for (uint32_t i = 0; i < count; i++) { + reply->add_field_group_ids(field_group_id_list[i]); + } + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::DestroyGpuGroup( + ::grpc::ServerContext* context, + const ::rdc::DestroyGpuGroupRequest* request, + ::rdc::DestroyGpuGroupResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_status_t result = rdc_group_gpu_destroy( + rdc_handle_, request->group_id()); + reply->set_status(result); + + return ::grpc::Status::OK; +} + + +::grpc::Status RdcAPIServiceImpl::CreateFieldGroup( + ::grpc::ServerContext* context, + const ::rdc::CreateFieldGroupRequest* request, + ::rdc::CreateFieldGroupResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + // The number of field ids is supplied by the client; reject requests that + // would overflow the fixed-size field_ids buffer below. + if (request->field_ids_size() > RDC_MAX_FIELD_IDS_PER_FIELD_GROUP) { + reply->set_status(RDC_ST_MAX_LIMIT); + return ::grpc::Status::OK; + } + + rdc_field_grp_t field_group_id; + uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP]; + for (int i = 0; i < request->field_ids_size(); i++) { + field_ids[i] = request->field_ids(i); + } + rdc_status_t result = rdc_group_field_create( + rdc_handle_, request->field_ids_size() ,
&field_ids[0], + request->field_group_name().c_str(), &field_group_id); + reply->set_status(result); + if (result != RDC_ST_OK) { + return ::grpc::Status::OK; + } + + reply->set_field_group_id(field_group_id); + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::GetFieldGroupInfo( + ::grpc::ServerContext* context, + const ::rdc::GetFieldGroupInfoRequest* request, + ::rdc::GetFieldGroupInfoResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_field_group_info_t field_info; + rdc_status_t result = rdc_group_field_get_info( + rdc_handle_, request->field_group_id(), &field_info); + reply->set_status(result); + if (result != RDC_ST_OK) { + return ::grpc::Status::OK; + } + + reply->set_filed_group_name(field_info.group_name); + for (uint32_t i = 0; i < field_info.count; i++) { + reply->add_field_ids(field_info.field_ids[i]); + } + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::DestroyFieldGroup( + ::grpc::ServerContext* context, + const ::rdc::DestroyFieldGroupRequest* request, + ::rdc::DestroyFieldGroupResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_status_t result = rdc_group_field_destroy( + rdc_handle_, request->field_group_id()); + reply->set_status(result); + + return ::grpc::Status::OK; +} + +::grpc::Status RdcAPIServiceImpl::WatchFields( + ::grpc::ServerContext* context, + const ::rdc::WatchFieldsRequest* request, + ::rdc::WatchFieldsResponse* reply) { + (void)(context); + if (!reply || !request) { + return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); + } + + rdc_status_t result = rdc_field_watch( + rdc_handle_, request->group_id(), request->field_group_id(), + request->update_freq(), request->max_keep_age(), + request->max_keep_samples()); + reply->set_status(result); + + return ::grpc::Status::OK; +} + 
+// Handles the GetLatestFieldValue RPC.
+//
+// Asks rdc_field_get_latest_value() for the requested gpu index / field id
+// and stores the returned status in the reply. On RDC_ST_OK the sample's
+// field id, status, timestamp and type are copied to the reply, together
+// with the one payload member that matches the sample's type.
+//
+// Returns gRPC INTERNAL if request or reply is null, otherwise gRPC OK.
+::grpc::Status RdcAPIServiceImpl::GetLatestFieldValue(
+    ::grpc::ServerContext* context,
+    const ::rdc::GetLatestFieldValueRequest* request,
+    ::rdc::GetLatestFieldValueResponse* reply) {
+  (void)(context);
+  if (reply == nullptr || request == nullptr) {
+    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents");
+  }
+
+  rdc_field_value latest;
+  const rdc_status_t rc = rdc_field_get_latest_value(
+      rdc_handle_, request->gpu_index(), request->field_id(), &latest);
+  reply->set_status(rc);
+
+  // Only a successful lookup yields a sample to serialize.
+  if (rc == RDC_ST_OK) {
+    reply->set_field_id(latest.field_id);
+    reply->set_rdc_status(latest.status);
+    reply->set_ts(latest.ts);
+    reply->set_type(
+        static_cast<::rdc::GetLatestFieldValueResponse_FieldType>(
+            latest.type));
+
+    // Fill in the payload member selected by the sample's type; any other
+    // type leaves the payload unset.
+    switch (latest.type) {
+      case INTEGER:
+        reply->set_l_int(latest.value.l_int);
+        break;
+      case DOUBLE:
+        reply->set_dbl(latest.value.dbl);
+        break;
+      case STRING:
+      case BLOB:
+        reply->set_str(latest.value.str);
+        break;
+      default:
+        break;
+    }
+  }
+
+  return ::grpc::Status::OK;
+}
+
+// Handles the GetFieldSince RPC.
+//
+// Asks rdc_field_get_value_since() for the requested gpu index / field id
+// starting at request->since_time_stamp(), and stores the returned status in
+// the reply. On RDC_ST_OK the next timestamp reported by that call and the
+// sample's field id, status, timestamp, type and type-matching payload are
+// copied to the reply.
+//
+// Returns gRPC INTERNAL if request or reply is null, otherwise gRPC OK.
+::grpc::Status RdcAPIServiceImpl::GetFieldSince(
+    ::grpc::ServerContext* context,
+    const ::rdc::GetFieldSinceRequest* request,
+    ::rdc::GetFieldSinceResponse* reply) {
+  (void)(context);
+  if (reply == nullptr || request == nullptr) {
+    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents");
+  }
+
+  rdc_field_value sample;
+  uint64_t next_since;
+  const rdc_status_t rc = rdc_field_get_value_since(
+      rdc_handle_, request->gpu_index(), request->field_id(),
+      request->since_time_stamp(), &next_since, &sample);
+  reply->set_status(rc);
+
+  // Only a successful lookup yields a sample to serialize.
+  if (rc == RDC_ST_OK) {
+    reply->set_next_since_time_stamp(next_since);
+    reply->set_field_id(sample.field_id);
+    reply->set_rdc_status(sample.status);
+    reply->set_ts(sample.ts);
+    reply->set_type(
+        static_cast<::rdc::GetFieldSinceResponse_FieldType>(sample.type));
+
+    // Fill in the payload member selected by the sample's type; any other
+    // type leaves the payload unset.
+    switch (sample.type) {
+      case INTEGER:
+        reply->set_l_int(sample.value.l_int);
+        break;
+      case DOUBLE:
+        reply->set_dbl(sample.value.dbl);
+        break;
+      case STRING:
+      case BLOB:
+        reply->set_str(sample.value.str);
+        break;
+      default:
+        break;
+    }
+  }
+
+  return ::grpc::Status::OK;
+}
+
+// Handles the UnWatchFields RPC.
+//
+// Forwards the request's group id and field group id to rdc_field_unwatch()
+// and stores the returned status in the reply.
+//
+// Returns gRPC INTERNAL if request or reply is null, otherwise gRPC OK.
+::grpc::Status RdcAPIServiceImpl::UnWatchFields(
+    ::grpc::ServerContext* context,
+    const ::rdc::UnWatchFieldsRequest* request,
+    ::rdc::UnWatchFieldsResponse* reply) {
+  (void)(context);
+  if (reply == nullptr || request == nullptr) {
+    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents");
+  }
+
+  const rdc_status_t rc = rdc_field_unwatch(
+      rdc_handle_, request->group_id(), request->field_group_id());
+  reply->set_status(rc);
+  return ::grpc::Status::OK;
+}
+
+// Handles the UpdateAllFields RPC.
+//
+// Forwards request->wait_for_update() to rdc_field_update_all() and stores
+// the returned status in the reply.
+//
+// Returns gRPC INTERNAL if request or reply is null, otherwise gRPC OK.
+::grpc::Status RdcAPIServiceImpl::UpdateAllFields(
+    ::grpc::ServerContext* context,
+    const ::rdc::UpdateAllFieldsRequest* request,
+    ::rdc::UpdateAllFieldsResponse* reply) {
+  (void)(context);
+  if (reply == nullptr || request == nullptr) {
+    return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents");
+  }
+
+  const rdc_status_t rc = rdc_field_update_all(
+      rdc_handle_, request->wait_for_update());
+  reply->set_status(rc);
+  return ::grpc::Status::OK;
+}
+
+
 } // namespace rdc
 } // namespace amd