diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index 2d33294dfe..ff5a2e5cf8 100755 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -233,6 +233,7 @@ typedef struct { uint64_t max_value; //!< Maximum value measured uint64_t min_value; //!< Minimum value measured uint64_t average; //!< Average value measured + double standard_deviation; //!< The standard deviation } rdc_stats_summary_t; /** diff --git a/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h b/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h index 2a16fa671a..6e928fb4a8 100644 --- a/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h +++ b/projects/rdc/include/rdc_lib/impl/RdcCacheManagerImpl.h @@ -46,6 +46,15 @@ struct FieldSummaryStats { int64_t max_value; int64_t min_value; int64_t total_value; + + // Use Welford algorithm to calculate the standard deviations. + // https://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods + // https://www.johndcook.com/blog/standard_deviation/ + double old_m; + double old_s; + double new_m; + double new_s; + uint64_t last_time; uint64_t count; }; @@ -100,6 +109,8 @@ class RdcCacheManagerImpl: public RdcCacheManager { void set_summary(const FieldSummaryStats & stats, rdc_stats_summary_t& gpu, rdc_stats_summary_t& summary, // NOLINT unsigned int adjuster); + void set_average_summary( + rdc_stats_summary_t& summary, uint32_t num_gpus); // NOLINT RdcCacheSamples cache_samples_; RdcJobStatsCache cache_jobs_; std::mutex cache_mutex_; diff --git a/projects/rdc/protos/rdc.proto b/projects/rdc/protos/rdc.proto index 6e9a1b9627..0736a23706 100755 --- a/projects/rdc/protos/rdc.proto +++ b/projects/rdc/protos/rdc.proto @@ -414,7 +414,7 @@ message JobStatsSummary { uint64 max_value = 1; uint64 min_value = 2; uint64 average = 3; - + double standard_deviation = 4; } message GpuUsageInfo { diff --git a/projects/rdc/rdc_libs/CMakeLists.txt b/projects/rdc/rdc_libs/CMakeLists.txt index d3995d2499..b42f4b19e2 100755 --- a/projects/rdc/rdc_libs/CMakeLists.txt +++ b/projects/rdc/rdc_libs/CMakeLists.txt @@ -160,7 +160,7 @@ message("RDC_LIB_INC_LIST=${RDC_LIB_INC_LIST}") link_directories(${RSMI_LIB_DIR} "${GRPC_ROOT}/lib" "${GRPC_ROOT}/lib64") add_library(${RDC_LIB} SHARED ${RDC_LIB_SRC_LIST} ${RDC_LIB_INC_LIST}) -target_link_libraries(${RDC_LIB} pthread rocm_smi64) +target_link_libraries(${RDC_LIB} ${BOOTSTRAP_LIB} pthread rocm_smi64) target_include_directories(${RDC_LIB} PRIVATE "${PROJECT_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}/include" @@ -191,7 +191,7 @@ set(RDCCLIENT_LIB_INC_LIST ${RDCCLIENT_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib message("RDCCLIENT_LIB_INC_LIST=${RDCCLIENT_LIB_INC_LIST}") add_library(${RDCCLIENT_LIB} SHARED ${RDCCLIENT_LIB_SRC_LIST} ${RDCCLIENT_LIB_INC_LIST}) -target_link_libraries(${RDCCLIENT_LIB} pthread rt grpc grpc++ grpc++_reflection +target_link_libraries(${RDCCLIENT_LIB} ${BOOTSTRAP_LIB} pthread rt grpc grpc++ grpc++_reflection dl protobuf) target_include_directories(${RDCCLIENT_LIB} PRIVATE "${GRPC_ROOT}/include" diff --git a/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc index 110529bef8..7ba12e4413 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcCacheManagerImpl.cc @@ -21,6 +21,7 @@ THE SOFTWARE. */ #include "rdc_lib/impl/RdcCacheManagerImpl.h" #include +#include #include #include #include "rdc_lib/RdcLogger.h" @@ -213,6 +214,10 @@ rdc_status_t RdcCacheManagerImpl::rdc_update_job_stats(uint32_t gpu_index, if (value.field_id == RDC_FI_POWER_USAGE) { gpu_iter->second.energy_last_time = value.ts; } + + // https://www.johndcook.com/blog/standard_deviation/ + fsummary->second.old_s = 0; + fsummary->second.old_m = fsummary->second.new_m = value.value.l_int; return RDC_ST_OK; } if (value.field_id == RDC_FI_POWER_USAGE) { @@ -229,6 +234,15 @@ rdc_status_t RdcCacheManagerImpl::rdc_update_job_stats(uint32_t gpu_index, fsummary->second.last_time = value.ts; fsummary->second.count++; + // https://www.johndcook.com/blog/standard_deviation/ + fsummary->second.new_m = fsummary->second.old_m + + (value.value.l_int - fsummary->second.old_m)/fsummary->second.count; + fsummary->second.new_s = fsummary->second.old_s + + (value.value.l_int - fsummary->second.old_m)* + (value.value.l_int - fsummary->second.new_m); + fsummary->second.old_m = fsummary->second.new_m; + fsummary->second.old_s = fsummary->second.new_s; + return RDC_ST_OK; } @@ -248,6 +262,11 @@ void RdcCacheManagerImpl::set_summary(const FieldSummaryStats & stats, summary.min_value = std::min(summary.min_value, gpu.min_value); //< save total for future average calculation. summary.average += gpu.average; + + //< calculate the sample variance + gpu.standard_deviation = std::sqrt((stats.count > 1) + ? stats.new_s/(stats.count - 1) : 0.0)/adjuster; + summary.standard_deviation += gpu.standard_deviation; } rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(const char jobId[64], @@ -274,15 +293,17 @@ rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(const char jobId[64], summary_info.max_gpu_memory_used = 0; summary_info.ecc_correct = 0; summary_info.ecc_uncorrect = 0; - summary_info.power_usage = {0, std::numeric_limits::max(), 0}; - summary_info.pcie_tx = {0, std::numeric_limits::max(), 0}; - summary_info.pcie_rx = {0, std::numeric_limits::max(), 0}; - summary_info.gpu_temperature = {0, std::numeric_limits::max(), 0}; - summary_info.memory_clock = {0, std::numeric_limits::max(), 0}; - summary_info.gpu_clock = {0, std::numeric_limits::max(), 0}; - summary_info.gpu_utilization = {0, std::numeric_limits::max(), 0}; + summary_info.power_usage = {0, std::numeric_limits::max(), 0, 0}; + summary_info.pcie_tx = {0, std::numeric_limits::max(), 0, 0}; + summary_info.pcie_rx = {0, std::numeric_limits::max(), 0, 0}; + summary_info.gpu_temperature = + {0, std::numeric_limits::max(), 0, 0}; + summary_info.memory_clock = {0, std::numeric_limits::max(), 0, 0}; + summary_info.gpu_clock = {0, std::numeric_limits::max(), 0, 0}; + summary_info.gpu_utilization = + {0, std::numeric_limits::max(), 0, 0}; summary_info.memory_utilization = {0, - std::numeric_limits::max(), 0}; + std::numeric_limits::max(), 0, 0}; p_job_info->num_gpus = job_stats->second.gpu_stats.size(); @@ -363,27 +384,25 @@ rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(const char jobId[64], } } } - // Get the average of the summary - summary_info.power_usage.average = summary_info.power_usage.average/ - p_job_info->num_gpus; - summary_info.gpu_clock.average = summary_info.gpu_clock.average/ - p_job_info->num_gpus; - summary_info.gpu_utilization.average = summary_info.gpu_utilization.average/ - p_job_info->num_gpus; - summary_info.memory_utilization.average = - summary_info.memory_utilization.average/p_job_info->num_gpus; - summary_info.pcie_tx.average = summary_info.pcie_tx.average/ - p_job_info->num_gpus; - summary_info.pcie_rx.average = summary_info.pcie_rx.average/ - p_job_info->num_gpus; - summary_info.gpu_temperature.average = summary_info.gpu_temperature.average/ - p_job_info->num_gpus; - summary_info.memory_clock.average = summary_info.memory_clock.average/ - p_job_info->num_gpus; + // Set the average of the summary + set_average_summary(summary_info.power_usage, p_job_info->num_gpus); + set_average_summary(summary_info.gpu_clock, p_job_info->num_gpus); + set_average_summary(summary_info.gpu_utilization, p_job_info->num_gpus); + set_average_summary(summary_info.memory_utilization, p_job_info->num_gpus); + set_average_summary(summary_info.pcie_tx, p_job_info->num_gpus); + set_average_summary(summary_info.pcie_rx, p_job_info->num_gpus); + set_average_summary(summary_info.gpu_temperature, p_job_info->num_gpus); + set_average_summary(summary_info.memory_clock, p_job_info->num_gpus); return RDC_ST_OK; } +void RdcCacheManagerImpl::set_average_summary( + rdc_stats_summary_t& summary, uint32_t num_gpus) { + summary.average = summary.average/num_gpus; + summary.standard_deviation = summary.standard_deviation/num_gpus; +} + rdc_status_t RdcCacheManagerImpl::rdc_job_start_stats(const char job_id[64], const rdc_group_info_t& ginfo, const rdc_field_group_info_t& finfo, const rdc_gpu_gauges_t& gpu_gauges) { diff --git a/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc b/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc index 73ea341ba1..ff7a7f7127 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcGroupSettingsImpl.cc @@ -42,6 +42,7 @@ RdcGroupSettingsImpl::RdcGroupSettingsImpl() { rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create( const char* group_name, rdc_gpu_group_t* p_rdc_group_id) { + RDC_LOG(RDC_DEBUG, "Create group " << group_name); rdc_group_info_t ginfo; strncpy_with_null(ginfo.group_name, group_name, RDC_MAX_STR_LENGTH); ginfo.count = 0; @@ -135,6 +136,7 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create( uint32_t num_field_ids, uint32_t* field_ids, const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) { + RDC_LOG(RDC_DEBUG, "Create field group " << field_group_name); rdc_field_group_info_t finfo; finfo.count = num_field_ids; strncpy_with_null(finfo.group_name, field_group_name, RDC_MAX_STR_LENGTH); diff --git a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc index b810f66d54..4abaaa4318 100644 --- a/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc +++ b/projects/rdc/rdc_libs/rdc_client/src/RdcStandaloneHandler.cc @@ -95,41 +95,49 @@ bool RdcStandaloneHandler::copy_gpu_usage_info( target->power_usage.max_value = pstats.max_value(); target->power_usage.min_value = pstats.min_value(); target->power_usage.average = pstats.average(); + target->power_usage.standard_deviation = pstats.standard_deviation(); const ::rdc::JobStatsSummary& cstats = src.gpu_clock(); target->gpu_clock.max_value = cstats.max_value(); target->gpu_clock.min_value = cstats.min_value(); target->gpu_clock.average = cstats.average(); + target->gpu_clock.standard_deviation = cstats.standard_deviation(); const ::rdc::JobStatsSummary& ustats = src.gpu_utilization(); target->gpu_utilization.max_value = ustats.max_value(); target->gpu_utilization.min_value = ustats.min_value(); target->gpu_utilization.average = ustats.average(); + target->gpu_utilization.standard_deviation = ustats.standard_deviation(); const ::rdc::JobStatsSummary& mstats = src.memory_utilization(); target->memory_utilization.max_value = mstats.max_value(); target->memory_utilization.min_value = mstats.min_value(); target->memory_utilization.average = mstats.average(); + target->memory_utilization.standard_deviation = mstats.standard_deviation(); const ::rdc::JobStatsSummary& txstats = src.pcie_tx(); target->pcie_tx.max_value = txstats.max_value(); target->pcie_tx.min_value = txstats.min_value(); target->pcie_tx.average = txstats.average(); + target->pcie_tx.standard_deviation = txstats.standard_deviation(); const ::rdc::JobStatsSummary& rxstats = src.pcie_rx(); target->pcie_rx.max_value = rxstats.max_value(); target->pcie_rx.min_value = rxstats.min_value(); target->pcie_rx.average = rxstats.average(); + target->pcie_rx.standard_deviation = rxstats.standard_deviation(); const ::rdc::JobStatsSummary& mcstats = src.memory_clock(); target->memory_clock.max_value = mcstats.max_value(); target->memory_clock.min_value = mcstats.min_value(); target->memory_clock.average = mcstats.average(); + target->memory_clock.standard_deviation = mcstats.standard_deviation(); const ::rdc::JobStatsSummary& gtstats = src.gpu_temperature(); target->gpu_temperature.max_value = gtstats.max_value(); target->gpu_temperature.min_value = gtstats.min_value(); target->gpu_temperature.average = gtstats.average(); + target->gpu_temperature.standard_deviation = gtstats.standard_deviation(); return true; } diff --git a/projects/rdc/rdci/include/RdciStatsSubSystem.h b/projects/rdc/rdci/include/RdciStatsSubSystem.h index 69e9bdf40a..116f3211ef 100644 --- a/projects/rdc/rdci/include/RdciStatsSubSystem.h +++ b/projects/rdc/rdci/include/RdciStatsSubSystem.h @@ -39,6 +39,7 @@ class RdciStatsSubSystem: public RdciSubSystem { private: void show_help() const; void show_job_stats(const rdc_gpu_usage_info_t& gpu_info) const; + void show_job_stats_json(const rdc_gpu_usage_info_t& gpu_info) const; enum OPERATIONS { STATS_UNKNOWN = 0, diff --git a/projects/rdc/rdci/include/RdciSubSystem.h b/projects/rdc/rdci/include/RdciSubSystem.h index 6eb67f838e..86a0b793dd 100644 --- a/projects/rdc/rdci/include/RdciSubSystem.h +++ b/projects/rdc/rdci/include/RdciSubSystem.h @@ -39,7 +39,11 @@ class RdciSubSystem { virtual void process() = 0; virtual ~RdciSubSystem(); + + bool is_json_output() const; + protected: + void set_json_output(bool is_json); std::vector split_string(const std::string& s, char delimiter) const; void show_common_usage() const; @@ -52,6 +56,9 @@ class RdciSubSystem { std::string root_ca_; std::string client_cert_; std::string client_key_; + + private: + bool is_json_output_; }; typedef std::shared_ptr RdciSubSystemPtr; diff --git a/projects/rdc/rdci/src/RdciDiscoverySubSystem.cc b/projects/rdc/rdci/src/RdciDiscoverySubSystem.cc index 0ae9632bb2..02a411e8d1 100644 --- a/projects/rdc/rdci/src/RdciDiscoverySubSystem.cc +++ b/projects/rdc/rdci/src/RdciDiscoverySubSystem.cc @@ -35,11 +35,13 @@ RdciDiscoverySubSystem::RdciDiscoverySubSystem() : show_help_(false) { void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) { const int HOST_OPTIONS = 1000; + const int JSON_OPTIONS = 1001; const struct option long_options[] = { {"host", required_argument, nullptr, HOST_OPTIONS }, {"help", optional_argument, nullptr, 'h' }, {"unauth", optional_argument, nullptr, 'u' }, {"list", optional_argument, nullptr, 'l' }, + {"json", optional_argument, nullptr, JSON_OPTIONS }, { nullptr, 0 , nullptr, 0 } }; @@ -53,6 +55,9 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) { case HOST_OPTIONS: ip_port_ = optarg; break; + case JSON_OPTIONS: + set_json_output(true); + break; case 'h': show_help_ = true; return; @@ -77,12 +82,16 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) { } void RdciDiscoverySubSystem::show_help() const { + if (is_json_output()) return; std::cout << " discovery -- Used to discover and identify GPUs " << "and their attributes.\n\n"; std::cout << "Usage\n"; - std::cout << " rdci discovery [--host :port] [-u] -l\n"; + std::cout << " rdci discovery [--host :port] [--json]" + << " [-u] -l\n"; std::cout << "\nFlags:\n"; show_common_usage(); + std::cout << " --json " + << "Output using json.\n"; std::cout << " -l --list list GPU discovered" <<" on the system\n"; } @@ -101,14 +110,22 @@ void RdciDiscoverySubSystem::process() { throw RdcException(result, "Fail to get device information"); } if (count == 0) { - std::cout << "No GPUs find on the sytem\n"; + if (is_json_output()) { + std::cout << "\"gpus\" : [], \"status\": \"ok\""; + } else { + std::cout << "No GPUs find on the system\n"; + } return; } - std::cout << count << " GPUs found.\n"; - std::cout << "------------------------------------------------" - << "-----------------\n"; - std::cout << "GPU Index\t Device Information\n"; + if (is_json_output()) { + std::cout << "\"gpus\" : ["; + } else { + std::cout << count << " GPUs found.\n"; + std::cout << "------------------------------------------------" + << "-----------------\n"; + std::cout << "GPU Index\t Device Information\n"; + } for (uint32_t i = 0; i < count; i++) { rdc_device_attributes_t attribute; result = rdc_device_get_attributes(rdc_handle_, @@ -116,10 +133,22 @@ void RdciDiscoverySubSystem::process() { if (result != RDC_ST_OK) { return; } - std::cout << i << "\t\t" << attribute.device_name <:port] [-u] -l\n"; - std::cout << " rdci fieldgroup [--host :port] [-u] " - << "-c -f \n"; - std::cout << " rdci fieldgroup [--host :port] [-u] " + std::cout << " rdci fieldgroup [--host :port]" + << " [--json] [-u] -l\n"; + std::cout << " rdci fieldgroup [--host :port] [--json]" + << " [-u] -c -f \n"; + std::cout << " rdci fieldgroup [--host :port] [--json] [-u] " << "-g -i\n"; - std::cout << " rdci fieldgroup [--host :port] [-u] " + std::cout << " rdci fieldgroup [--host :port] [--json] [-u] " << "-d \n"; std::cout << "\nFlags:\n"; show_common_usage(); + std::cout << " --json " + << "Output using json.\n"; std::cout << " -l --list " << "List the field groups that currently exist for a host.\n"; std::cout << " -g --group groupId " @@ -143,6 +152,7 @@ void RdciFieldGroupSubSystem::process() { rdc_status_t result = RDC_ST_OK; rdc_field_group_info_t group_info; uint32_t count = 0; + std::string json_group_ids = "\"field_groups\": ["; switch (field_group_ops_) { case FIELD_GROUP_HELP: show_help(); @@ -170,9 +180,14 @@ void RdciFieldGroupSubSystem::process() { result = rdc_group_field_create(rdc_handle_, fields.size(), &field_ids[0], group_name_.c_str(), &group_id); if (result == RDC_ST_OK) { - std::cout << "Successfully created a field group" - << " with a group ID " << group_id << std::endl; - return; + if (is_json_output()) { + std::cout << "\"field_group_id\": \"" << group_id + <<"\", \"status\": \"ok\""; + } else { + std::cout << "Successfully created a field group" + << " with a group ID " << group_id << std::endl; + return; + } } break; } @@ -184,8 +199,13 @@ void RdciFieldGroupSubSystem::process() { } result = rdc_group_field_destroy(rdc_handle_, group_id_); if (result == RDC_ST_OK) { - std::cout << "Successfully deleted the field group " + if (is_json_output()) { + std::cout << "\"field_group_id\": \"" << group_id_ + <<"\", \"status\": \"ok\""; + } else { + std::cout << "Successfully deleted the field group " << group_id_ << std::endl; + } return; } break; @@ -195,8 +215,11 @@ void RdciFieldGroupSubSystem::process() { rdc_handle_, group_id_list, &count); if ( result != RDC_ST_OK) break; - std::cout << count << " field group found.\n"; - std::cout << "GroupID\t" << "GroupName\t" << "FieldIds\n"; + if (!is_json_output()) { + std::cout << count << " field group found.\n"; + std::cout << "GroupID\t" << "GroupName\t" << "FieldIds\n"; + } + for (uint32_t i = 0; i < count; i++) { result = rdc_group_field_get_info( rdc_handle_, group_id_list[i], &group_info); @@ -206,15 +229,44 @@ void RdciFieldGroupSubSystem::process() { std::to_string(group_id_list[i])); } - std::cout << group_id_list[i] << "\t" + if (!is_json_output()) { + std::cout << group_id_list[i] << "\t" << group_info.group_name << "\t\t"; + } else { + json_group_ids += "{\"group_id\": \""; + json_group_ids += std::to_string(group_id_list[i]); + json_group_ids += "\", \"group_name\": \""; + json_group_ids += group_info.group_name; + json_group_ids += "\", \"field_ids\": ["; + } + for (uint32_t j = 0; j < group_info.count; j++) { - std::cout << group_info.field_ids[j]; + if (!is_json_output()) { + std::cout << group_info.field_ids[j]; + } else { + json_group_ids += + std::to_string(group_info.field_ids[j]); + } if ( j < group_info.count -1 ) { - std::cout << ","; + if (!is_json_output()) { + std::cout << ","; + } else { + json_group_ids += ","; + } } } - std::cout << std::endl; + if (!is_json_output()) { + std::cout << std::endl; + } else { + json_group_ids += "]}"; + if (i != count -1) { + json_group_ids += ","; + } + } + } + if (is_json_output()) { + json_group_ids += "], \"status\": \"ok\""; + std::cout << json_group_ids; } break; case FIELD_GROUP_INFO: @@ -226,13 +278,29 @@ void RdciFieldGroupSubSystem::process() { result = rdc_group_field_get_info( rdc_handle_, group_id_, &group_info); if (result == RDC_ST_OK) { - std::cout << "Group name: " << group_info.group_name - << std::endl; - std::cout << "Field Ids: "; - for (uint32_t i = 0; i < group_info.count; i++) { - std::cout << group_info.field_ids[i] << " "; + if (is_json_output()) { + std::cout << "\"group_name\": \"" << group_info.group_name + << "\", \"field_ids\": ["; + } else { + std::cout << "Group name: " << group_info.group_name + << std::endl; + std::cout << "Field Ids: "; + } + for (uint32_t i = 0; i < group_info.count; i++) { + if (is_json_output()) { + std::cout << group_info.field_ids[i]; + if ( i != group_info.count-1 ) { + std::cout << ","; + } + } else { + std::cout << group_info.field_ids[i] << " "; + } + } + if (is_json_output()) { + std::cout << "], \"status\": \"ok\""; + } else { + std::cout << std::endl; } - std::cout << std::endl; return; } break; diff --git a/projects/rdc/rdci/src/RdciGroupSubSystem.cc b/projects/rdc/rdci/src/RdciGroupSubSystem.cc index 2c7a159f6f..301bbed067 100644 --- a/projects/rdc/rdci/src/RdciGroupSubSystem.cc +++ b/projects/rdc/rdci/src/RdciGroupSubSystem.cc @@ -37,6 +37,7 @@ RdciGroupSubSystem::RdciGroupSubSystem(): void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) { const int HOST_OPTIONS = 1000; + const int JSON_OPTIONS = 1001; const struct option long_options[] = { {"host", required_argument, nullptr, HOST_OPTIONS }, {"help", optional_argument, nullptr, 'h' }, @@ -47,6 +48,7 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) { {"add", required_argument, nullptr, 'a' }, {"info", optional_argument, nullptr, 'i' }, {"delete", required_argument, nullptr, 'd' }, + {"json", optional_argument, nullptr, JSON_OPTIONS }, { nullptr, 0 , nullptr, 0 } }; @@ -59,6 +61,9 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) { case HOST_OPTIONS: ip_port_ = optarg; break; + case JSON_OPTIONS: + set_json_output(true); + break; case 'h': group_ops_ = GROUP_HELP; return; @@ -116,18 +121,22 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) { } void RdciGroupSubSystem::show_help() const { + if (is_json_output()) return; std::cout << " group -- Used to create and maintain groups of GPUs.\n\n"; std::cout << "Usage\n"; - std::cout << " rdci group [--host :port] [-u] -l\n"; - std::cout << " rdci group [--host :port] [-u] -c " - << "[-a ]\n"; - std::cout << " rdci group [--host :port] [-u] -g " - << "[-a ]\n"; - std::cout << " rdci group [--host :port] [-u] " + std::cout << " rdci group [--host :port] [--json] [-u] -l\n"; + std::cout << " rdci group [--host :port] [--json] [-u]" + << " -c [-a ]\n"; + std::cout << " rdci group [--host :port] [--json] [-u]" + << " -g [-a ]\n"; + std::cout << " rdci group [--host :port] [--json] [-u] " << "-g [-i]\n"; - std::cout << " rdci group [--host :port] [-u] -d \n"; + std::cout << " rdci group [--host :port] [--json] [-u] " + << "-d \n"; std::cout << "\nFlags:\n"; show_common_usage(); + std::cout << " --json " + << "Output using json.\n"; std::cout << " -l --list " << "List the groups that currently exist for a host.\n"; std::cout << " -g --group groupId " @@ -148,6 +157,7 @@ void RdciGroupSubSystem::process() { std::vector gpu_ids; rdc_group_info_t group_info; uint32_t count = 0; + std::string json_group_ids = "\"gpu_groups\": ["; switch (group_ops_) { case GROUP_HELP: show_help(); @@ -181,8 +191,13 @@ void RdciGroupSubSystem::process() { } if (result == RDC_ST_OK) { - std::cout << "Successfully created group with a group ID " - << group_id << std::endl; + if (is_json_output()) { + std::cout << "\"group_id\": \"" << group_id + <<"\", \"status\": \"ok\""; + } else { + std::cout << "Successfully created group with a group ID " + << group_id << std::endl; + } return; } break; @@ -194,8 +209,13 @@ void RdciGroupSubSystem::process() { } result = rdc_group_gpu_destroy(rdc_handle_, group_id_); if (result == RDC_ST_OK) { - std::cout << "Successfully deleted the group " - << group_id_ << std::endl; + if (is_json_output()) { + std::cout << "\"group_id\": \"" << group_id_ + <<"\", \"status\": \"ok\""; + } else { + std::cout << "Successfully deleted the group " + << group_id_ << std::endl; + } return; } break; @@ -204,8 +224,10 @@ void RdciGroupSubSystem::process() { result = rdc_group_get_all_ids(rdc_handle_, group_id_list, &count); if ( result != RDC_ST_OK) break; - std::cout << count << " group found.\n"; - std::cout << "GroupID\t" << "GroupName\t" << "GPUIndex\n"; + if (!is_json_output()) { + std::cout << count << " group found.\n"; + std::cout << "GroupID\t" << "GroupName\t" << "GPUIndex\n"; + } for (uint32_t i = 0; i < count; i++) { result = rdc_group_gpu_get_info(rdc_handle_, group_id_list[i], &group_info); @@ -215,15 +237,43 @@ void RdciGroupSubSystem::process() { + std::to_string(group_id_list[i])); } - std::cout << group_id_list[i] << "\t" - << group_info.group_name << "\t\t"; + if (!is_json_output()) { + std::cout << group_id_list[i] << "\t" + << group_info.group_name << "\t\t"; + } else { + json_group_ids += "{\"group_id\": \""; + json_group_ids += std::to_string(group_id_list[i]); + json_group_ids += "\", \"group_name\": \""; + json_group_ids += group_info.group_name; + json_group_ids += "\", \"gpu_indexes\": ["; + } for (uint32_t j = 0; j < group_info.count; j++) { - std::cout << group_info.entity_ids[j]; + if (!is_json_output()) { + std::cout << group_info.entity_ids[j]; + } else { + json_group_ids += + std::to_string(group_info.entity_ids[j]); + } if (j < group_info.count -1) { - std::cout << ","; + if (!is_json_output()) { + std::cout << ","; + } else { + json_group_ids += ","; + } } } - std::cout << std::endl; + if (!is_json_output()) { + std::cout << std::endl; + } else { + json_group_ids += "]}"; + if (i != count -1) { + json_group_ids += ","; + } + } + } + if (is_json_output()) { + json_group_ids += "], \"status\": \"ok\""; + std::cout << json_group_ids; } break; case GROUP_ADD_GPUS: @@ -247,8 +297,13 @@ void RdciGroupSubSystem::process() { } } if (result == RDC_ST_OK) { - std::cout << "Successfully added the GPU " << gpu_ids_ - << " to group "<< group_id_ << std::endl; + if (is_json_output()) { + std::cout << "\"group_id\": \"" << group_id_ + <<"\", \"status\": \"ok\""; + } else { + std::cout << "Successfully added the GPU " << gpu_ids_ + << " to group "<< group_id_ << std::endl; + } return; } break; @@ -261,13 +316,29 @@ void RdciGroupSubSystem::process() { result = rdc_group_gpu_get_info(rdc_handle_, group_id_, &group_info); if (result == RDC_ST_OK) { - std::cout << "Group name: " - << group_info.group_name << std::endl; - std::cout << "Gpu indexes: "; - for (uint32_t i = 0; i < group_info.count; i++) { - std::cout << group_info.entity_ids[i] << " "; + if (is_json_output()) { + std::cout << "\"group_name\": \"" << group_info.group_name + << "\", \"gpu_indexes\": ["; + } else { + std::cout << "Group name: " + << group_info.group_name << std::endl; + std::cout << "Gpu indexes: "; + } + for (uint32_t i = 0; i < group_info.count; i++) { + if (is_json_output()) { + std::cout << group_info.entity_ids[i]; + if ( i != group_info.count-1 ) { + std::cout << ","; + } + } else { + std::cout << group_info.entity_ids[i] << " "; + } + } + if (is_json_output()) { + std::cout << "], \"status\": \"ok\""; + } else { + std::cout << std::endl; } - std::cout << std::endl; return; } break; diff --git a/projects/rdc/rdci/src/RdciStatsSubSystem.cc b/projects/rdc/rdci/src/RdciStatsSubSystem.cc index 5410160ad8..e47904abe8 100644 --- a/projects/rdc/rdci/src/RdciStatsSubSystem.cc +++ b/projects/rdc/rdci/src/RdciStatsSubSystem.cc @@ -23,6 +23,7 @@ THE SOFTWARE. #include #include #include +#include #include #include #include "rdc_lib/rdc_common.h" @@ -42,6 +43,7 @@ RdciStatsSubSystem::~RdciStatsSubSystem() { void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) { const int HOST_OPTIONS = 1000; + const int JSON_OPTIONS = 1001; const struct option long_options[] = { {"host", required_argument, nullptr, HOST_OPTIONS }, {"help", optional_argument, nullptr, 'h' }, @@ -53,6 +55,7 @@ void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) { {"jremoveall", optional_argument, nullptr, 'a' }, {"verbose", optional_argument, nullptr, 'v'}, {"group", required_argument, nullptr, 'g'}, + {"json", optional_argument, nullptr, JSON_OPTIONS}, { nullptr, 0 , nullptr, 0 } }; @@ -66,6 +69,9 @@ void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) { case HOST_OPTIONS: ip_port_ = optarg; break; + case JSON_OPTIONS: + set_json_output(true); + break; case 'h': stats_ops_ = STATS_HELP; return; @@ -119,17 +125,22 @@ void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) { } void RdciStatsSubSystem::show_help() const { + if (is_json_output()) return; std::cout << " stats -- Used to view job statistics.\n\n"; std::cout << "Usage\n"; - std::cout << " rdci stats [--host :port] [-u] -s " - << " -g \n"; - std::cout << " rdci stats [--host :port] [-u] -x \n"; - std::cout << " rdci stats [--host :port] [-u] [-v] " + std::cout << " rdci stats [--host :port] [-u] [--json] " + << "-s -g \n"; + std::cout << " rdci stats [--host :port] [-u] [--json] " + << "-x \n"; + std::cout << " rdci stats [--host :port] [-u] [--json] [-v] " << "-j \n"; - std::cout << " rdci stats [--host :port] [-u] -r \n"; - std::cout << " rdci stats [--host :port] [-u] -a\n"; + std::cout << " rdci stats [--host :port] [-u] [--json] " + << "-r \n"; + std::cout << " rdci stats [--host :port] [-u] [--json] -a\n"; std::cout << "\nFlags:\n"; show_common_usage(); + std::cout << " --json " + << "Output using json.\n"; std::cout << " -s --jstart Start recording " << "job statistics.\n"; std::cout << " -g --group-id The GPU group to query " @@ -146,14 +157,103 @@ void RdciStatsSubSystem::show_help() const { << "all job statistics.\n"; } +void RdciStatsSubSystem::show_job_stats_json( + const rdc_gpu_usage_info_t& gpu_info) const { + std::cout << "\"start_time\": " << gpu_info.start_time << ","; + std::cout << "\"end_time\": " << gpu_info.end_time << ","; + std::cout << "\"execution_time\": " << + (gpu_info.end_time-gpu_info.start_time) << ","; + std::cout << "\"energy_consumed\": " << gpu_info.energy_consumed << ","; + + std::cout << "\"power_usage_max\": " + << gpu_info.power_usage.max_value << ","; + std::cout << "\"power_usage_min\": " + << gpu_info.power_usage.min_value << ","; + std::cout << "\"power_usage_avg\": " + << gpu_info.power_usage.average << ","; + std::cout << "\"power_usage_stanard_deviation\": " + << gpu_info.power_usage.standard_deviation << ","; + + std::cout << "\"gpu_clock_max\": " + << gpu_info.gpu_clock.max_value << ","; + std::cout << "\"gpu_clock_min\": " + << gpu_info.gpu_clock.min_value << ","; + std::cout << "\"gpu_clock_avg\": " + << gpu_info.gpu_clock.average << ","; + std::cout << "\"gpu_clock_stanard_deviation\": " + << gpu_info.gpu_clock.standard_deviation << ","; + + std::cout << "\"memory_clock_max\": " + << gpu_info.memory_clock.max_value << ","; + std::cout << "\"memory_clock_min\": " + << gpu_info.memory_clock.min_value << ","; + std::cout << "\"memory_clock_avg\": " + << gpu_info.memory_clock.average << ","; + std::cout << "\"memory_clock_stanard_deviation\": " + << gpu_info.memory_clock.standard_deviation << ","; + + std::cout << "\"gpu_utilization_max\": " + << gpu_info.gpu_utilization.max_value << ","; + std::cout << "\"gpu_utilization_min\": " + << gpu_info.gpu_utilization.min_value << ","; + std::cout << "\"gpu_utilization_avg\": " + << gpu_info.gpu_utilization.average << ","; + std::cout << "\"gpu_utilization_deviation\": " + << gpu_info.gpu_utilization.standard_deviation << ","; + + std::cout << "\"max_gpu_memory_used\": " + << gpu_info.max_gpu_memory_used << ","; + + std::cout << "\"memory_utilization_max\": " + << gpu_info.memory_utilization.max_value << ","; + std::cout << "\"memory_utilization_min\": " + << gpu_info.memory_utilization.min_value << ","; + std::cout << "\"memory_utilization_avg\": " + << gpu_info.memory_utilization.average << ","; + std::cout << "\"memory_utilization_stanard_deviation\": " + << gpu_info.memory_utilization.standard_deviation << ","; + + std::cout << "\"gpu_temperature_max\": " + << gpu_info.gpu_temperature.max_value << ","; + std::cout << "\"gpu_temperature_min\": " + << gpu_info.gpu_temperature.min_value << ","; + std::cout << "\"gpu_temperature_avg\": " + << gpu_info.gpu_temperature.average << ","; + std::cout << "\"gpu_temperature_stanard_deviation\": " + << gpu_info.gpu_temperature.standard_deviation << ","; + + std::cout << "\"pcie_rx_max\": " + << gpu_info.pcie_rx.max_value << ","; + std::cout << "\"pcie_rx_min\": " + << gpu_info.pcie_rx.min_value << ","; + std::cout << "\"pcie_rx_avg\": " + << gpu_info.pcie_rx.average << ","; + std::cout << "\"pcie_rx_stanard_deviation\": " + << gpu_info.pcie_rx.standard_deviation << ","; + + std::cout << "\"pcie_tx_max\": " + << gpu_info.pcie_tx.max_value << ","; + std::cout << "\"pcie_tx_min\": " + << gpu_info.pcie_tx.min_value << ","; + std::cout << "\"pcie_tx_avg\": " + << gpu_info.pcie_tx.average << ","; + std::cout << "\"pcie_tx_stanard_deviation\": " + << gpu_info.pcie_tx.standard_deviation << ","; + + std::cout << "\"ecc_correct\": " << gpu_info.ecc_correct << ","; + std::cout << "\"ecc_uncorrect\": " << gpu_info.ecc_uncorrect; +} + void RdciStatsSubSystem::show_job_stats( const rdc_gpu_usage_info_t& gpu_info) const { std::cout << "|------- Execution Stats ----------" << "+------------------------------------\n"; std::cout << "| Start Time | " - << gpu_info.start_time << "\n"; + << std::put_time(std::gmtime(reinterpret_cast + (&gpu_info.start_time)), "%c %Z") << "\n"; std::cout << "| End Time | " - << gpu_info.end_time << "\n"; + << std::put_time(std::gmtime(reinterpret_cast + (&gpu_info.end_time)), "%c %Z") << "\n"; std::cout << "| Total Execution Time (sec) | " << (gpu_info.end_time-gpu_info.start_time) << "\n"; std::cout << "+------- Performance Stats --------" @@ -163,37 +263,53 @@ void RdciStatsSubSystem::show_job_stats( std::cout << "| Power Usage (Watts) | " << "Max: " << gpu_info.power_usage.max_value<< " Min: "<< gpu_info.power_usage.min_value << " Avg: " - << gpu_info.power_usage.average << "\n"; + << gpu_info.power_usage.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.power_usage.standard_deviation << "\n"; std::cout << "| GPU Clock (MHz) | " << "Max: " << gpu_info.gpu_clock.max_value << " Min: " << gpu_info.gpu_clock.min_value << " Avg: " - << gpu_info.gpu_clock.average << "\n"; + << gpu_info.gpu_clock.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.gpu_clock.standard_deviation << "\n"; std::cout << "| Memory Clock (MHz) | " << "Max: " << gpu_info.memory_clock.max_value << " Min: " << gpu_info.memory_clock.min_value << " Avg: " - << gpu_info.memory_clock.average << "\n"; + << gpu_info.memory_clock.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.memory_clock.standard_deviation << "\n"; std::cout << "| GPU Utilization (%) | " << "Max: " << gpu_info.gpu_utilization.max_value <<" Min: " << gpu_info.gpu_utilization.min_value << " Avg: " << - gpu_info.gpu_utilization.average << "\n"; + gpu_info.gpu_utilization.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.gpu_utilization.standard_deviation << "\n"; std::cout << "| Max GPU Memory Used (bytes) | " << gpu_info.max_gpu_memory_used << "\n"; std::cout << "| Memory Utilization (%) | " << "Max: " << gpu_info.memory_utilization.max_value <<" Min: "<< gpu_info.memory_utilization.min_value - << " Avg: " << gpu_info.memory_utilization.average << "\n"; + << " Avg: " << gpu_info.memory_utilization.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.memory_utilization.standard_deviation << "\n"; std::cout << "| GPU Temperature (Celsius) | " << "Max: " << gpu_info.gpu_temperature.max_value <<" Min: "<< gpu_info.gpu_temperature.min_value - << " Avg: " << gpu_info.gpu_temperature.average << "\n"; + << " Avg: " << gpu_info.gpu_temperature.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.gpu_temperature.standard_deviation << "\n"; std::cout << "| PCIe Rx Bandwidth (megabytes) | " << "Max: " << gpu_info.pcie_rx.max_value <<" Min: "<< gpu_info.pcie_rx.min_value - << " Avg: " << gpu_info.pcie_rx.average << "\n"; + << " Avg: " << gpu_info.pcie_rx.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.pcie_rx.standard_deviation << "\n"; std::cout << "| PCIe Tx Bandwidth (megabytes) | " << "Max: " << gpu_info.pcie_tx.max_value <<" Min: "<< gpu_info.pcie_tx.min_value - << " Avg: " << gpu_info.pcie_tx.average << "\n"; + << " Avg: " << gpu_info.pcie_tx.average << " SD: " + << std::fixed << std::setprecision(2) + << gpu_info.pcie_tx.standard_deviation << "\n"; std::cout << "| Correctable ECC Errors | " << gpu_info.ecc_correct << "\n"; std::cout << "| Uncorrectable ECC Errors | " @@ -217,8 +333,13 @@ void RdciStatsSubSystem::process() { if (result != RDC_ST_OK) { throw RdcException(result, rdc_status_string(result)); } - std::cout << "Successfully started recording job " + if (is_json_output()) { + std::cout << "\"job_id\": \"" << job_id_ << "\", \"group_id\": \"" + << group_id_ <<"\", \"status\": \"ok\""; + } else { + std::cout << "Successfully started recording job " << job_id_ << " with a group ID " << group_id_ << std::endl; + } return; } @@ -228,8 +349,13 @@ void RdciStatsSubSystem::process() { if (result != RDC_ST_OK) { throw RdcException(result, rdc_status_string(result)); } - std::cout << "Successfully stopped recording job " + if (is_json_output()) { + std::cout << "\"job_id\": \"" << job_id_ + << "\", \"status\": \"ok\""; + } else { + std::cout << "Successfully stopped recording job " << job_id_ << std::endl; + } return; } @@ -241,14 +367,26 @@ void RdciStatsSubSystem::process() { throw RdcException(result, rdc_status_string(result)); } - std::cout << "| Summary \n"; - show_job_stats(job_info.summary); + if (!is_json_output()) { + std::cout << "| Summary \n"; + show_job_stats(job_info.summary); + } else { + std::cout << "\"job_summary\" : {"; + show_job_stats_json(job_info.summary); + std::cout << "}"; + } if (is_verbose_ == false) { return; } for (uint32_t i = 0; i < job_info.num_gpus; i++) { - std::cout << "| GPU " << i << "\n"; - show_job_stats(job_info.gpus[i]); + if (!is_json_output()) { + std::cout << "| GPU " << i << "\n"; + show_job_stats(job_info.gpus[i]); + } else { + std:: cout << ", \"gpu_" << i << "\": {"; + show_job_stats_json(job_info.gpus[i]); + std::cout << "}"; + } } return; } @@ -259,8 +397,13 @@ void RdciStatsSubSystem::process() { if (result != RDC_ST_OK) { throw RdcException(result, rdc_status_string(result)); } - std::cout << "Successfully removed job " + if (is_json_output()) { + std::cout << "\"job_id\": \"" << job_id_ + << "\", \"status\": \"ok\""; + } else { + std::cout << "Successfully removed job " << job_id_ << std::endl; + } return; } @@ -269,7 +412,11 @@ void RdciStatsSubSystem::process() { if (result != RDC_ST_OK) { throw RdcException(result, rdc_status_string(result)); } - std::cout << "Successfully removed all jobs\n"; + if (is_json_output()) { + std::cout << "\"status\": \"ok\""; + } else { + std::cout << "Successfully removed all jobs\n"; + } return; } } diff --git a/projects/rdc/rdci/src/RdciSubSystem.cc b/projects/rdc/rdci/src/RdciSubSystem.cc index 8d324d4fab..7d98525865 100644 --- a/projects/rdc/rdci/src/RdciSubSystem.cc +++ b/projects/rdc/rdci/src/RdciSubSystem.cc @@ -33,13 +33,18 @@ RdciSubSystem::RdciSubSystem(): , use_auth_(true) , root_ca_("/etc/rdc/client/certs/rdc_cacert.pem") , client_cert_("/etc/rdc/client/certs/rdc_client_cert.pem") - , client_key_("/etc/rdc/client/private/rdc_client_cert.key") { + , client_key_("/etc/rdc/client/private/rdc_client_cert.key") + , is_json_output_(false) { rdc_status_t status = rdc_init(0); if (status != RDC_ST_OK) { throw RdcException(status, "RDC initialize fail"); } } +bool RdciSubSystem::is_json_output() const { + return is_json_output_; +} + bool RdciSubSystem::get_field_id_from_name( const std::string& name, uint32_t& value) const { const std::map field_name_to_id = { @@ -154,6 +159,11 @@ void RdciSubSystem::show_common_usage() const { << "information and exits.\n"; } +void RdciSubSystem::set_json_output(bool is_json) { + is_json_output_ = is_json; + std::cout << "{"; +} + RdciSubSystem::~RdciSubSystem() { if (rdc_handle_) { rdc_disconnect(rdc_handle_); @@ -161,6 +171,10 @@ RdciSubSystem::~RdciSubSystem() { } rdc_shutdown(); + + if (is_json_output_) { + std::cout << "}" << std::endl; + } } } // namespace rdc diff --git a/projects/rdc/rdci/src/rdci.cc b/projects/rdc/rdci/src/rdci.cc index 63a9a1c457..cbee32dae1 100644 --- a/projects/rdc/rdci/src/rdci.cc +++ b/projects/rdc/rdci/src/rdci.cc @@ -42,9 +42,9 @@ int main(int argc, char ** argv) { exit(0); } + amd::rdc::RdciSubSystemPtr subsystem; try { std::string subsystem_name = argv[1]; - amd::rdc::RdciSubSystemPtr subsystem; if (subsystem_name == "discovery") { subsystem.reset(new amd::rdc::RdciDiscoverySubSystem()); } else if (subsystem_name == "dmon") { @@ -66,11 +66,20 @@ int main(int argc, char ** argv) { subsystem->process(); } catch (const amd::rdc::RdcException& e) { - std::cout << "rdci Error: " << e.what() << std::endl; + if (subsystem && subsystem->is_json_output()) { + std::cout << "\"status\": \"error\", \"description\": \"" + << e.what() << '"'; + } else { + std::cout << "rdci Error: " << e.what() << std::endl; + } return e.error_code(); } catch (...) { - std::cout << "Unhandled exception." << std::endl; - return 1; + if (subsystem && subsystem->is_json_output()) { + std::cout << "\"status\": \"error\", \"description\": " + << "\"Unhandled exception.\""; + } else { + std::cout << "Unhandled exception." << std::endl; + } return 1; } return 0; diff --git a/projects/rdc/server/src/rdc_api_service.cc b/projects/rdc/server/src/rdc_api_service.cc index e7e3119181..dd7e99bdcc 100755 --- a/projects/rdc/server/src/rdc_api_service.cc +++ b/projects/rdc/server/src/rdc_api_service.cc @@ -30,6 +30,8 @@ THE SOFTWARE. #include "rdc.grpc.pb.h" // NOLINT #include "rdc/rdc_api_service.h" #include "rdc/rdc.h" +#include "rdc_lib/RdcLogger.h" +#include "rdc_lib/rdc_common.h" namespace amd { namespace rdc { @@ -488,41 +490,49 @@ bool RdcAPIServiceImpl::copy_gpu_usage_info(const rdc_gpu_usage_info_t& src, stats->set_max_value(src.power_usage.max_value); stats->set_min_value(src.power_usage.min_value); stats->set_average(src.power_usage.average); + stats->set_standard_deviation(src.power_usage.standard_deviation); stats = target->mutable_gpu_clock(); stats->set_max_value(src.gpu_clock.max_value); stats->set_min_value(src.gpu_clock.min_value); stats->set_average(src.gpu_clock.average); + stats->set_standard_deviation(src.gpu_clock.standard_deviation); stats = target->mutable_gpu_utilization(); stats->set_max_value(src.gpu_utilization.max_value); stats->set_min_value(src.gpu_utilization.min_value); stats->set_average(src.gpu_utilization.average); + stats->set_standard_deviation(src.gpu_utilization.standard_deviation); stats = target->mutable_memory_utilization(); stats->set_max_value(src.memory_utilization.max_value); stats->set_min_value(src.memory_utilization.min_value); stats->set_average(src.memory_utilization.average); + stats->set_standard_deviation(src.memory_utilization.standard_deviation); stats = target->mutable_pcie_tx(); stats->set_max_value(src.pcie_tx.max_value); stats->set_min_value(src.pcie_tx.min_value); stats->set_average(src.pcie_tx.average); + stats->set_standard_deviation(src.pcie_tx.standard_deviation); stats = target->mutable_pcie_rx(); stats->set_max_value(src.pcie_rx.max_value); stats->set_min_value(src.pcie_rx.min_value); stats->set_average(src.pcie_rx.average); + stats->set_standard_deviation(src.pcie_rx.standard_deviation); stats = target->mutable_memory_clock(); stats->set_max_value(src.memory_clock.max_value); stats->set_min_value(src.memory_clock.min_value); stats->set_average(src.memory_clock.average); + stats->set_standard_deviation(src.memory_clock.standard_deviation); stats = target->mutable_gpu_temperature(); stats->set_max_value(src.gpu_temperature.max_value); stats->set_min_value(src.gpu_temperature.min_value); stats->set_average(src.gpu_temperature.average); + stats->set_standard_deviation(src.gpu_temperature.standard_deviation); return true; } diff --git a/projects/rdc/server/src/rdc_server_main.cc b/projects/rdc/server/src/rdc_server_main.cc index 5a5a5ddf3b..ee169a300b 100755 --- a/projects/rdc/server/src/rdc_server_main.cc +++ b/projects/rdc/server/src/rdc_server_main.cc @@ -312,6 +312,12 @@ RDCServer::ShutDown(void) { delete rdc_admin_service_; rdc_admin_service_ = nullptr; } + + if (api_service_) { + delete api_service_; + api_service_ = nullptr; + } + } static void * ProcessSignalLoop(void *server_ptr) {