Support standard deviation and json output for job stats

In the job stats, in addition to the max, min and average,
it will also display the standard deviation.

A new option --json is added to rdci to output the results
in JSON format.

In the job stats, display the start and end times as GMT time
instead of raw timestamps.

Change-Id: If245c4fc4854a1dc867f97ff5aa9112af7962eca


[ROCm/rdc commit: e6d910f67a]
This commit is contained in:
Bill(Shuzhou) Liu
2020-07-14 15:20:08 -04:00
committad av Chris Freehill
förälder bf248131cb
incheckning 588ea96dd2
17 ändrade filer med 515 tillägg och 112 borttagningar
+1
Visa fil
@@ -233,6 +233,7 @@ typedef struct {
uint64_t max_value; //!< Maximum value measured
uint64_t min_value; //!< Minimum value measured
uint64_t average; //!< Average value measured
double standard_deviation; //!< The standard deviation
} rdc_stats_summary_t;
/**
@@ -46,6 +46,15 @@ struct FieldSummaryStats {
int64_t max_value;
int64_t min_value;
int64_t total_value;
// Use Welford algorithm to calculate the standard deviations.
// https://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
// https://www.johndcook.com/blog/standard_deviation/
double old_m;
double old_s;
double new_m;
double new_s;
uint64_t last_time;
uint64_t count;
};
@@ -100,6 +109,8 @@ class RdcCacheManagerImpl: public RdcCacheManager {
void set_summary(const FieldSummaryStats & stats,
rdc_stats_summary_t& gpu, rdc_stats_summary_t& summary, // NOLINT
unsigned int adjuster);
void set_average_summary(
rdc_stats_summary_t& summary, uint32_t num_gpus); // NOLINT
RdcCacheSamples cache_samples_;
RdcJobStatsCache cache_jobs_;
std::mutex cache_mutex_;
+1 -1
Visa fil
@@ -414,7 +414,7 @@ message JobStatsSummary {
uint64 max_value = 1;
uint64 min_value = 2;
uint64 average = 3;
double standard_deviation = 4;
}
message GpuUsageInfo {
+2 -2
Visa fil
@@ -160,7 +160,7 @@ message("RDC_LIB_INC_LIST=${RDC_LIB_INC_LIST}")
link_directories(${RSMI_LIB_DIR} "${GRPC_ROOT}/lib" "${GRPC_ROOT}/lib64")
add_library(${RDC_LIB} SHARED ${RDC_LIB_SRC_LIST} ${RDC_LIB_INC_LIST})
target_link_libraries(${RDC_LIB} pthread rocm_smi64)
target_link_libraries(${RDC_LIB} ${BOOTSTRAP_LIB} pthread rocm_smi64)
target_include_directories(${RDC_LIB} PRIVATE
"${PROJECT_SOURCE_DIR}"
"${PROJECT_SOURCE_DIR}/include"
@@ -191,7 +191,7 @@ set(RDCCLIENT_LIB_INC_LIST ${RDCCLIENT_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib
message("RDCCLIENT_LIB_INC_LIST=${RDCCLIENT_LIB_INC_LIST}")
add_library(${RDCCLIENT_LIB} SHARED ${RDCCLIENT_LIB_SRC_LIST} ${RDCCLIENT_LIB_INC_LIST})
target_link_libraries(${RDCCLIENT_LIB} pthread rt grpc grpc++ grpc++_reflection
target_link_libraries(${RDCCLIENT_LIB} ${BOOTSTRAP_LIB} pthread rt grpc grpc++ grpc++_reflection
dl protobuf)
target_include_directories(${RDCCLIENT_LIB} PRIVATE
"${GRPC_ROOT}/include"
@@ -21,6 +21,7 @@ THE SOFTWARE.
*/
#include "rdc_lib/impl/RdcCacheManagerImpl.h"
#include <sys/time.h>
#include <cmath>
#include <ctime>
#include <sstream>
#include "rdc_lib/RdcLogger.h"
@@ -213,6 +214,10 @@ rdc_status_t RdcCacheManagerImpl::rdc_update_job_stats(uint32_t gpu_index,
if (value.field_id == RDC_FI_POWER_USAGE) {
gpu_iter->second.energy_last_time = value.ts;
}
// https://www.johndcook.com/blog/standard_deviation/
fsummary->second.old_s = 0;
fsummary->second.old_m = fsummary->second.new_m = value.value.l_int;
return RDC_ST_OK;
}
if (value.field_id == RDC_FI_POWER_USAGE) {
@@ -229,6 +234,15 @@ rdc_status_t RdcCacheManagerImpl::rdc_update_job_stats(uint32_t gpu_index,
fsummary->second.last_time = value.ts;
fsummary->second.count++;
// https://www.johndcook.com/blog/standard_deviation/
fsummary->second.new_m = fsummary->second.old_m +
(value.value.l_int - fsummary->second.old_m)/fsummary->second.count;
fsummary->second.new_s = fsummary->second.old_s +
(value.value.l_int - fsummary->second.old_m)*
(value.value.l_int - fsummary->second.new_m);
fsummary->second.old_m = fsummary->second.new_m;
fsummary->second.old_s = fsummary->second.new_s;
return RDC_ST_OK;
}
@@ -248,6 +262,11 @@ void RdcCacheManagerImpl::set_summary(const FieldSummaryStats & stats,
summary.min_value = std::min(summary.min_value, gpu.min_value);
//< save total for future average calculation.
summary.average += gpu.average;
//< calculate the sample variance
gpu.standard_deviation = std::sqrt((stats.count > 1)
? stats.new_s/(stats.count - 1) : 0.0)/adjuster;
summary.standard_deviation += gpu.standard_deviation;
}
rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(const char jobId[64],
@@ -274,15 +293,17 @@ rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(const char jobId[64],
summary_info.max_gpu_memory_used = 0;
summary_info.ecc_correct = 0;
summary_info.ecc_uncorrect = 0;
summary_info.power_usage = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.pcie_tx = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.pcie_rx = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.gpu_temperature = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.memory_clock = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.gpu_clock = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.gpu_utilization = {0, std::numeric_limits<uint64_t>::max(), 0};
summary_info.power_usage = {0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.pcie_tx = {0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.pcie_rx = {0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.gpu_temperature =
{0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.memory_clock = {0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.gpu_clock = {0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.gpu_utilization =
{0, std::numeric_limits<uint64_t>::max(), 0, 0};
summary_info.memory_utilization = {0,
std::numeric_limits<uint64_t>::max(), 0};
std::numeric_limits<uint64_t>::max(), 0, 0};
p_job_info->num_gpus = job_stats->second.gpu_stats.size();
@@ -363,27 +384,25 @@ rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(const char jobId[64],
}
}
}
// Get the average of the summary
summary_info.power_usage.average = summary_info.power_usage.average/
p_job_info->num_gpus;
summary_info.gpu_clock.average = summary_info.gpu_clock.average/
p_job_info->num_gpus;
summary_info.gpu_utilization.average = summary_info.gpu_utilization.average/
p_job_info->num_gpus;
summary_info.memory_utilization.average =
summary_info.memory_utilization.average/p_job_info->num_gpus;
summary_info.pcie_tx.average = summary_info.pcie_tx.average/
p_job_info->num_gpus;
summary_info.pcie_rx.average = summary_info.pcie_rx.average/
p_job_info->num_gpus;
summary_info.gpu_temperature.average = summary_info.gpu_temperature.average/
p_job_info->num_gpus;
summary_info.memory_clock.average = summary_info.memory_clock.average/
p_job_info->num_gpus;
// Set the average of the summary
set_average_summary(summary_info.power_usage, p_job_info->num_gpus);
set_average_summary(summary_info.gpu_clock, p_job_info->num_gpus);
set_average_summary(summary_info.gpu_utilization, p_job_info->num_gpus);
set_average_summary(summary_info.memory_utilization, p_job_info->num_gpus);
set_average_summary(summary_info.pcie_tx, p_job_info->num_gpus);
set_average_summary(summary_info.pcie_rx, p_job_info->num_gpus);
set_average_summary(summary_info.gpu_temperature, p_job_info->num_gpus);
set_average_summary(summary_info.memory_clock, p_job_info->num_gpus);
return RDC_ST_OK;
}
/**
 * Divide the accumulated average and standard deviation held in
 * summary by the number of GPUs.
 *
 * @param summary  Summary whose average and standard_deviation fields are
 *                 divided in place.
 * @param num_gpus Divisor. When it is 0 the summary is left untouched.
 */
void RdcCacheManagerImpl::set_average_summary(
    rdc_stats_summary_t& summary, uint32_t num_gpus) {
  // summary.average is an unsigned integer, so dividing it by zero is
  // undefined behaviour; skip the division when there are no GPUs.
  if (num_gpus == 0) {
    return;
  }
  summary.average = summary.average/num_gpus;
  summary.standard_deviation = summary.standard_deviation/num_gpus;
}
rdc_status_t RdcCacheManagerImpl::rdc_job_start_stats(const char job_id[64],
const rdc_group_info_t& ginfo, const rdc_field_group_info_t& finfo,
const rdc_gpu_gauges_t& gpu_gauges) {
@@ -42,6 +42,7 @@ RdcGroupSettingsImpl::RdcGroupSettingsImpl() {
rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create(
const char* group_name, rdc_gpu_group_t* p_rdc_group_id) {
RDC_LOG(RDC_DEBUG, "Create group " << group_name);
rdc_group_info_t ginfo;
strncpy_with_null(ginfo.group_name, group_name, RDC_MAX_STR_LENGTH);
ginfo.count = 0;
@@ -135,6 +136,7 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create(
uint32_t num_field_ids, uint32_t* field_ids,
const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) {
RDC_LOG(RDC_DEBUG, "Create field group " << field_group_name);
rdc_field_group_info_t finfo;
finfo.count = num_field_ids;
strncpy_with_null(finfo.group_name, field_group_name, RDC_MAX_STR_LENGTH);
@@ -95,41 +95,49 @@ bool RdcStandaloneHandler::copy_gpu_usage_info(
target->power_usage.max_value = pstats.max_value();
target->power_usage.min_value = pstats.min_value();
target->power_usage.average = pstats.average();
target->power_usage.standard_deviation = pstats.standard_deviation();
const ::rdc::JobStatsSummary& cstats = src.gpu_clock();
target->gpu_clock.max_value = cstats.max_value();
target->gpu_clock.min_value = cstats.min_value();
target->gpu_clock.average = cstats.average();
target->gpu_clock.standard_deviation = cstats.standard_deviation();
const ::rdc::JobStatsSummary& ustats = src.gpu_utilization();
target->gpu_utilization.max_value = ustats.max_value();
target->gpu_utilization.min_value = ustats.min_value();
target->gpu_utilization.average = ustats.average();
target->gpu_utilization.standard_deviation = ustats.standard_deviation();
const ::rdc::JobStatsSummary& mstats = src.memory_utilization();
target->memory_utilization.max_value = mstats.max_value();
target->memory_utilization.min_value = mstats.min_value();
target->memory_utilization.average = mstats.average();
target->memory_utilization.standard_deviation = mstats.standard_deviation();
const ::rdc::JobStatsSummary& txstats = src.pcie_tx();
target->pcie_tx.max_value = txstats.max_value();
target->pcie_tx.min_value = txstats.min_value();
target->pcie_tx.average = txstats.average();
target->pcie_tx.standard_deviation = txstats.standard_deviation();
const ::rdc::JobStatsSummary& rxstats = src.pcie_rx();
target->pcie_rx.max_value = rxstats.max_value();
target->pcie_rx.min_value = rxstats.min_value();
target->pcie_rx.average = rxstats.average();
target->pcie_rx.standard_deviation = rxstats.standard_deviation();
const ::rdc::JobStatsSummary& mcstats = src.memory_clock();
target->memory_clock.max_value = mcstats.max_value();
target->memory_clock.min_value = mcstats.min_value();
target->memory_clock.average = mcstats.average();
target->memory_clock.standard_deviation = mcstats.standard_deviation();
const ::rdc::JobStatsSummary& gtstats = src.gpu_temperature();
target->gpu_temperature.max_value = gtstats.max_value();
target->gpu_temperature.min_value = gtstats.min_value();
target->gpu_temperature.average = gtstats.average();
target->gpu_temperature.standard_deviation = gtstats.standard_deviation();
return true;
}
@@ -39,6 +39,7 @@ class RdciStatsSubSystem: public RdciSubSystem {
private:
void show_help() const;
void show_job_stats(const rdc_gpu_usage_info_t& gpu_info) const;
void show_job_stats_json(const rdc_gpu_usage_info_t& gpu_info) const;
enum OPERATIONS {
STATS_UNKNOWN = 0,
+7
Visa fil
@@ -39,7 +39,11 @@ class RdciSubSystem {
virtual void process() = 0;
virtual ~RdciSubSystem();
bool is_json_output() const;
protected:
void set_json_output(bool is_json);
std::vector<std::string> split_string(const std::string& s,
char delimiter) const;
void show_common_usage() const;
@@ -52,6 +56,9 @@ class RdciSubSystem {
std::string root_ca_;
std::string client_cert_;
std::string client_key_;
private:
bool is_json_output_;
};
typedef std::shared_ptr<RdciSubSystem> RdciSubSystemPtr;
+37 -8
Visa fil
@@ -35,11 +35,13 @@ RdciDiscoverySubSystem::RdciDiscoverySubSystem() : show_help_(false) {
void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) {
const int HOST_OPTIONS = 1000;
const int JSON_OPTIONS = 1001;
const struct option long_options[] = {
{"host", required_argument, nullptr, HOST_OPTIONS },
{"help", optional_argument, nullptr, 'h' },
{"unauth", optional_argument, nullptr, 'u' },
{"list", optional_argument, nullptr, 'l' },
{"json", optional_argument, nullptr, JSON_OPTIONS },
{ nullptr, 0 , nullptr, 0 }
};
@@ -53,6 +55,9 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) {
case HOST_OPTIONS:
ip_port_ = optarg;
break;
case JSON_OPTIONS:
set_json_output(true);
break;
case 'h':
show_help_ = true;
return;
@@ -77,12 +82,16 @@ void RdciDiscoverySubSystem::parse_cmd_opts(int argc, char ** argv) {
}
void RdciDiscoverySubSystem::show_help() const {
if (is_json_output()) return;
std::cout << " discovery -- Used to discover and identify GPUs "
<< "and their attributes.\n\n";
std::cout << "Usage\n";
std::cout << " rdci discovery [--host <IP/FQDN>:port] [-u] -l\n";
std::cout << " rdci discovery [--host <IP/FQDN>:port] [--json]"
<< " [-u] -l\n";
std::cout << "\nFlags:\n";
show_common_usage();
std::cout << " --json "
<< "Output using json.\n";
std::cout << " -l --list list GPU discovered"
<<" on the system\n";
}
@@ -101,14 +110,22 @@ void RdciDiscoverySubSystem::process() {
throw RdcException(result, "Fail to get device information");
}
if (count == 0) {
std::cout << "No GPUs find on the sytem\n";
if (is_json_output()) {
std::cout << "\"gpus\" : [], \"status\": \"ok\"";
} else {
std::cout << "No GPUs find on the system\n";
}
return;
}
std::cout << count << " GPUs found.\n";
std::cout << "------------------------------------------------"
<< "-----------------\n";
std::cout << "GPU Index\t Device Information\n";
if (is_json_output()) {
std::cout << "\"gpus\" : [";
} else {
std::cout << count << " GPUs found.\n";
std::cout << "------------------------------------------------"
<< "-----------------\n";
std::cout << "GPU Index\t Device Information\n";
}
for (uint32_t i = 0; i < count; i++) {
rdc_device_attributes_t attribute;
result = rdc_device_get_attributes(rdc_handle_,
@@ -116,10 +133,22 @@ void RdciDiscoverySubSystem::process() {
if (result != RDC_ST_OK) {
return;
}
std::cout << i << "\t\t" << attribute.device_name <<std::endl;
if (is_json_output()) {
std::cout << "{\"gpu_index\": \"" << i << "\", \"device_name\": \""
<< attribute.device_name << "\"}";
if (i != count -1) {
std::cout << ",";
}
} else {
std::cout << i << "\t\t" << attribute.device_name <<std::endl;
}
}
std::cout << "------------------------------------------------"
if (is_json_output()) {
std::cout << ']';
} else {
std::cout << "------------------------------------------------"
<< "-----------------\n";
}
}
+89 -21
Visa fil
@@ -37,6 +37,7 @@ RdciFieldGroupSubSystem::RdciFieldGroupSubSystem():
void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
const int HOST_OPTIONS = 1000;
const int JSON_OPTIONS = 1001;
const struct option long_options[] = {
{"host", required_argument, nullptr, HOST_OPTIONS },
{"help", optional_argument, nullptr, 'h' },
@@ -47,6 +48,7 @@ void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
{"fieldids", required_argument, nullptr, 'f'},
{"info", optional_argument, nullptr, 'i' },
{"delete", required_argument, nullptr, 'd' },
{"json", optional_argument, nullptr, JSON_OPTIONS },
{ nullptr, 0 , nullptr, 0 }
};
@@ -59,6 +61,9 @@ void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
case HOST_OPTIONS:
ip_port_ = optarg;
break;
case JSON_OPTIONS:
set_json_output(true);
break;
case 'h':
field_group_ops_ = FIELD_GROUP_HELP;
return;
@@ -112,18 +117,22 @@ void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
}
void RdciFieldGroupSubSystem::show_help() const {
if (is_json_output()) return;
std::cout << " fieldgroup -- Used to create and maintain groups "
<< "of field Ids.\n\n";
std::cout << "Usage\n";
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [-u] -l\n";
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [-u] "
<< "-c <groupName> -f <filedIds>\n";
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [-u] "
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port]"
<< " [--json] [-u] -l\n";
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [--json]"
<< " [-u] -c <groupName> -f <filedIds>\n";
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [--json] [-u] "
<< "-g <groupId> -i\n";
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [-u] "
std::cout << " rdci fieldgroup [--host <IP/FQDN>:port] [--json] [-u] "
<< "-d <groupId>\n";
std::cout << "\nFlags:\n";
show_common_usage();
std::cout << " --json "
<< "Output using json.\n";
std::cout << " -l --list "
<< "List the field groups that currently exist for a host.\n";
std::cout << " -g --group groupId "
@@ -143,6 +152,7 @@ void RdciFieldGroupSubSystem::process() {
rdc_status_t result = RDC_ST_OK;
rdc_field_group_info_t group_info;
uint32_t count = 0;
std::string json_group_ids = "\"field_groups\": [";
switch (field_group_ops_) {
case FIELD_GROUP_HELP:
show_help();
@@ -170,9 +180,14 @@ void RdciFieldGroupSubSystem::process() {
result = rdc_group_field_create(rdc_handle_, fields.size(),
&field_ids[0], group_name_.c_str(), &group_id);
if (result == RDC_ST_OK) {
std::cout << "Successfully created a field group"
<< " with a group ID " << group_id << std::endl;
return;
if (is_json_output()) {
std::cout << "\"field_group_id\": \"" << group_id
<<"\", \"status\": \"ok\"";
} else {
std::cout << "Successfully created a field group"
<< " with a group ID " << group_id << std::endl;
return;
}
}
break;
}
@@ -184,8 +199,13 @@ void RdciFieldGroupSubSystem::process() {
}
result = rdc_group_field_destroy(rdc_handle_, group_id_);
if (result == RDC_ST_OK) {
std::cout << "Successfully deleted the field group "
if (is_json_output()) {
std::cout << "\"field_group_id\": \"" << group_id_
<<"\", \"status\": \"ok\"";
} else {
std::cout << "Successfully deleted the field group "
<< group_id_ << std::endl;
}
return;
}
break;
@@ -195,8 +215,11 @@ void RdciFieldGroupSubSystem::process() {
rdc_handle_, group_id_list, &count);
if ( result != RDC_ST_OK) break;
std::cout << count << " field group found.\n";
std::cout << "GroupID\t" << "GroupName\t" << "FieldIds\n";
if (!is_json_output()) {
std::cout << count << " field group found.\n";
std::cout << "GroupID\t" << "GroupName\t" << "FieldIds\n";
}
for (uint32_t i = 0; i < count; i++) {
result = rdc_group_field_get_info(
rdc_handle_, group_id_list[i], &group_info);
@@ -206,15 +229,44 @@ void RdciFieldGroupSubSystem::process() {
std::to_string(group_id_list[i]));
}
std::cout << group_id_list[i] << "\t"
if (!is_json_output()) {
std::cout << group_id_list[i] << "\t"
<< group_info.group_name << "\t\t";
} else {
json_group_ids += "{\"group_id\": \"";
json_group_ids += std::to_string(group_id_list[i]);
json_group_ids += "\", \"group_name\": \"";
json_group_ids += group_info.group_name;
json_group_ids += "\", \"field_ids\": [";
}
for (uint32_t j = 0; j < group_info.count; j++) {
std::cout << group_info.field_ids[j];
if (!is_json_output()) {
std::cout << group_info.field_ids[j];
} else {
json_group_ids +=
std::to_string(group_info.field_ids[j]);
}
if ( j < group_info.count -1 ) {
std::cout << ",";
if (!is_json_output()) {
std::cout << ",";
} else {
json_group_ids += ",";
}
}
}
std::cout << std::endl;
if (!is_json_output()) {
std::cout << std::endl;
} else {
json_group_ids += "]}";
if (i != count -1) {
json_group_ids += ",";
}
}
}
if (is_json_output()) {
json_group_ids += "], \"status\": \"ok\"";
std::cout << json_group_ids;
}
break;
case FIELD_GROUP_INFO:
@@ -226,13 +278,29 @@ void RdciFieldGroupSubSystem::process() {
result = rdc_group_field_get_info(
rdc_handle_, group_id_, &group_info);
if (result == RDC_ST_OK) {
std::cout << "Group name: " << group_info.group_name
<< std::endl;
std::cout << "Field Ids: ";
for (uint32_t i = 0; i < group_info.count; i++) {
std::cout << group_info.field_ids[i] << " ";
if (is_json_output()) {
std::cout << "\"group_name\": \"" << group_info.group_name
<< "\", \"field_ids\": [";
} else {
std::cout << "Group name: " << group_info.group_name
<< std::endl;
std::cout << "Field Ids: ";
}
for (uint32_t i = 0; i < group_info.count; i++) {
if (is_json_output()) {
std::cout << group_info.field_ids[i];
if ( i != group_info.count-1 ) {
std::cout << ",";
}
} else {
std::cout << group_info.field_ids[i] << " ";
}
}
if (is_json_output()) {
std::cout << "], \"status\": \"ok\"";
} else {
std::cout << std::endl;
}
std::cout << std::endl;
return;
}
break;
+97 -26
Visa fil
@@ -37,6 +37,7 @@ RdciGroupSubSystem::RdciGroupSubSystem():
void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
const int HOST_OPTIONS = 1000;
const int JSON_OPTIONS = 1001;
const struct option long_options[] = {
{"host", required_argument, nullptr, HOST_OPTIONS },
{"help", optional_argument, nullptr, 'h' },
@@ -47,6 +48,7 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
{"add", required_argument, nullptr, 'a' },
{"info", optional_argument, nullptr, 'i' },
{"delete", required_argument, nullptr, 'd' },
{"json", optional_argument, nullptr, JSON_OPTIONS },
{ nullptr, 0 , nullptr, 0 }
};
@@ -59,6 +61,9 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
case HOST_OPTIONS:
ip_port_ = optarg;
break;
case JSON_OPTIONS:
set_json_output(true);
break;
case 'h':
group_ops_ = GROUP_HELP;
return;
@@ -116,18 +121,22 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
}
void RdciGroupSubSystem::show_help() const {
if (is_json_output()) return;
std::cout << " group -- Used to create and maintain groups of GPUs.\n\n";
std::cout << "Usage\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [-u] -l\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [-u] -c <groupName> "
<< "[-a <entityId>]\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [-u] -g <groupId> "
<< "[-a <entityId>]\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [-u] "
std::cout << " rdci group [--host <IP/FQDN>:port] [--json] [-u] -l\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [--json] [-u]"
<< " -c <groupName> [-a <entityId>]\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [--json] [-u]"
<< " -g <groupId> [-a <entityId>]\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [--json] [-u] "
<< "-g <groupId> [-i]\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [-u] -d <groupId>\n";
std::cout << " rdci group [--host <IP/FQDN>:port] [--json] [-u] "
<< "-d <groupId>\n";
std::cout << "\nFlags:\n";
show_common_usage();
std::cout << " --json "
<< "Output using json.\n";
std::cout << " -l --list "
<< "List the groups that currently exist for a host.\n";
std::cout << " -g --group groupId "
@@ -148,6 +157,7 @@ void RdciGroupSubSystem::process() {
std::vector<std::string> gpu_ids;
rdc_group_info_t group_info;
uint32_t count = 0;
std::string json_group_ids = "\"gpu_groups\": [";
switch (group_ops_) {
case GROUP_HELP:
show_help();
@@ -181,8 +191,13 @@ void RdciGroupSubSystem::process() {
}
if (result == RDC_ST_OK) {
std::cout << "Successfully created group with a group ID "
<< group_id << std::endl;
if (is_json_output()) {
std::cout << "\"group_id\": \"" << group_id
<<"\", \"status\": \"ok\"";
} else {
std::cout << "Successfully created group with a group ID "
<< group_id << std::endl;
}
return;
}
break;
@@ -194,8 +209,13 @@ void RdciGroupSubSystem::process() {
}
result = rdc_group_gpu_destroy(rdc_handle_, group_id_);
if (result == RDC_ST_OK) {
std::cout << "Successfully deleted the group "
<< group_id_ << std::endl;
if (is_json_output()) {
std::cout << "\"group_id\": \"" << group_id_
<<"\", \"status\": \"ok\"";
} else {
std::cout << "Successfully deleted the group "
<< group_id_ << std::endl;
}
return;
}
break;
@@ -204,8 +224,10 @@ void RdciGroupSubSystem::process() {
result = rdc_group_get_all_ids(rdc_handle_, group_id_list, &count);
if ( result != RDC_ST_OK) break;
std::cout << count << " group found.\n";
std::cout << "GroupID\t" << "GroupName\t" << "GPUIndex\n";
if (!is_json_output()) {
std::cout << count << " group found.\n";
std::cout << "GroupID\t" << "GroupName\t" << "GPUIndex\n";
}
for (uint32_t i = 0; i < count; i++) {
result = rdc_group_gpu_get_info(rdc_handle_,
group_id_list[i], &group_info);
@@ -215,15 +237,43 @@ void RdciGroupSubSystem::process() {
+ std::to_string(group_id_list[i]));
}
std::cout << group_id_list[i] << "\t"
<< group_info.group_name << "\t\t";
if (!is_json_output()) {
std::cout << group_id_list[i] << "\t"
<< group_info.group_name << "\t\t";
} else {
json_group_ids += "{\"group_id\": \"";
json_group_ids += std::to_string(group_id_list[i]);
json_group_ids += "\", \"group_name\": \"";
json_group_ids += group_info.group_name;
json_group_ids += "\", \"gpu_indexes\": [";
}
for (uint32_t j = 0; j < group_info.count; j++) {
std::cout << group_info.entity_ids[j];
if (!is_json_output()) {
std::cout << group_info.entity_ids[j];
} else {
json_group_ids +=
std::to_string(group_info.entity_ids[j]);
}
if (j < group_info.count -1) {
std::cout << ",";
if (!is_json_output()) {
std::cout << ",";
} else {
json_group_ids += ",";
}
}
}
std::cout << std::endl;
if (!is_json_output()) {
std::cout << std::endl;
} else {
json_group_ids += "]}";
if (i != count -1) {
json_group_ids += ",";
}
}
}
if (is_json_output()) {
json_group_ids += "], \"status\": \"ok\"";
std::cout << json_group_ids;
}
break;
case GROUP_ADD_GPUS:
@@ -247,8 +297,13 @@ void RdciGroupSubSystem::process() {
}
}
if (result == RDC_ST_OK) {
std::cout << "Successfully added the GPU " << gpu_ids_
<< " to group "<< group_id_ << std::endl;
if (is_json_output()) {
std::cout << "\"group_id\": \"" << group_id_
<<"\", \"status\": \"ok\"";
} else {
std::cout << "Successfully added the GPU " << gpu_ids_
<< " to group "<< group_id_ << std::endl;
}
return;
}
break;
@@ -261,13 +316,29 @@ void RdciGroupSubSystem::process() {
result = rdc_group_gpu_get_info(rdc_handle_,
group_id_, &group_info);
if (result == RDC_ST_OK) {
std::cout << "Group name: "
<< group_info.group_name << std::endl;
std::cout << "Gpu indexes: ";
for (uint32_t i = 0; i < group_info.count; i++) {
std::cout << group_info.entity_ids[i] << " ";
if (is_json_output()) {
std::cout << "\"group_name\": \"" << group_info.group_name
<< "\", \"gpu_indexes\": [";
} else {
std::cout << "Group name: "
<< group_info.group_name << std::endl;
std::cout << "Gpu indexes: ";
}
for (uint32_t i = 0; i < group_info.count; i++) {
if (is_json_output()) {
std::cout << group_info.entity_ids[i];
if ( i != group_info.count-1 ) {
std::cout << ",";
}
} else {
std::cout << group_info.entity_ids[i] << " ";
}
}
if (is_json_output()) {
std::cout << "], \"status\": \"ok\"";
} else {
std::cout << std::endl;
}
std::cout << std::endl;
return;
}
break;
+171 -24
Visa fil
@@ -23,6 +23,7 @@ THE SOFTWARE.
#include <getopt.h>
#include <unistd.h>
#include <signal.h>
#include <ctime>
#include <limits>
#include <iomanip>
#include "rdc_lib/rdc_common.h"
@@ -42,6 +43,7 @@ RdciStatsSubSystem::~RdciStatsSubSystem() {
void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) {
const int HOST_OPTIONS = 1000;
const int JSON_OPTIONS = 1001;
const struct option long_options[] = {
{"host", required_argument, nullptr, HOST_OPTIONS },
{"help", optional_argument, nullptr, 'h' },
@@ -53,6 +55,7 @@ void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) {
{"jremoveall", optional_argument, nullptr, 'a' },
{"verbose", optional_argument, nullptr, 'v'},
{"group", required_argument, nullptr, 'g'},
{"json", optional_argument, nullptr, JSON_OPTIONS},
{ nullptr, 0 , nullptr, 0 }
};
@@ -66,6 +69,9 @@ void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) {
case HOST_OPTIONS:
ip_port_ = optarg;
break;
case JSON_OPTIONS:
set_json_output(true);
break;
case 'h':
stats_ops_ = STATS_HELP;
return;
@@ -119,17 +125,22 @@ void RdciStatsSubSystem::parse_cmd_opts(int argc, char ** argv) {
}
void RdciStatsSubSystem::show_help() const {
if (is_json_output()) return;
std::cout << " stats -- Used to view job statistics.\n\n";
std::cout << "Usage\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] -s <jobId>"
<< " -g <groupId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] -x <jobId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] [-v] "
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] [--json] "
<< "-s <jobId> -g <groupId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] [--json] "
<< "-x <jobId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] [--json] [-v] "
<< "-j <jobId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] -r <jobId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] -a\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] [--json] "
<< "-r <jobId>\n";
std::cout << " rdci stats [--host <IP/FQDN>:port] [-u] [--json] -a\n";
std::cout << "\nFlags:\n";
show_common_usage();
std::cout << " --json "
<< "Output using json.\n";
std::cout << " -s --jstart Start recording "
<< "job statistics.\n";
std::cout << " -g --group-id The GPU group to query "
@@ -146,14 +157,103 @@ void RdciStatsSubSystem::show_help() const {
<< "all job statistics.\n";
}
void RdciStatsSubSystem::show_job_stats_json(
const rdc_gpu_usage_info_t& gpu_info) const {
std::cout << "\"start_time\": " << gpu_info.start_time << ",";
std::cout << "\"end_time\": " << gpu_info.end_time << ",";
std::cout << "\"execution_time\": " <<
(gpu_info.end_time-gpu_info.start_time) << ",";
std::cout << "\"energy_consumed\": " << gpu_info.energy_consumed << ",";
std::cout << "\"power_usage_max\": "
<< gpu_info.power_usage.max_value << ",";
std::cout << "\"power_usage_min\": "
<< gpu_info.power_usage.min_value << ",";
std::cout << "\"power_usage_avg\": "
<< gpu_info.power_usage.average << ",";
std::cout << "\"power_usage_stanard_deviation\": "
<< gpu_info.power_usage.standard_deviation << ",";
std::cout << "\"gpu_clock_max\": "
<< gpu_info.gpu_clock.max_value << ",";
std::cout << "\"gpu_clock_min\": "
<< gpu_info.gpu_clock.min_value << ",";
std::cout << "\"gpu_clock_avg\": "
<< gpu_info.gpu_clock.average << ",";
std::cout << "\"gpu_clock_stanard_deviation\": "
<< gpu_info.gpu_clock.standard_deviation << ",";
std::cout << "\"memory_clock_max\": "
<< gpu_info.memory_clock.max_value << ",";
std::cout << "\"memory_clock_min\": "
<< gpu_info.memory_clock.min_value << ",";
std::cout << "\"memory_clock_avg\": "
<< gpu_info.memory_clock.average << ",";
std::cout << "\"memory_clock_stanard_deviation\": "
<< gpu_info.memory_clock.standard_deviation << ",";
std::cout << "\"gpu_utilization_max\": "
<< gpu_info.gpu_utilization.max_value << ",";
std::cout << "\"gpu_utilization_min\": "
<< gpu_info.gpu_utilization.min_value << ",";
std::cout << "\"gpu_utilization_avg\": "
<< gpu_info.gpu_utilization.average << ",";
std::cout << "\"gpu_utilization_deviation\": "
<< gpu_info.gpu_utilization.standard_deviation << ",";
std::cout << "\"max_gpu_memory_used\": "
<< gpu_info.max_gpu_memory_used << ",";
std::cout << "\"memory_utilization_max\": "
<< gpu_info.memory_utilization.max_value << ",";
std::cout << "\"memory_utilization_min\": "
<< gpu_info.memory_utilization.min_value << ",";
std::cout << "\"memory_utilization_avg\": "
<< gpu_info.memory_utilization.average << ",";
std::cout << "\"memory_utilization_stanard_deviation\": "
<< gpu_info.memory_utilization.standard_deviation << ",";
std::cout << "\"gpu_temperature_max\": "
<< gpu_info.gpu_temperature.max_value << ",";
std::cout << "\"gpu_temperature_min\": "
<< gpu_info.gpu_temperature.min_value << ",";
std::cout << "\"gpu_temperature_avg\": "
<< gpu_info.gpu_temperature.average << ",";
std::cout << "\"gpu_temperature_stanard_deviation\": "
<< gpu_info.gpu_temperature.standard_deviation << ",";
std::cout << "\"pcie_rx_max\": "
<< gpu_info.pcie_rx.max_value << ",";
std::cout << "\"pcie_rx_min\": "
<< gpu_info.pcie_rx.min_value << ",";
std::cout << "\"pcie_rx_avg\": "
<< gpu_info.pcie_rx.average << ",";
std::cout << "\"pcie_rx_stanard_deviation\": "
<< gpu_info.pcie_rx.standard_deviation << ",";
std::cout << "\"pcie_tx_max\": "
<< gpu_info.pcie_tx.max_value << ",";
std::cout << "\"pcie_tx_min\": "
<< gpu_info.pcie_tx.min_value << ",";
std::cout << "\"pcie_tx_avg\": "
<< gpu_info.pcie_tx.average << ",";
std::cout << "\"pcie_tx_stanard_deviation\": "
<< gpu_info.pcie_tx.standard_deviation << ",";
std::cout << "\"ecc_correct\": " << gpu_info.ecc_correct << ",";
std::cout << "\"ecc_uncorrect\": " << gpu_info.ecc_uncorrect;
}
void RdciStatsSubSystem::show_job_stats(
const rdc_gpu_usage_info_t& gpu_info) const {
std::cout << "|------- Execution Stats ----------"
<< "+------------------------------------\n";
std::cout << "| Start Time | "
<< gpu_info.start_time << "\n";
<< std::put_time(std::gmtime(reinterpret_cast<const time_t*>
(&gpu_info.start_time)), "%c %Z") << "\n";
std::cout << "| End Time | "
<< gpu_info.end_time << "\n";
<< std::put_time(std::gmtime(reinterpret_cast<const time_t*>
(&gpu_info.end_time)), "%c %Z") << "\n";
std::cout << "| Total Execution Time (sec) | "
<< (gpu_info.end_time-gpu_info.start_time) << "\n";
std::cout << "+------- Performance Stats --------"
@@ -163,37 +263,53 @@ void RdciStatsSubSystem::show_job_stats(
std::cout << "| Power Usage (Watts) | " << "Max: "
<< gpu_info.power_usage.max_value<< " Min: "<<
gpu_info.power_usage.min_value << " Avg: "
<< gpu_info.power_usage.average << "\n";
<< gpu_info.power_usage.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.power_usage.standard_deviation << "\n";
std::cout << "| GPU Clock (MHz) | " << "Max: "
<< gpu_info.gpu_clock.max_value << " Min: " <<
gpu_info.gpu_clock.min_value << " Avg: "
<< gpu_info.gpu_clock.average << "\n";
<< gpu_info.gpu_clock.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.gpu_clock.standard_deviation << "\n";
std::cout << "| Memory Clock (MHz) | " << "Max: "
<< gpu_info.memory_clock.max_value << " Min: " <<
gpu_info.memory_clock.min_value << " Avg: "
<< gpu_info.memory_clock.average << "\n";
<< gpu_info.memory_clock.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.memory_clock.standard_deviation << "\n";
std::cout << "| GPU Utilization (%) | " << "Max: "
<< gpu_info.gpu_utilization.max_value <<" Min: " <<
gpu_info.gpu_utilization.min_value << " Avg: " <<
gpu_info.gpu_utilization.average << "\n";
gpu_info.gpu_utilization.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.gpu_utilization.standard_deviation << "\n";
std::cout << "| Max GPU Memory Used (bytes) | " <<
gpu_info.max_gpu_memory_used << "\n";
std::cout << "| Memory Utilization (%) | "
<< "Max: " << gpu_info.memory_utilization.max_value
<<" Min: "<< gpu_info.memory_utilization.min_value
<< " Avg: " << gpu_info.memory_utilization.average << "\n";
<< " Avg: " << gpu_info.memory_utilization.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.memory_utilization.standard_deviation << "\n";
std::cout << "| GPU Temperature (Celsius) | "
<< "Max: " << gpu_info.gpu_temperature.max_value
<<" Min: "<< gpu_info.gpu_temperature.min_value
<< " Avg: " << gpu_info.gpu_temperature.average << "\n";
<< " Avg: " << gpu_info.gpu_temperature.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.gpu_temperature.standard_deviation << "\n";
std::cout << "| PCIe Rx Bandwidth (megabytes) | "
<< "Max: " << gpu_info.pcie_rx.max_value
<<" Min: "<< gpu_info.pcie_rx.min_value
<< " Avg: " << gpu_info.pcie_rx.average << "\n";
<< " Avg: " << gpu_info.pcie_rx.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.pcie_rx.standard_deviation << "\n";
std::cout << "| PCIe Tx Bandwidth (megabytes) | "
<< "Max: " << gpu_info.pcie_tx.max_value
<<" Min: "<< gpu_info.pcie_tx.min_value
<< " Avg: " << gpu_info.pcie_tx.average << "\n";
<< " Avg: " << gpu_info.pcie_tx.average << " SD: "
<< std::fixed << std::setprecision(2)
<< gpu_info.pcie_tx.standard_deviation << "\n";
std::cout << "| Correctable ECC Errors | "
<< gpu_info.ecc_correct << "\n";
std::cout << "| Uncorrectable ECC Errors | "
@@ -217,8 +333,13 @@ void RdciStatsSubSystem::process() {
if (result != RDC_ST_OK) {
throw RdcException(result, rdc_status_string(result));
}
std::cout << "Successfully started recording job "
if (is_json_output()) {
std::cout << "\"job_id\": \"" << job_id_ << "\", \"group_id\": \""
<< group_id_ <<"\", \"status\": \"ok\"";
} else {
std::cout << "Successfully started recording job "
<< job_id_ << " with a group ID " << group_id_ << std::endl;
}
return;
}
@@ -228,8 +349,13 @@ void RdciStatsSubSystem::process() {
if (result != RDC_ST_OK) {
throw RdcException(result, rdc_status_string(result));
}
std::cout << "Successfully stopped recording job "
if (is_json_output()) {
std::cout << "\"job_id\": \"" << job_id_
<< "\", \"status\": \"ok\"";
} else {
std::cout << "Successfully stopped recording job "
<< job_id_ << std::endl;
}
return;
}
@@ -241,14 +367,26 @@ void RdciStatsSubSystem::process() {
throw RdcException(result, rdc_status_string(result));
}
std::cout << "| Summary \n";
show_job_stats(job_info.summary);
if (!is_json_output()) {
std::cout << "| Summary \n";
show_job_stats(job_info.summary);
} else {
std::cout << "\"job_summary\" : {";
show_job_stats_json(job_info.summary);
std::cout << "}";
}
if (is_verbose_ == false) {
return;
}
for (uint32_t i = 0; i < job_info.num_gpus; i++) {
std::cout << "| GPU " << i << "\n";
show_job_stats(job_info.gpus[i]);
if (!is_json_output()) {
std::cout << "| GPU " << i << "\n";
show_job_stats(job_info.gpus[i]);
} else {
std:: cout << ", \"gpu_" << i << "\": {";
show_job_stats_json(job_info.gpus[i]);
std::cout << "}";
}
}
return;
}
@@ -259,8 +397,13 @@ void RdciStatsSubSystem::process() {
if (result != RDC_ST_OK) {
throw RdcException(result, rdc_status_string(result));
}
std::cout << "Successfully removed job "
if (is_json_output()) {
std::cout << "\"job_id\": \"" << job_id_
<< "\", \"status\": \"ok\"";
} else {
std::cout << "Successfully removed job "
<< job_id_ << std::endl;
}
return;
}
@@ -269,7 +412,11 @@ void RdciStatsSubSystem::process() {
if (result != RDC_ST_OK) {
throw RdcException(result, rdc_status_string(result));
}
std::cout << "Successfully removed all jobs\n";
if (is_json_output()) {
std::cout << "\"status\": \"ok\"";
} else {
std::cout << "Successfully removed all jobs\n";
}
return;
}
}
+15 -1
Visa fil
@@ -33,13 +33,18 @@ RdciSubSystem::RdciSubSystem():
, use_auth_(true)
, root_ca_("/etc/rdc/client/certs/rdc_cacert.pem")
, client_cert_("/etc/rdc/client/certs/rdc_client_cert.pem")
, client_key_("/etc/rdc/client/private/rdc_client_cert.key") {
, client_key_("/etc/rdc/client/private/rdc_client_cert.key")
, is_json_output_(false) {
rdc_status_t status = rdc_init(0);
if (status != RDC_ST_OK) {
throw RdcException(status, "RDC initialize fail");
}
}
// Returns the current value of the is_json_output_ flag, i.e. whether this
// subsystem is set to produce JSON output.
bool RdciSubSystem::is_json_output() const {
  const bool json_enabled = is_json_output_;
  return json_enabled;
}
bool RdciSubSystem::get_field_id_from_name(
const std::string& name, uint32_t& value) const {
const std::map<std::string, uint32_t> field_name_to_id = {
@@ -154,6 +159,11 @@ void RdciSubSystem::show_common_usage() const {
<< "information and exits.\n";
}
// Turns JSON output on or off for this subsystem.
//
// is_json: true to produce JSON output, false for the plain-text output.
//
// The opening "{" of the JSON document is written to stdout only on the
// transition into JSON mode. The destructor writes a single closing "}"
// when is_json_output_ is set, so emitting "{" for a `false` argument or
// for a repeated `true` call would leave the braces unbalanced.
void RdciSubSystem::set_json_output(bool is_json) {
  const bool turning_on = is_json && !is_json_output_;
  is_json_output_ = is_json;
  if (turning_on) {
    std::cout << "{";
  }
}
RdciSubSystem::~RdciSubSystem() {
if (rdc_handle_) {
rdc_disconnect(rdc_handle_);
@@ -161,6 +171,10 @@ RdciSubSystem::~RdciSubSystem() {
}
rdc_shutdown();
if (is_json_output_) {
std::cout << "}" << std::endl;
}
}
} // namespace rdc
+13 -4
Visa fil
@@ -42,9 +42,9 @@ int main(int argc, char ** argv) {
exit(0);
}
amd::rdc::RdciSubSystemPtr subsystem;
try {
std::string subsystem_name = argv[1];
amd::rdc::RdciSubSystemPtr subsystem;
if (subsystem_name == "discovery") {
subsystem.reset(new amd::rdc::RdciDiscoverySubSystem());
} else if (subsystem_name == "dmon") {
@@ -66,11 +66,20 @@ int main(int argc, char ** argv) {
subsystem->process();
} catch (const amd::rdc::RdcException& e) {
std::cout << "rdci Error: " << e.what() << std::endl;
if (subsystem && subsystem->is_json_output()) {
std::cout << "\"status\": \"error\", \"description\": \""
<< e.what() << '"';
} else {
std::cout << "rdci Error: " << e.what() << std::endl;
}
return e.error_code();
} catch (...) {
std::cout << "Unhandled exception." << std::endl;
return 1;
if (subsystem && subsystem->is_json_output()) {
std::cout << "\"status\": \"error\", \"description\": "
<< "\"Unhandled exception.\"";
} else {
std::cout << "Unhandled exception." << std::endl;
} return 1;
}
return 0;
+10
Visa fil
@@ -30,6 +30,8 @@ THE SOFTWARE.
#include "rdc.grpc.pb.h" // NOLINT
#include "rdc/rdc_api_service.h"
#include "rdc/rdc.h"
#include "rdc_lib/RdcLogger.h"
#include "rdc_lib/rdc_common.h"
namespace amd {
namespace rdc {
@@ -488,41 +490,49 @@ bool RdcAPIServiceImpl::copy_gpu_usage_info(const rdc_gpu_usage_info_t& src,
stats->set_max_value(src.power_usage.max_value);
stats->set_min_value(src.power_usage.min_value);
stats->set_average(src.power_usage.average);
stats->set_standard_deviation(src.power_usage.standard_deviation);
stats = target->mutable_gpu_clock();
stats->set_max_value(src.gpu_clock.max_value);
stats->set_min_value(src.gpu_clock.min_value);
stats->set_average(src.gpu_clock.average);
stats->set_standard_deviation(src.gpu_clock.standard_deviation);
stats = target->mutable_gpu_utilization();
stats->set_max_value(src.gpu_utilization.max_value);
stats->set_min_value(src.gpu_utilization.min_value);
stats->set_average(src.gpu_utilization.average);
stats->set_standard_deviation(src.gpu_utilization.standard_deviation);
stats = target->mutable_memory_utilization();
stats->set_max_value(src.memory_utilization.max_value);
stats->set_min_value(src.memory_utilization.min_value);
stats->set_average(src.memory_utilization.average);
stats->set_standard_deviation(src.memory_utilization.standard_deviation);
stats = target->mutable_pcie_tx();
stats->set_max_value(src.pcie_tx.max_value);
stats->set_min_value(src.pcie_tx.min_value);
stats->set_average(src.pcie_tx.average);
stats->set_standard_deviation(src.pcie_tx.standard_deviation);
stats = target->mutable_pcie_rx();
stats->set_max_value(src.pcie_rx.max_value);
stats->set_min_value(src.pcie_rx.min_value);
stats->set_average(src.pcie_rx.average);
stats->set_standard_deviation(src.pcie_rx.standard_deviation);
stats = target->mutable_memory_clock();
stats->set_max_value(src.memory_clock.max_value);
stats->set_min_value(src.memory_clock.min_value);
stats->set_average(src.memory_clock.average);
stats->set_standard_deviation(src.memory_clock.standard_deviation);
stats = target->mutable_gpu_temperature();
stats->set_max_value(src.gpu_temperature.max_value);
stats->set_min_value(src.gpu_temperature.min_value);
stats->set_average(src.gpu_temperature.average);
stats->set_standard_deviation(src.gpu_temperature.standard_deviation);
return true;
}
@@ -312,6 +312,12 @@ RDCServer::ShutDown(void) {
delete rdc_admin_service_;
rdc_admin_service_ = nullptr;
}
if (api_service_) {
delete api_service_;
api_service_ = nullptr;
}
}
static void * ProcessSignalLoop(void *server_ptr) {