Implement the rdc_lib API to support the job stats
Add the function to start and stop the job recording.
Add the function to get the job stats for each GPU and summary of multiple GPUs
Add the function to remove the jobs.
Add a class RdcLogger which can control the log level using the environment variable RDC_LOG.
This is similar to gRPC's GRPC_VERBOSITY. When a customer runs into a problem, they can enable
verbose logging to help us troubleshoot it.
Add the -u support in the rdci group, fieldgroup and dmon for connecting to rdcd without authentication.
Change-Id: I22c591823c1ee6485db106b911bed8271d1b2769
[ROCm/rdc commit: a547dc7efd]
This commit is contained in:
committato da
Chris Freehill
parent
aef3d29925
commit
0813e7052f
@@ -16,9 +16,16 @@ LD_LIBRARY_PATH=$PWD/rdc_libs/ ./rdci/rdci discovery -l
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
Enable the debug log:
|
||||
```
|
||||
sudo RDC_LOG=DEBUG ./server/rdcd
|
||||
```
|
||||
|
||||
Check the ssl connection in rdci:
|
||||
```
|
||||
rdcd_hostname=""  # Set this to the host running the rdcd you want to connect to
|
||||
openssl s_client -connect $rdcd_hostname:50051 -cert /etc/rdc/client/certs/rdc_client_cert.pem -key /etc/rdc/client/private/rdc_client_cert.key -CAfile /etc/rdc/client/certs/rdc_cacert.pem
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -92,11 +92,9 @@ int main(int, char **) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// (2) start the recording. Set the sample frequency to once per second, the
|
||||
// max keep age to one hour and the maximum number of samples to
|
||||
// keep to unlimited.
|
||||
// (2) start the recording. Set the sample frequency to once per second.
|
||||
result = rdc_job_start_stats(rdc_handle, group_id,
|
||||
job_id, 1000000, 3600, 0);
|
||||
job_id, 1000000);
|
||||
if (result != RDC_ST_OK) {
|
||||
std::cout << "Error start job stats. Return: "
|
||||
<< rdc_status_string(result);
|
||||
|
||||
@@ -52,6 +52,7 @@ typedef enum {
|
||||
RDC_ST_NOT_FOUND, //!< Cannot find the value
|
||||
RDC_ST_CONFLICT, //!< Conflict with current state
|
||||
RDC_ST_CLIENT_ERROR, //!< The RDC client error
|
||||
RDC_ST_ALREADY_EXIST, //!< The item already exists
|
||||
RDC_ST_MAX_LIMIT //!< Max limit recording for the object
|
||||
} rdc_status_t;
|
||||
|
||||
@@ -371,15 +372,10 @@ rdc_status_t rdc_disconnect(rdc_handle_t p_rdc_handle);
|
||||
*
|
||||
* @param[in] update_freq How often to update this field in usec.
|
||||
*
|
||||
* @param[in] max_keep_age How long to keep data for this field in seconds.
|
||||
*
|
||||
* @param[in] max_keep_samples Maximum number of samples to keep. 0=no limit.
|
||||
*
|
||||
* @retval ::RDC_ST_OK is returned upon successful call.
|
||||
*/
|
||||
rdc_status_t rdc_job_start_stats(rdc_handle_t p_rdc_handle,
|
||||
rdc_gpu_group_t group_id, char job_id[64], uint64_t update_freq,
|
||||
double max_keep_age, uint32_t max_keep_samples);
|
||||
rdc_gpu_group_t group_id, char job_id[64], uint64_t update_freq);
|
||||
|
||||
/**
|
||||
* @brief Get the stats of the job using the job id.
|
||||
@@ -415,6 +411,35 @@ rdc_status_t rdc_job_get_stats(rdc_handle_t p_rdc_handle, char job_id[64],
|
||||
rdc_status_t rdc_job_stop_stats(rdc_handle_t p_rdc_handle,
|
||||
char job_id[64]);
|
||||
|
||||
/**
|
||||
* @brief Request RDC to stop tracking the job given by job_id
|
||||
*
|
||||
* @details After this call, you will no longer be able to call
|
||||
* rdc_job_get_stats() on this job_id. But you will be able to reuse
|
||||
* the job_id after this call.
|
||||
*
|
||||
* @param[in] p_rdc_handle The RDC handler.
|
||||
*
|
||||
* @param[in] job_id The name of the job.
|
||||
*
|
||||
* @retval ::RDC_ST_OK is returned upon successful call.
|
||||
*/
|
||||
rdc_status_t rdc_job_remove(rdc_handle_t p_rdc_handle,
|
||||
char job_id[64]);
|
||||
|
||||
/**
|
||||
* @brief Request RDC to stop tracking all the jobs
|
||||
*
|
||||
* @details After this call, you will no longer be able to call
|
||||
* rdc_job_get_stats() on any job id. But you will be able to reuse
|
||||
* any previously used job id after this call.
|
||||
*
|
||||
* @param[in] p_rdc_handle The RDC handler.
|
||||
*
|
||||
* @retval ::RDC_ST_OK is returned upon successful call.
|
||||
*/
|
||||
rdc_status_t rdc_job_remove_all(rdc_handle_t p_rdc_handle);
|
||||
|
||||
/**
|
||||
* @brief Request RDC to update all fields to be watched.
|
||||
*
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
@@ -31,6 +32,7 @@ THE SOFTWARE.
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
typedef std::map<uint32_t, uint64_t> rdc_gpu_total_memory_t;
|
||||
|
||||
class RdcCacheManager {
|
||||
public:
|
||||
@@ -43,7 +45,19 @@ class RdcCacheManager {
|
||||
const rdc_field_value& value) = 0;
|
||||
virtual rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id,
|
||||
uint64_t max_keep_samples, double max_keep_age) = 0;
|
||||
virtual uint32_t get_cache_size() = 0;
|
||||
virtual std::string get_cache_stats() = 0;
|
||||
|
||||
virtual rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
const rdc_gpu_total_memory_t& total_memory,
|
||||
rdc_job_info_t* p_job_info) = 0;
|
||||
virtual rdc_status_t rdc_job_start_stats(char jobId[64],
|
||||
const rdc_group_info_t& group,
|
||||
const rdc_field_group_info_t& finfo) = 0;
|
||||
virtual rdc_status_t rdc_job_stop_stats(char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_update_job_stats(uint32_t gpu_index,
|
||||
const std::string& job_id, const rdc_field_value& value) = 0;
|
||||
virtual rdc_status_t rdc_job_remove(char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_job_remove_all() = 0;
|
||||
|
||||
virtual ~RdcCacheManager() {}
|
||||
};
|
||||
|
||||
@@ -59,7 +59,7 @@ class RdcGroupSettings {
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<RdcGroupSettings> RdcGroupSettingsPtr;
|
||||
|
||||
const uint32_t JOB_FIELD_ID = 0;
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
@@ -33,12 +33,12 @@ class RdcHandler {
|
||||
public:
|
||||
// Job API
|
||||
virtual rdc_status_t rdc_job_start_stats(rdc_gpu_group_t groupId,
|
||||
char job_id[64], uint64_t update_freq, double max_keep_age,
|
||||
uint32_t max_keep_samples) = 0;
|
||||
virtual rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
char job_id[64], uint64_t update_freq) = 0;
|
||||
virtual rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
rdc_job_info_t* p_job_info)= 0;
|
||||
virtual rdc_status_t rdc_job_stop_stats(char job_id[64]) = 0;
|
||||
|
||||
virtual rdc_status_t rdc_job_stop_stats(char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_job_remove(char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_job_remove_all() = 0;
|
||||
|
||||
// Discovery API
|
||||
virtual rdc_status_t rdc_device_get_all(
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCLOGGER_H_
|
||||
#define RDC_LIB_RDCLOGGER_H_
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <chrono> // NOLINT
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
/**
 * @brief Minimal severity-filtered logger used by the RDC_LOG macro.
 *
 * The logger writes to the stream handed to its constructor and keeps a
 * single verbosity threshold. A message is emitted only when its severity
 * (RDC_ERROR, RDC_INFO or RDC_DEBUG) does not exceed that threshold.
 */
class RdcLogger {
 public:
  /**
   * @brief Create a logger that writes to @p os.
   *
   * @param[in] os Destination stream. Only a reference is stored, so the
   * stream must outlive the logger.
   */
  explicit RdcLogger(std::ostream& os);

  /**
   * @brief Access the shared logger instance, which writes to std::cout.
   *
   * The instance is a function-local static, so it is created on first use.
   */
  static RdcLogger& getLogger() {
    static RdcLogger logger(std::cout);
    return logger;
  }

  /**
   * @brief Tell whether a message of the given severity should be emitted.
   *
   * @param[in] severity Severity of the message about to be logged.
   *
   * @retval true when @p severity is at or below the configured level.
   */
  bool should_log(uint32_t severity) const {
    return log_level_ >= severity;
  }

  /**
   * @brief Get the stream the log lines are written to.
   */
  std::ostream& get_ostream() const {
    return os_;
  }

  /**
   * @brief Build the prefix that precedes every log message.
   *
   * @param[in] severity Severity of the message.
   * @param[in] file Source file emitting the message, may be nullptr.
   * @param[in] line Source line emitting the message.
   *
   * @return The formatted prefix.
   */
  std::string get_log_header(uint32_t severity,
            const char* file, int line);

 private:
  std::ostream& os_;     //!< Destination of the log output (not owned)
  uint32_t log_level_;   //!< Highest severity value that is still logged
};
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_RDCLOGGER_H_
|
||||
@@ -36,10 +36,10 @@ class RdcWatchTable {
|
||||
virtual rdc_status_t rdc_field_update_all() = 0;
|
||||
|
||||
virtual rdc_status_t rdc_job_start_stats(rdc_gpu_group_t group_id,
|
||||
char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_watch_job_fields(rdc_gpu_group_t group_id,
|
||||
uint64_t update_freq, double max_keep_age,
|
||||
uint32_t max_keep_samples) = 0;
|
||||
char job_id[64], uint64_t update_freq) = 0;
|
||||
virtual rdc_status_t rdc_job_stop_stats(char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_job_remove(char job_id[64]) = 0;
|
||||
virtual rdc_status_t rdc_job_remove_all() = 0;
|
||||
|
||||
virtual rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id, uint64_t update_freq,
|
||||
|
||||
@@ -23,7 +23,8 @@ THE SOFTWARE.
|
||||
#define RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <mutex> // NOLINT(build/c++11)
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "rdc_lib/RdcCacheManager.h"
|
||||
@@ -41,6 +42,29 @@ struct RdcCacheEntry {
|
||||
|
||||
typedef std::map<RdcFieldKey, std::vector<RdcCacheEntry>> RdcCacheSamples;
|
||||
|
||||
// Running aggregate of one field on one GPU while a job is being recorded.
// Every member starts at zero so that a freshly created entry never carries
// indeterminate values into the job cache.
struct FieldSummaryStats {
  int64_t max_value = 0;     //!< Largest sample seen so far
  int64_t min_value = 0;     //!< Smallest sample seen so far
  int64_t total_value = 0;   //!< Sum of all samples, used for the average
  uint64_t last_time = 0;    //!< Timestamp of the most recent sample
  uint64_t count = 0;        //!< Number of samples folded in, 0 = none yet
};

// Per GPU statistics of a job: the energy accumulator plus one summary for
// every tracked field, keyed by field id.
struct GpuSummaryStats {
  uint64_t energy_consumed = 0;    //!< Energy accumulated from power samples
  uint64_t energy_last_time = 0;   //!< Timestamp used for the energy interval
  std::map<uint32_t, FieldSummaryStats> field_summaries;
};

// Per job entry
struct RdcJobStatsCacheEntry {
  uint64_t start_time = 0;   //!< When the recording was started
  uint64_t end_time = 0;     //!< When it was stopped, 0 while still running
  std::map<uint32_t, GpuSummaryStats> gpu_stats;   //!< Keyed by GPU index
};

// <job_id, job_stats>
typedef std::map<std::string, RdcJobStatsCacheEntry> RdcJobStatsCache;
|
||||
|
||||
class RdcCacheManagerImpl: public RdcCacheManager {
|
||||
public:
|
||||
@@ -53,10 +77,27 @@ class RdcCacheManagerImpl: public RdcCacheManager {
|
||||
const rdc_field_value& value) override;
|
||||
rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id,
|
||||
uint64_t max_keep_samples, double max_keep_age) override;
|
||||
uint32_t get_cache_size() override;
|
||||
std::string get_cache_stats() override;
|
||||
|
||||
rdc_status_t rdc_job_get_stats(char job_id[64],
|
||||
const rdc_gpu_total_memory_t& total_memory,
|
||||
rdc_job_info_t* p_job_info) override;
|
||||
rdc_status_t rdc_job_start_stats(char job_id[64],
|
||||
const rdc_group_info_t& group,
|
||||
const rdc_field_group_info_t& finfo) override;
|
||||
rdc_status_t rdc_job_stop_stats(char job_id[64]) override;
|
||||
rdc_status_t rdc_update_job_stats(uint32_t gpu_index,
|
||||
const std::string& job_id,
|
||||
const rdc_field_value& value) override;
|
||||
rdc_status_t rdc_job_remove(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove_all() override;
|
||||
|
||||
private:
|
||||
void set_summary(const FieldSummaryStats & stats,
|
||||
rdc_stats_summary_t& gpu, rdc_stats_summary_t& summary, // NOLINT
|
||||
unsigned int adjuster);
|
||||
RdcCacheSamples cache_samples_;
|
||||
RdcJobStatsCache cache_jobs_;
|
||||
std::mutex cache_mutex_;
|
||||
};
|
||||
|
||||
|
||||
@@ -36,11 +36,12 @@ class RdcEmbeddedHandler: public RdcHandler {
|
||||
public:
|
||||
// Job API
|
||||
rdc_status_t rdc_job_start_stats(rdc_gpu_group_t groupId,
|
||||
char job_id[64], uint64_t update_freq, double max_keep_age,
|
||||
uint32_t max_keep_samples) override;
|
||||
rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
char job_id[64], uint64_t update_freq) override;
|
||||
rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
rdc_job_info_t* p_job_info) override;
|
||||
rdc_status_t rdc_job_stop_stats(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_stop_stats(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove_all() override;
|
||||
|
||||
// Discovery API
|
||||
rdc_status_t rdc_device_get_all(
|
||||
|
||||
@@ -61,8 +61,8 @@ class RdcGroupSettingsImpl: public RdcGroupSettings {
|
||||
private:
|
||||
std::map<rdc_gpu_group_t, rdc_group_info_t> gpu_group_;
|
||||
std::map<rdc_field_grp_t, rdc_field_group_info_t> field_group_;
|
||||
uint32_t cur_group_id_ = 0;
|
||||
uint32_t cur_filed_group_id_ = 0;
|
||||
uint32_t cur_group_id_ = 1;
|
||||
uint32_t cur_field_group_id_ = 0;
|
||||
std::mutex group_mutex_;
|
||||
std::mutex field_group_mutex_;
|
||||
};
|
||||
|
||||
@@ -33,11 +33,12 @@ class RdcStandaloneHandler: public RdcHandler {
|
||||
public:
|
||||
// Job RdcAPI
|
||||
rdc_status_t rdc_job_start_stats(rdc_gpu_group_t groupId,
|
||||
char job_id[64], uint64_t update_freq, double max_keep_age,
|
||||
uint32_t max_keep_samples) override;
|
||||
rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
char job_id[64], uint64_t update_freq) override;
|
||||
rdc_status_t rdc_job_get_stats(char jobId[64],
|
||||
rdc_job_info_t* p_job_info) override;
|
||||
rdc_status_t rdc_job_stop_stats(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_stop_stats(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove_all() override;
|
||||
|
||||
// Discovery RdcAPI
|
||||
rdc_status_t rdc_device_get_all(
|
||||
|
||||
@@ -22,11 +22,12 @@ THE SOFTWARE.
|
||||
#ifndef RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
#define RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <mutex> // NOLINT
|
||||
#include <atomic>
|
||||
#include "rdc_lib/RdcWatchTable.h"
|
||||
#include "rdc_lib/RdcGroupSettings.h"
|
||||
@@ -45,14 +46,18 @@ struct FieldSettings {
|
||||
uint64_t last_update_time;
|
||||
};
|
||||
|
||||
struct JobWatchTableEntry {
|
||||
uint32_t group_id;
|
||||
std::vector<RdcFieldKey> fields; //< store fields for faster query
|
||||
};
|
||||
|
||||
class RdcWatchTableImpl : public RdcWatchTable {
|
||||
public:
|
||||
rdc_status_t rdc_job_start_stats(rdc_gpu_group_t group_id,
|
||||
char job_id[64]) override;
|
||||
rdc_status_t rdc_watch_job_fields(rdc_gpu_group_t group_id,
|
||||
uint64_t update_freq, double max_keep_age,
|
||||
uint32_t max_keep_samples) override;
|
||||
char job_id[64], uint64_t update_freq) override;
|
||||
rdc_status_t rdc_job_stop_stats(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove(char job_id[64]) override;
|
||||
rdc_status_t rdc_job_remove_all() override;
|
||||
|
||||
rdc_status_t rdc_field_watch(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id, uint64_t update_freq,
|
||||
@@ -84,10 +89,16 @@ class RdcWatchTableImpl : public RdcWatchTable {
|
||||
//!< Helper function to clean up the watch table and cache
|
||||
void clean_up();
|
||||
|
||||
//!< Helper function for debug information in watch table and cache
|
||||
void debug_status();
|
||||
|
||||
//!< Helper function to get the fields using the group and the field group.
|
||||
rdc_status_t get_fields_from_group(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id, std::vector<RdcFieldKey> & fields);
|
||||
rdc_field_grp_t field_group_id,
|
||||
std::vector<RdcFieldKey> & fields); // NOLINT
|
||||
|
||||
bool is_job_watch_field(uint32_t gpu_index, uint32_t field_id,
|
||||
std::string& job_id) const; // NOLINT
|
||||
|
||||
RdcGroupSettingsPtr group_settings_;
|
||||
RdcCacheManagerPtr cache_mgr_;
|
||||
@@ -96,6 +107,10 @@ class RdcWatchTableImpl : public RdcWatchTable {
|
||||
//!< The watch table to store the watch settings.
|
||||
std::map<RdcFieldKey, FieldSettings> watch_table_;
|
||||
|
||||
//!< Maps each job_id to its watch entry (GPU group id and watched fields)
|
||||
std::map<std::string, JobWatchTableEntry> job_watch_table_;
|
||||
|
||||
|
||||
//!< The settings for each field can be deduced from watch_table. But every
|
||||
//!< rdc_field_update_all() call needs to deduce them. To improve the
|
||||
//!< performance, the fields_to_watch_ is used to track the field settings.
|
||||
|
||||
@@ -24,12 +24,18 @@ THE SOFTWARE.
|
||||
#define RDC_LIB_RDC_COMMON_H_
|
||||
#include <iostream>
|
||||
|
||||
#define RDC_ERROR 0
|
||||
#define RDC_INFO 1
|
||||
#define RDC_DEBUG 2
|
||||
|
||||
#ifdef DEBUG
|
||||
#define LOG_DEBUG(message) std::cout << message << std::endl
|
||||
#else
|
||||
#define LOG_DEBUG(message)
|
||||
#endif
|
||||
#define RDC_LOG(debug_level, msg) do { \
|
||||
auto& logger = amd::rdc::RdcLogger::getLogger(); \
|
||||
if (logger.should_log((debug_level))) { \
|
||||
logger.get_ostream() << \
|
||||
logger.get_log_header((debug_level), __FILE__, __LINE__) << \
|
||||
msg << std::endl; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* @brief Like strncpy, but the result is always null-terminated
|
||||
|
||||
@@ -115,8 +115,10 @@ set(CMAKE_VERBOSE_MAKEFILE on)
|
||||
set(BOOTSTRAP_LIB "rdc_bootstrap")
|
||||
set(BOOTSTRAP_LIB_COMPONENT "lib${BOOTSTRAP_LIB}")
|
||||
set(BOOTSTRAP_LIB_SRC_LIST "${SRC_DIR}/bootstrap/src/RdcBootStrap.cc")
|
||||
set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLogger.cc")
|
||||
set(BOOTSTRAP_LIB_INC_LIST "${RDC_LIB_INC_DIR}/rdc/rdc.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/rdc_common.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLogger.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcHandler.h")
|
||||
|
||||
message("BOOTSTRAP_LIB_INC_LIST=${BOOTSTRAP_LIB_INC_LIST}")
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
#include <map>
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcHandler.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
static void* libHandler = nullptr;
|
||||
@@ -96,7 +97,7 @@ rdc_status_t rdc_start_embedded(rdc_operation_mode_t op_mode,
|
||||
|
||||
if (!libHandler) {
|
||||
error = dlerror();
|
||||
LOG_DEBUG("Fail to open librdc.so: " << error);
|
||||
RDC_LOG(RDC_ERROR, "Fail to open librdc.so: " << error);
|
||||
return RDC_ST_FAIL_LOAD_MODULE;
|
||||
}
|
||||
|
||||
@@ -104,7 +105,8 @@ rdc_status_t rdc_start_embedded(rdc_operation_mode_t op_mode,
|
||||
dlsym(libHandler, "make_handler");
|
||||
if (!func_make_handler) {
|
||||
error = dlerror();
|
||||
LOG_DEBUG("Fail to find function make_handler:" << error);
|
||||
RDC_LOG(RDC_ERROR,
|
||||
"Fail to find function make_handler:" << error);
|
||||
return RDC_ST_FAIL_LOAD_MODULE;
|
||||
}
|
||||
|
||||
@@ -144,15 +146,32 @@ rdc_status_t rdc_job_get_stats(rdc_handle_t p_rdc_handle, char job_id[64] ,
|
||||
}
|
||||
|
||||
rdc_status_t rdc_job_start_stats(rdc_handle_t p_rdc_handle,
|
||||
rdc_gpu_group_t groupId, char job_id[64], uint64_t update_freq,
|
||||
double max_keep_age, uint32_t max_keep_samples ) {
|
||||
rdc_gpu_group_t groupId, char job_id[64],
|
||||
uint64_t update_freq) {
|
||||
if (!p_rdc_handle) {
|
||||
return RDC_ST_INVALID_HANDLER;
|
||||
}
|
||||
|
||||
return static_cast<amd::rdc::RdcHandler*>(p_rdc_handle)->
|
||||
rdc_job_start_stats(groupId, job_id, update_freq,
|
||||
max_keep_age, max_keep_samples);
|
||||
rdc_job_start_stats(groupId, job_id, update_freq);
|
||||
}
|
||||
|
||||
rdc_status_t rdc_job_remove(rdc_handle_t p_rdc_handle, char job_id[64]) {
|
||||
if (!p_rdc_handle) {
|
||||
return RDC_ST_INVALID_HANDLER;
|
||||
}
|
||||
|
||||
return static_cast<amd::rdc::RdcHandler*>(p_rdc_handle)->
|
||||
rdc_job_remove(job_id);
|
||||
}
|
||||
|
||||
rdc_status_t rdc_job_remove_all(rdc_handle_t p_rdc_handle) {
|
||||
if (!p_rdc_handle) {
|
||||
return RDC_ST_INVALID_HANDLER;
|
||||
}
|
||||
|
||||
return static_cast<amd::rdc::RdcHandler*>(p_rdc_handle)->
|
||||
rdc_job_remove_all();
|
||||
}
|
||||
|
||||
|
||||
@@ -344,6 +363,8 @@ const char* rdc_status_string(rdc_status_t result) {
|
||||
return "The max limit reached";
|
||||
case RDC_ST_CONFLICT:
|
||||
return "Conflict with current state";
|
||||
case RDC_ST_ALREADY_EXIST:
|
||||
return "The value already exists";
|
||||
case RDC_ST_CLIENT_ERROR:
|
||||
return "RDC Client error";
|
||||
default:
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <chrono> // NOLINT
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Bind the logger to its output stream and pick the verbosity from the
// RDC_LOG environment variable: "DEBUG" and "INFO" raise the level, while an
// unset variable or any other value leaves it at RDC_ERROR.
RdcLogger::RdcLogger(std::ostream& os)
    : os_(os)
    , log_level_(RDC_ERROR) {
  const char* const requested = getenv("RDC_LOG");
  if (requested == nullptr) {
    return;
  }

  if (strcmp(requested, "DEBUG") == 0) {
    log_level_ = RDC_DEBUG;
  } else if (strcmp(requested, "INFO") == 0) {
    log_level_ = RDC_INFO;
  }
}
|
||||
|
||||
std::string RdcLogger::get_log_header(uint32_t severity,
|
||||
const char* file, int line) {
|
||||
std::stringstream strstream;
|
||||
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>
|
||||
(std::chrono::system_clock::now().time_since_epoch()).count();
|
||||
strstream << std::fixed << std::setprecision(3) << (ms/1000.0) << " ";
|
||||
if (severity == RDC_DEBUG) {
|
||||
strstream << "DEBUG ";
|
||||
} else if (severity == RDC_INFO) {
|
||||
strstream << "INFO ";
|
||||
} else {
|
||||
strstream << "ERROR ";
|
||||
}
|
||||
|
||||
// extract out the file path as it may be very long.
|
||||
if (file != nullptr) {
|
||||
std::string file_str(file);
|
||||
auto found = file_str.find_last_of("/");
|
||||
if (found != std::string::npos) {
|
||||
file_str = file_str.substr(found+1);
|
||||
}
|
||||
strstream << file_str << "(" << line << "): ";
|
||||
}
|
||||
|
||||
return strstream.str();
|
||||
}
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
@@ -22,6 +22,8 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcCacheManagerImpl.h"
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <sstream>
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
|
||||
@@ -127,15 +129,26 @@ rdc_status_t RdcCacheManagerImpl::rdc_field_get_latest_value(
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
uint32_t RdcCacheManagerImpl::get_cache_size() {
|
||||
uint32_t cache_size = 0;
|
||||
std::string RdcCacheManagerImpl::get_cache_stats() {
|
||||
std::stringstream strstream;
|
||||
std::lock_guard<std::mutex> guard(cache_mutex_);
|
||||
|
||||
strstream << "Cache samples:";
|
||||
auto cache_samples_ite = cache_samples_.begin();
|
||||
for (; cache_samples_ite != cache_samples_.end(); cache_samples_ite++) {
|
||||
cache_size+=cache_samples_ite->second.size();
|
||||
strstream << "<" << cache_samples_ite->first.first << ","
|
||||
<< cache_samples_ite->first.second << ":"
|
||||
<< cache_samples_ite->second.size() << "> ";
|
||||
}
|
||||
return cache_size;
|
||||
|
||||
strstream <<" Job caches:";
|
||||
auto job_ite = cache_jobs_.begin();
|
||||
for ( ; job_ite != cache_jobs_.end(); job_ite++ ) {
|
||||
strstream << "<" << job_ite->first << ":"
|
||||
<< job_ite->second.gpu_stats.size() << "> ";
|
||||
}
|
||||
|
||||
return strstream.str();
|
||||
}
|
||||
|
||||
rdc_status_t RdcCacheManagerImpl::rdc_update_cache(uint32_t gpu_index,
|
||||
@@ -162,5 +175,188 @@ rdc_status_t RdcCacheManagerImpl::rdc_update_cache(uint32_t gpu_index,
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
// Drop the cached statistics of one job. Removing a job that is not in the
// cache is not an error, the call still reports RDC_ST_OK.
rdc_status_t RdcCacheManagerImpl::rdc_job_remove(char job_id[64]) {
  std::lock_guard<std::mutex> lock(cache_mutex_);

  const auto entry = cache_jobs_.find(job_id);
  if (entry != cache_jobs_.end()) {
    cache_jobs_.erase(entry);
  }

  return RDC_ST_OK;
}
|
||||
|
||||
// Drop the cached statistics of every job. Always reports RDC_ST_OK.
rdc_status_t RdcCacheManagerImpl::rdc_job_remove_all() {
  {
    // The lock is only needed while the job map is emptied.
    std::lock_guard<std::mutex> lock(cache_mutex_);
    cache_jobs_.clear();
  }

  return RDC_ST_OK;
}
|
||||
|
||||
/**
 * @brief Fold one new sample into the running statistics of a job.
 *
 * Looks up the summary of the sample's field for the given job and GPU and
 * updates its count, minimum, maximum, running total and last timestamp.
 * Samples of RDC_FI_POWER_USAGE additionally feed the energy accumulator of
 * the GPU: each sample is integrated over the time elapsed since the
 * previous power sample.
 *
 * @param[in] gpu_index The GPU the sample was taken from.
 *
 * @param[in] job_id The job the sample belongs to.
 *
 * @param[in] value The sample. Its field_id, ts and value.l_int are read.
 *
 * @retval ::RDC_ST_OK is returned when the sample was recorded.
 * @retval ::RDC_ST_NOT_FOUND is returned when the job, the GPU or the field
 * is not present in the job cache.
 */
rdc_status_t RdcCacheManagerImpl::rdc_update_job_stats(uint32_t gpu_index,
            const std::string& job_id, const rdc_field_value& value) {
  std::lock_guard<std::mutex> guard(cache_mutex_);

  auto job_iter = cache_jobs_.find(job_id);
  if (job_iter == cache_jobs_.end()) {
    return RDC_ST_NOT_FOUND;
  }

  auto gpu_iter = job_iter->second.gpu_stats.find(gpu_index);
  if (gpu_iter == job_iter->second.gpu_stats.end()) {
    return RDC_ST_NOT_FOUND;
  }

  auto fsummary = gpu_iter->second.field_summaries.find(value.field_id);
  if (fsummary == gpu_iter->second.field_summaries.end()) {
    return RDC_ST_NOT_FOUND;
  }

  GpuSummaryStats& gpu_stats = gpu_iter->second;
  FieldSummaryStats& field_stats = fsummary->second;
  const bool is_power_sample = (value.field_id == RDC_FI_POWER_USAGE);

  if (field_stats.count == 0) {  // first item
    field_stats.count = 1;
    field_stats.max_value = value.value.l_int;
    field_stats.min_value = value.value.l_int;
    field_stats.total_value = value.value.l_int;
    field_stats.last_time = value.ts;
    if (is_power_sample) {
      // The first power sample only marks the start of the energy interval.
      gpu_stats.energy_last_time = value.ts;
    }
    return RDC_ST_OK;
  }

  // Only integrate forward in time: a timestamp that is not newer than the
  // previous one would underflow the unsigned subtraction below.
  if (is_power_sample && value.ts > gpu_stats.energy_last_time) {
    uint64_t time_elapsed = value.ts - gpu_stats.energy_last_time;
    // Stored in cache as microseconds and microwatts
    gpu_stats.energy_consumed +=
          (time_elapsed * value.value.l_int)/(1000.0*1000000);
    // Advance the interval start, otherwise every later sample would be
    // integrated over the whole time since the first one.
    gpu_stats.energy_last_time = value.ts;
  }

  field_stats.max_value = std::max(field_stats.max_value,
            static_cast<int64_t>(value.value.l_int));
  field_stats.min_value = std::min(field_stats.min_value,
            static_cast<int64_t>(value.value.l_int));
  field_stats.total_value += value.value.l_int;
  field_stats.last_time = value.ts;
  field_stats.count++;

  return RDC_ST_OK;
}
|
||||
|
||||
void RdcCacheManagerImpl::set_summary(const FieldSummaryStats & stats,
|
||||
rdc_stats_summary_t & gpu, rdc_stats_summary_t& summary,
|
||||
unsigned int adjuster) {
|
||||
if (stats.count == 0) return;
|
||||
|
||||
gpu.max_value = stats.max_value / adjuster;
|
||||
gpu.min_value = stats.min_value / adjuster;
|
||||
gpu.average = stats.total_value / stats.count / adjuster;
|
||||
summary.max_value = std::max(summary.max_value, gpu.max_value);
|
||||
summary.min_value = std::min(summary.min_value, gpu.min_value);
|
||||
//< save total for future average calculation.
|
||||
summary.average += gpu.average;
|
||||
}
|
||||
|
||||
/**
 * @brief Report the statistics collected for a job.
 *
 * Fills the job wide summary and one entry of p_job_info->gpus per GPU of
 * the job, indexed by the GPU index. While the job is still running (its
 * end time is 0) the current time is reported as the end time.
 *
 * @param[in] jobId The name of the job.
 *
 * @param[in] total_memory Total memory of each GPU, keyed by GPU index. It
 * is used to express the memory usage as a percentage; a GPU that is missing
 * from the map, or whose total is too small to form a percentage, gets no
 * memory utilization summary.
 *
 * @param[out] p_job_info Receives the result. Must not be null.
 *
 * @retval ::RDC_ST_OK is returned upon successful call.
 * @retval ::RDC_ST_NOT_FOUND is returned when the job is not in the cache.
 */
rdc_status_t RdcCacheManagerImpl::rdc_job_get_stats(char jobId[64],
            const rdc_gpu_total_memory_t& total_memory,
            rdc_job_info_t* p_job_info) {
  std::lock_guard<std::mutex> guard(cache_mutex_);
  auto job_stats = cache_jobs_.find(jobId);

  if (job_stats == cache_jobs_.end()) {
    return RDC_ST_NOT_FOUND;
  }

  //< Init the summary info
  RDC_LOG(RDC_DEBUG, "rdc_job_get_stats for job " <<jobId);
  auto& summary_info = p_job_info->summary;
  summary_info.start_time = job_stats->second.start_time;
  if (job_stats->second.end_time == 0) {
    summary_info.end_time = time(nullptr);
  } else {
    summary_info.end_time = job_stats->second.end_time;
  }
  summary_info.energy_consumed = 0;
  summary_info.max_gpu_memory_used = 0;
  // The minimum starts at the largest value so the first GPU replaces it.
  summary_info.power_usage = {0, std::numeric_limits<uint64_t>::max(), 0};
  summary_info.gpu_clock = {0, std::numeric_limits<uint64_t>::max(), 0};
  summary_info.gpu_utilization = {0, std::numeric_limits<uint64_t>::max(), 0};
  summary_info.memory_utilization = {0,
            std::numeric_limits<uint64_t>::max(), 0};

  p_job_info->num_gpus = job_stats->second.gpu_stats.size();

  //< Populate information for each GPUs
  for (const auto& gpu_entry : job_stats->second.gpu_stats) {
    const uint32_t gpu_index = gpu_entry.first;
    const GpuSummaryStats& gpu_stats = gpu_entry.second;

    auto & gpu_info = p_job_info->gpus[gpu_index];
    gpu_info.start_time = summary_info.start_time;
    gpu_info.end_time = summary_info.end_time;
    gpu_info.energy_consumed = gpu_stats.energy_consumed;
    summary_info.energy_consumed += gpu_info.energy_consumed;

    for (const auto& field_entry : gpu_stats.field_summaries) {
      const FieldSummaryStats& field_stats = field_entry.second;

      if (field_entry.first == RDC_FI_POWER_USAGE) {
        set_summary(field_stats,
              gpu_info.power_usage, summary_info.power_usage, 1000000);
      } else if (field_entry.first == RDC_FI_GPU_MEMORY_USAGE) {
        // Look the total up without throwing: the map is supplied by the
        // caller and is not guaranteed to contain every GPU of the job.
        const auto tmemory = total_memory.find(gpu_index);
        const unsigned int percent_unit = (tmemory == total_memory.end()) ?
              0 : static_cast<unsigned int>(tmemory->second/100);
        // A zero divisor cannot produce a percentage, skip the summary.
        if (percent_unit != 0) {
          set_summary(field_stats, gpu_info.memory_utilization,
                summary_info.memory_utilization, percent_unit);
        }
        gpu_info.max_gpu_memory_used = field_stats.max_value;
        summary_info.max_gpu_memory_used = std::max(
              summary_info.max_gpu_memory_used,
              gpu_info.max_gpu_memory_used);
      } else if (field_entry.first == RDC_FI_GPU_SM_CLOCK) {
        set_summary(field_stats, gpu_info.gpu_clock,
              summary_info.gpu_clock, 1000000);
      } else if (field_entry.first == RDC_FI_GPU_UTIL) {
        set_summary(field_stats, gpu_info.gpu_utilization,
              summary_info.gpu_utilization, 1);
      }
    }
  }

  // Get the average of the summary. set_summary() accumulated the per GPU
  // averages; a job without GPUs has nothing to average.
  if (p_job_info->num_gpus > 0) {
    summary_info.power_usage.average = summary_info.power_usage.average/
                p_job_info->num_gpus;
    summary_info.gpu_clock.average = summary_info.gpu_clock.average/
                p_job_info->num_gpus;
    summary_info.gpu_utilization.average =
          summary_info.gpu_utilization.average/p_job_info->num_gpus;
    summary_info.memory_utilization.average =
          summary_info.memory_utilization.average/p_job_info->num_gpus;
  }

  return RDC_ST_OK;
}
|
||||
|
||||
/**
 * @brief Create the statistics entry of a job that starts recording.
 *
 * The entry gets the current time as start time, an end time of 0 and, for
 * every GPU of the group, one zeroed summary per field of the field group.
 * If the job id is already present in the cache the existing entry is left
 * untouched.
 *
 * @param[in] job_id The name of the job.
 *
 * @param[in] ginfo The GPU group of the job; count and entity_ids are read.
 *
 * @param[in] finfo The fields to track; count and field_ids are read.
 *
 * @retval ::RDC_ST_OK is always returned.
 */
rdc_status_t RdcCacheManagerImpl::rdc_job_start_stats(char job_id[64],
    const rdc_group_info_t& ginfo, const rdc_field_group_info_t& finfo) {
  RdcJobStatsCacheEntry cacheEntry;
  cacheEntry.start_time = std::time(nullptr);
  cacheEntry.end_time = 0;

  for (unsigned int i = 0; i < ginfo.count; i++) {  // GPUs
    GpuSummaryStats gstats;
    gstats.energy_consumed = 0;
    gstats.energy_last_time = 0;

    for (unsigned int j = 0; j < finfo.count; j++) {  // init fields
      FieldSummaryStats s;
      s.count = 0;
      s.max_value = s.min_value = s.total_value = 0;
      // Also clear the timestamp so no indeterminate value is copied
      // into the cache.
      s.last_time = 0;
      gstats.field_summaries.insert({finfo.field_ids[j], s});
    }

    cacheEntry.gpu_stats.insert({ginfo.entity_ids[i], gstats});
  }

  // The entry is built without the lock, only the insertion needs it.
  std::lock_guard<std::mutex> guard(cache_mutex_);
  cache_jobs_.insert({job_id, cacheEntry});
  return RDC_ST_OK;
}
|
||||
|
||||
|
||||
// Mark a cached job as finished by stamping its end_time with the current
// wall-clock time. The entry itself stays in cache_jobs_.
//
// Returns RDC_ST_NOT_FOUND when job_id has no cache entry, RDC_ST_OK
// otherwise.
rdc_status_t RdcCacheManagerImpl::rdc_job_stop_stats(char job_id[64]) {
    std::lock_guard<std::mutex> guard(cache_mutex_);

    const auto entry = cache_jobs_.find(job_id);
    if (entry != cache_jobs_.end()) {
        entry->second.end_time = std::time(nullptr);
        return RDC_ST_OK;
    }

    return RDC_ST_NOT_FOUND;
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
@@ -27,6 +27,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcCacheManagerImpl.h"
|
||||
#include "rdc_lib/impl/RdcWatchTableImpl.h"
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
@@ -73,6 +74,7 @@ RdcEmbeddedHandler::RdcEmbeddedHandler(rdc_operation_mode_t mode):
|
||||
, metrics_updater_(new RdcMetricsUpdaterImpl(watch_table_,
|
||||
METIC_UPDATE_FREQUENCY)) {
|
||||
if (mode == RDC_OPERATION_MODE_AUTO) {
|
||||
RDC_LOG(RDC_DEBUG, "Run RDC with RDC_OPERATION_MODE_AUTO");
|
||||
metrics_updater_->start();
|
||||
}
|
||||
}
|
||||
@@ -83,32 +85,49 @@ RdcEmbeddedHandler::~RdcEmbeddedHandler() {
|
||||
|
||||
// JOB API
|
||||
// Start recording the statistics of job_id on the GPUs of groupId.
//
// The request is forwarded unchanged to the watch table; update_freq is
// passed through as the sampling frequency of the job fields.
// Returns the status reported by the watch table.
rdc_status_t RdcEmbeddedHandler::rdc_job_start_stats(rdc_gpu_group_t groupId,
                char job_id[64], uint64_t update_freq) {
    return watch_table_->rdc_job_start_stats(groupId, job_id, update_freq);
}
|
||||
|
||||
// Collect the statistics recorded for job_id into p_job_info.
//
// The total memory (RDC_FI_GPU_MEMORY_TOTAL) of every GPU returned by
// rdc_device_get_all() is fetched first and handed to the cache manager
// together with job_id; the cache manager fills p_job_info.
//
// Returns the first non-OK status of the device discovery or of a total
// memory fetch; otherwise the status of the cache manager call.
rdc_status_t RdcEmbeddedHandler::rdc_job_get_stats(char job_id[64],
                rdc_job_info_t* p_job_info) {
    uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES];
    uint32_t count = 0;
    rdc_status_t status = rdc_device_get_all(gpu_index_list, &count);
    if (status != RDC_ST_OK) {
        return status;
    }

    // GPU index -> total memory, as read from the l_int member of the
    // fetched field value.
    rdc_gpu_total_memory_t all_total_memory;

    for (uint32_t i = 0; i < count; i++) {
        rdc_field_value total_memory;
        status = metric_fetcher_->fetch_smi_field(gpu_index_list[i],
                    RDC_FI_GPU_MEMORY_TOTAL, &total_memory);
        if (status != RDC_ST_OK) {
            RDC_LOG(RDC_ERROR, "Fail to get total memory of GPU "
                    << gpu_index_list[i]);
            return status;
        }
        all_total_memory.insert({gpu_index_list[i],
                    total_memory.value.l_int});
    }

    return cache_mgr_->rdc_job_get_stats(job_id, all_total_memory,
                p_job_info);
}
|
||||
|
||||
// Stop recording the statistics of job_id.
// The request is forwarded to the watch table and its status is returned.
rdc_status_t RdcEmbeddedHandler::rdc_job_stop_stats(char job_id[64]) {
    return watch_table_->rdc_job_stop_stats(job_id);
}
|
||||
|
||||
// Remove job_id; the work is delegated to the watch table and its status
// is handed back to the caller.
rdc_status_t RdcEmbeddedHandler::rdc_job_remove(char job_id[64]) {
    const rdc_status_t status = watch_table_->rdc_job_remove(job_id);
    return status;
}
|
||||
|
||||
|
||||
// Remove every job; the work is delegated to the watch table and its
// status is handed back to the caller.
rdc_status_t RdcEmbeddedHandler::rdc_job_remove_all() {
    const rdc_status_t status = watch_table_->rdc_job_remove_all();
    return status;
}
|
||||
|
||||
// Discovery API
|
||||
rdc_status_t RdcEmbeddedHandler::rdc_device_get_all(
|
||||
@@ -194,6 +213,8 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id,
|
||||
}
|
||||
|
||||
if (!is_gpu_exist) {
|
||||
RDC_LOG(RDC_INFO, "Fail to add GPU index " << gpu_index << " to group "
|
||||
<< group_id <<" as the GPU index is invalid.");
|
||||
return RDC_ST_NOT_FOUND;
|
||||
}
|
||||
|
||||
@@ -211,6 +232,9 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids,
|
||||
if (num_field_ids <= RDC_MAX_FIELD_IDS_PER_FIELD_GROUP) {
|
||||
for (uint32_t i = 0; i < num_field_ids; i++) {
|
||||
if (!metric_fetcher_->is_field_valid(field_ids[i])) {
|
||||
RDC_LOG(RDC_INFO,
|
||||
"Fail to create field group with unknown field id "
|
||||
<< field_ids[i]);
|
||||
return RDC_ST_NOT_SUPPORTED;
|
||||
}
|
||||
}
|
||||
@@ -285,6 +309,9 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value(
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
if (!metric_fetcher_->is_field_valid(field)) {
|
||||
RDC_LOG(RDC_INFO,
|
||||
"Fail to get latest value with unknown field id "
|
||||
<< field);
|
||||
return RDC_ST_NOT_SUPPORTED;
|
||||
}
|
||||
return cache_mgr_->rdc_field_get_latest_value(gpu_index, field, value);
|
||||
@@ -297,6 +324,9 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_get_value_since(uint32_t gpu_index,
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
if (!metric_fetcher_->is_field_valid(field)) {
|
||||
RDC_LOG(RDC_INFO,
|
||||
"Fail to get value since with unknown field id "
|
||||
<< field);
|
||||
return RDC_ST_NOT_SUPPORTED;
|
||||
}
|
||||
return cache_mgr_->rdc_field_get_value_since(gpu_index, field,
|
||||
|
||||
@@ -22,11 +22,20 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcGroupSettingsImpl.h"
|
||||
#include <ctime>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Register the built-in field group that holds the fields sampled for job
// statistics (memory usage, power usage, SM clock and GPU utilization).
//
// Other code looks this group up by JOB_FIELD_ID, so a failed creation or
// an unexpected id is logged instead of being dropped silently. A
// constructor cannot return a status, so logging is all that is done here.
RdcGroupSettingsImpl::RdcGroupSettingsImpl() {
    // Add the default job stats fields
    uint32_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE,
        RDC_FI_POWER_USAGE, RDC_FI_GPU_SM_CLOCK, RDC_FI_GPU_UTIL};
    char job_field_group[] = "JobStatsFields";
    rdc_field_grp_t fgid = JOB_FIELD_ID;

    const rdc_status_t status = rdc_group_field_create(
        sizeof(job_fields)/sizeof(job_fields[0]),
        job_fields, job_field_group, &fgid);

    if (status != RDC_ST_OK) {
        RDC_LOG(RDC_ERROR,
            "Fail to create the default job stats field group, status "
            << status);
    } else if (fgid != JOB_FIELD_ID) {
        // rdc_group_field_create() assigns the id itself; job statistics
        // address the group through JOB_FIELD_ID.
        RDC_LOG(RDC_ERROR, "The default job stats field group got id "
            << fgid << " instead of JOB_FIELD_ID");
    }
}
|
||||
|
||||
rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create(
|
||||
@@ -62,6 +71,8 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_add(
|
||||
// Check whether the index already exists
|
||||
for (uint32_t i=0; i < ite->second.count; i++) {
|
||||
if (ite->second.entity_ids[i] == gpu_index) {
|
||||
RDC_LOG(RDC_INFO, "Fail to add " << gpu_index
|
||||
<<" to GPU group " << groupId << " as it is already exists");
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
}
|
||||
@@ -136,15 +147,19 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create(
|
||||
if (field_group_.size() >= RDC_MAX_NUM_FIELD_GROUPS) {
|
||||
return RDC_ST_MAX_LIMIT;
|
||||
}
|
||||
field_group_.emplace(cur_filed_group_id_, finfo);
|
||||
*rdc_field_group_id = cur_filed_group_id_;
|
||||
cur_filed_group_id_++;
|
||||
field_group_.emplace(cur_field_group_id_, finfo);
|
||||
*rdc_field_group_id = cur_field_group_id_;
|
||||
cur_field_group_id_++;
|
||||
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
rdc_status_t RdcGroupSettingsImpl::rdc_group_field_destroy(
|
||||
rdc_field_grp_t rdc_field_group_id) {
|
||||
if (rdc_field_group_id == JOB_FIELD_ID) {
|
||||
RDC_LOG(RDC_INFO, "Cannot delete system JOB_FIELD_ID field group");
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
std::lock_guard<std::mutex> guard(field_group_mutex_);
|
||||
field_group_.erase(rdc_field_group_id);
|
||||
return RDC_ST_OK;
|
||||
@@ -183,6 +198,10 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_field_get_all_ids(
|
||||
if (*count >= RDC_MAX_NUM_FIELD_GROUPS) {
|
||||
return RDC_ST_MAX_LIMIT;
|
||||
}
|
||||
|
||||
// Skip system defined JOB_FIELD_ID
|
||||
if (ite->first == JOB_FIELD_ID) continue;
|
||||
|
||||
field_group_id_list[*count] = ite->first;
|
||||
(*count)++;
|
||||
}
|
||||
|
||||
@@ -22,10 +22,11 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcMetricFetcherImpl.h"
|
||||
#include <sys/time.h>
|
||||
#include <string.h>
|
||||
#include <chrono>
|
||||
#include <chrono> //NOLINT
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
namespace amd {
|
||||
@@ -48,6 +49,8 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index,
|
||||
uint64_t i64 = 0;
|
||||
|
||||
if (!is_field_valid(field_id)) {
|
||||
RDC_LOG(RDC_ERROR, "Fail to fetch field " << field_id
|
||||
<< " which is not supported");
|
||||
return RDC_ST_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
@@ -125,6 +128,27 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index,
|
||||
break;
|
||||
}
|
||||
|
||||
gettimeofday(&tv, NULL);
|
||||
int64_t latency = static_cast<uint64_t>(tv.tv_sec)*1000+tv.tv_usec/1000
|
||||
- value->ts;
|
||||
if (value->status != RSMI_STATUS_SUCCESS) {
|
||||
RDC_LOG(RDC_ERROR, "Fail to fetch " << gpu_index << ":" <<
|
||||
field_id_string(field_id) << " with rsmi error code "
|
||||
<< value->status <<", latency " << latency);
|
||||
} else if (value->type == INTEGER) {
|
||||
RDC_LOG(RDC_DEBUG, "Fetch " << gpu_index << ":" <<
|
||||
field_id_string(field_id) << ":" << value->value.l_int
|
||||
<< ", latency " << latency);
|
||||
} else if (value->type == DOUBLE) {
|
||||
RDC_LOG(RDC_DEBUG, "Fetch " << gpu_index << ":" <<
|
||||
field_id_string(field_id) << ":" << value->value.dbl
|
||||
<< ", latency " << latency);
|
||||
} else if (value->type == STRING) {
|
||||
RDC_LOG(RDC_DEBUG, "Fetch " << gpu_index << ":" <<
|
||||
field_id_string(field_id) << ":" << value->value.str
|
||||
<< ", latency " << latency);
|
||||
}
|
||||
|
||||
return value->status == RSMI_STATUS_SUCCESS ? RDC_ST_OK : RDC_ST_MSI_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@@ -23,8 +23,10 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcWatchTableImpl.h"
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
@@ -39,24 +41,92 @@ RdcWatchTableImpl::RdcWatchTableImpl(const RdcGroupSettingsPtr& group_settings,
|
||||
}
|
||||
|
||||
// Start recording job statistics for job_id on the GPUs of group_id.
//
// Steps:
//   1. reject a job id that is already being recorded;
//   2. resolve the <gpu, field> keys, the GPU group info and the
//      JOB_FIELD_ID field group info; nothing is registered yet, so a
//      failure here leaves no trace;
//   3. register the job in job_watch_table_ (under watch_mutex_);
//   4. start watching the JOB_FIELD_ID fields with update_freq;
//   5. create the job entry in the cache manager.
// When step 4 or 5 fails, the registration of step 3 is rolled back so the
// same job id can be started again later.
//
// Returns RDC_ST_ALREADY_EXIST for a duplicate job id, otherwise the first
// non-OK status of the steps above, or RDC_ST_OK.
rdc_status_t RdcWatchTableImpl::rdc_job_start_stats(rdc_gpu_group_t group_id,
                char job_id[64], uint64_t update_freq) {
    {  //< lock guard for thread safe
        std::lock_guard<std::mutex> guard(watch_mutex_);
        if (job_watch_table_.find(job_id) != job_watch_table_.end()) {
            return RDC_ST_ALREADY_EXIST;
        }
    }

    std::vector<RdcFieldKey> fields_in_watch;
    rdc_status_t result = get_fields_from_group(group_id,
                JOB_FIELD_ID, fields_in_watch);
    if (result != RDC_ST_OK) {
        return result;
    }

    rdc_field_group_info_t finfo;
    rdc_group_info_t ginfo;
    result = group_settings_->rdc_group_gpu_get_info(group_id, &ginfo);
    if (result != RDC_ST_OK) {
        return result;
    }
    result = group_settings_->rdc_group_field_get_info(JOB_FIELD_ID, &finfo);
    if (result != RDC_ST_OK) {
        return result;
    }

    {  //< lock guard for thread safe
        std::lock_guard<std::mutex> guard(watch_mutex_);
        JobWatchTableEntry jentry {group_id, fields_in_watch};
        // The lock was released after the first check, so another thread
        // may have registered the same job id in the meantime.
        if (!job_watch_table_.insert({job_id, jentry}).second) {
            return RDC_ST_ALREADY_EXIST;
        }
    }

    // Drops the registration made above. The lock is taken here and not
    // around the calls below, mirroring rdc_job_stop_stats(), which also
    // releases watch_mutex_ before calling rdc_field_unwatch().
    const auto forget_job = [this, job_id]() {
        std::lock_guard<std::mutex> guard(watch_mutex_);
        job_watch_table_.erase(job_id);
    };

    result = rdc_field_watch(group_id, JOB_FIELD_ID, update_freq, 0, 0);
    if (result != RDC_ST_OK) {
        forget_job();
        return result;
    }

    result = cache_mgr_->rdc_job_start_stats(job_id, ginfo, finfo);
    if (result != RDC_ST_OK) {
        // Undo the watch started for this job before dropping the entry.
        rdc_field_unwatch(group_id, JOB_FIELD_ID);
        forget_job();
    }

    return result;
}
|
||||
|
||||
rdc_status_t RdcWatchTableImpl::rdc_watch_job_fields(rdc_gpu_group_t group_id,
|
||||
uint64_t update_freq, double max_keep_age,
|
||||
uint32_t max_keep_samples) {
|
||||
// TODO(bill_liu): implement
|
||||
(void)(group_id);
|
||||
(void)(update_freq);
|
||||
(void)(max_keep_age);
|
||||
(void)(max_keep_samples);
|
||||
return RDC_ST_OK;
|
||||
// Stop recording job_id: unwatch the JOB_FIELD_ID fields of the job's GPU
// group, drop the job from job_watch_table_ and let the cache manager mark
// the job as stopped.
//
// watch_mutex_ is only held while job_watch_table_ itself is touched and is
// released before rdc_field_unwatch() is called.
//
// Returns RDC_ST_NOT_FOUND for an unknown job id, the unwatch status when
// unwatching fails (the job then stays registered), otherwise the status of
// the cache manager.
rdc_status_t RdcWatchTableImpl::rdc_job_stop_stats(char job_id[64]) {
    uint32_t watched_group;
    {  //< lock guard for thread safe
        std::lock_guard<std::mutex> guard(watch_mutex_);
        const auto entry = job_watch_table_.find(job_id);
        if (entry == job_watch_table_.end()) {
            return RDC_ST_NOT_FOUND;
        }
        watched_group = entry->second.group_id;
    }

    const rdc_status_t unwatch_status =
                rdc_field_unwatch(watched_group, JOB_FIELD_ID);
    if (unwatch_status != RDC_ST_OK) {
        return unwatch_status;
    }

    {  //< lock guard for thread safe
        std::lock_guard<std::mutex> guard(watch_mutex_);
        job_watch_table_.erase(job_id);
    }

    return cache_mgr_->rdc_job_stop_stats(job_id);
}
|
||||
|
||||
// Stop job_id and then remove it from the cache manager.
//
// NOTE(review): the status of the stop call is not checked — presumably so
// that a job which was already stopped can still be removed; confirm.
// Returns the status of the cache manager removal.
rdc_status_t RdcWatchTableImpl::rdc_job_remove(char job_id[64]) {
    static_cast<void>(rdc_job_stop_stats(job_id));

    const rdc_status_t removed = cache_mgr_->rdc_job_remove(job_id);
    return removed;
}
|
||||
|
||||
// Stop every job that is currently registered in job_watch_table_ and then
// ask the cache manager to remove all jobs.
//
// The job ids are copied while watch_mutex_ is held; the jobs are stopped
// afterwards, outside the lock, because rdc_job_stop_stats() takes
// watch_mutex_ itself. The per-job stop status is not checked.
// Returns the status of the cache manager.
rdc_status_t RdcWatchTableImpl::rdc_job_remove_all() {
    // Snapshot of all the job ids
    std::vector<std::string> job_ids;
    {  //< lock guard for thread safe
        std::lock_guard<std::mutex> guard(watch_mutex_);
        for (const auto& entry : job_watch_table_) {
            job_ids.push_back(entry.first);
        }
    }

    // Stop them; rdc_job_stop_stats() takes a non-const char array, hence
    // the const_cast on the string buffer.
    for (const auto& id : job_ids) {
        rdc_job_stop_stats(const_cast<char*>(id.c_str()));
    }

    return cache_mgr_->rdc_job_remove_all();
}
|
||||
|
||||
|
||||
rdc_status_t RdcWatchTableImpl::get_fields_from_group(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id, std::vector<RdcFieldKey> & fields) {
|
||||
rdc_field_group_info_t finfo;
|
||||
@@ -228,6 +298,21 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch(
|
||||
return update_field_in_table_when_unwatch(ite->first);
|
||||
}
|
||||
|
||||
bool RdcWatchTableImpl::is_job_watch_field(uint32_t gpu_index,
|
||||
uint32_t field_id, std::string& job_id) const {
|
||||
RdcFieldKey key{gpu_index, field_id};
|
||||
|
||||
for (auto ite = job_watch_table_.begin();
|
||||
ite != job_watch_table_.end(); ite++) {
|
||||
auto& fields = ite->second.fields;
|
||||
if (std::find(fields.begin(), fields.end(), key) != fields.end()) {
|
||||
job_id = ite->first;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
rdc_status_t RdcWatchTableImpl::rdc_field_update_all() {
|
||||
uint32_t items_fetched = 0;
|
||||
@@ -251,13 +336,19 @@ rdc_status_t RdcWatchTableImpl::rdc_field_update_all() {
|
||||
result = metric_fetcher_->fetch_smi_field(
|
||||
fite->first.first, fite->first.second, &value);
|
||||
if (result != RDC_ST_OK) {
|
||||
LOG_DEBUG("Fail to fetch the field: " << rdc_status_string(result));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update the cache
|
||||
cache_mgr_->rdc_update_cache(fite->first.first, value);
|
||||
|
||||
// Update the job stats cache
|
||||
std::string job_id;
|
||||
if (is_job_watch_field(fite->first.first, fite->first.second, job_id)) {
|
||||
cache_mgr_->rdc_update_job_stats(fite->first.first, job_id, value);
|
||||
}
|
||||
|
||||
|
||||
// Update the last_upate_time
|
||||
gettimeofday(&tv, NULL);
|
||||
now = static_cast<uint64_t>(tv.tv_sec)*1000+tv.tv_usec/1000;
|
||||
@@ -303,6 +394,56 @@ void RdcWatchTableImpl::clean_up() {
|
||||
++wite;
|
||||
}
|
||||
}
|
||||
|
||||
// Debug log every 30 seconds
|
||||
if (now/1000%30 == 0) {
|
||||
debug_status();
|
||||
}
|
||||
}
|
||||
|
||||
// Write the state of the watch table to the log at RDC_DEBUG level:
// first one line with the sizes of fields_to_watch_, watch_table_ and
// job_watch_table_ plus the cache statistics, then, for each non-empty
// table, a header line followed by one line per entry.
void RdcWatchTableImpl::debug_status() {
    RDC_LOG(RDC_DEBUG, "fields_to_watch_:" << fields_to_watch_.size()
        << " watch_table_:" << watch_table_.size()
        << " job_watch_table_:" << job_watch_table_.size()
        << " cache stats:" << cache_mgr_->get_cache_stats());

    // Watches keyed by <group, field group>
    if (!watch_table_.empty()) {
        RDC_LOG(RDC_DEBUG, "watch table details:");
    }
    for (const auto& watch : watch_table_) {
        RDC_LOG(RDC_DEBUG, watch.first.first << "," << watch.first.second
            << ": age:" << watch.second.max_keep_age << ", samples:"
            << watch.second.max_keep_samples << ", is_watching:"
            << watch.second.is_watching << ", last_update_time:"
            << watch.second.last_update_time <<", update_freq:"
            << watch.second.update_freq);
    }

    // Jobs with their group and the list of <gpu, field> keys
    if (!job_watch_table_.empty()) {
        RDC_LOG(RDC_DEBUG, "job watch table details: ");
    }
    for (const auto& job : job_watch_table_) {
        std::stringstream field_list;
        for (const auto& key : job.second.fields) {
            field_list << "<" << key.first << "," << key.second << "> ";
        }
        RDC_LOG(RDC_DEBUG, job.first << ": " << job.second.group_id
            << " fields : "<< field_list.str());
    }

    // Individual fields that are being sampled
    if (!fields_to_watch_.empty()) {
        RDC_LOG(RDC_DEBUG, "fields to watch details:");
    }
    for (const auto& field : fields_to_watch_) {
        RDC_LOG(RDC_DEBUG, field.first.first << "," << field.first.second
            << ": age:" << field.second.max_keep_age << ", samples:"
            << field.second.max_keep_samples << ", is_watching:"
            << field.second.is_watching << ", last_update_time:"
            << field.second.last_update_time <<", update_freq:"
            << field.second.update_freq);
    }
}
|
||||
|
||||
} // namespace rdc
|
||||
|
||||
@@ -62,19 +62,16 @@ rdc_status_t RdcStandaloneHandler::error_handle(::grpc::Status status,
|
||||
|
||||
// JOB RdcAPI
|
||||
// Placeholder for starting the job statistics in standalone mode: the
// arguments are ignored and RDC_ST_OK is returned.
rdc_status_t RdcStandaloneHandler::rdc_job_start_stats(rdc_gpu_group_t groupId,
                char job_id[64], uint64_t update_freq) {
    // TODO(bill_liu): implement
    (void)(groupId);
    (void)(job_id);
    (void)(update_freq);

    return RDC_ST_OK;
}
|
||||
|
||||
rdc_status_t RdcStandaloneHandler::rdc_job_get_stats(char job_id[64],
|
||||
rdc_status_t RdcStandaloneHandler::rdc_job_get_stats(char job_id[64],
|
||||
rdc_job_info_t* p_job_info) {
|
||||
// TODO(bill_liu): implement
|
||||
(void)(job_id);
|
||||
@@ -82,13 +79,25 @@ rdc_status_t RdcStandaloneHandler::rdc_job_get_stats(char job_id[64],
|
||||
return RDC_ST_OK;
|
||||
}
|
||||
|
||||
// Placeholder for stopping the job statistics in standalone mode: job_id
// is ignored and RDC_ST_OK is returned.
rdc_status_t RdcStandaloneHandler::rdc_job_stop_stats(char job_id[64]) {
    // TODO(bill_liu): implement
    (void)(job_id);
    return RDC_ST_OK;
}
|
||||
|
||||
|
||||
// Placeholder for removing a job in standalone mode: job_id is ignored and
// RDC_ST_OK is returned.
rdc_status_t RdcStandaloneHandler::rdc_job_remove(char job_id[64]) {
    // TODO(bill_liu): implement
    static_cast<void>(job_id);

    const rdc_status_t status = RDC_ST_OK;
    return status;
}
|
||||
|
||||
// Placeholder for removing all jobs in standalone mode: nothing is done
// and RDC_ST_OK is returned.
rdc_status_t RdcStandaloneHandler::rdc_job_remove_all() {
    // TODO(bill_liu): implement
    const rdc_status_t status = RDC_ST_OK;
    return status;
}
|
||||
|
||||
|
||||
// Discovery RdcAPI
|
||||
rdc_status_t RdcStandaloneHandler::rdc_device_get_all(
|
||||
uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES], uint32_t* count) {
|
||||
|
||||
@@ -58,6 +58,7 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
const struct option long_options[] = {
|
||||
{"host", required_argument, nullptr, HOST_OPTIONS },
|
||||
{"help", optional_argument, nullptr, 'h' },
|
||||
{"unauth", optional_argument, nullptr, 'u' },
|
||||
{"list", optional_argument, nullptr, 'l' },
|
||||
{"field-group-id", required_argument, nullptr, 'f' },
|
||||
{"field-id", required_argument, nullptr, 'e' },
|
||||
@@ -73,7 +74,7 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
std::string gpu_indexes;
|
||||
std::string field_ids;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hlf:g:c:d:e:i:",
|
||||
while ((opt = getopt_long(argc, argv, "hluf:g:c:d:e:i:",
|
||||
long_options, &option_index)) != -1) {
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
@@ -82,9 +83,12 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
case 'h':
|
||||
dmon_ops_ = DMON_HELP;
|
||||
return;
|
||||
case 'u':
|
||||
use_auth_ = false;
|
||||
break;
|
||||
case 'l':
|
||||
dmon_ops_ = DMON_LIST_FIELDS;
|
||||
return;
|
||||
break;
|
||||
case 'f':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
@@ -130,6 +134,10 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
}
|
||||
}
|
||||
|
||||
if (dmon_ops_ == DMON_LIST_FIELDS) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (options_.find(OPTIONS_FIELD_GROUP_ID) == options_.end()) {
|
||||
if (field_ids == "") {
|
||||
show_help();
|
||||
|
||||
@@ -40,6 +40,7 @@ void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
const struct option long_options[] = {
|
||||
{"host", required_argument, nullptr, HOST_OPTIONS },
|
||||
{"help", optional_argument, nullptr, 'h' },
|
||||
{"unauth", optional_argument, nullptr, 'u' },
|
||||
{"list", optional_argument, nullptr, 'l' },
|
||||
{"group", required_argument, nullptr, 'g'},
|
||||
{"create", required_argument, nullptr, 'c' },
|
||||
@@ -52,7 +53,7 @@ void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
int option_index = 0;
|
||||
int opt = 0;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hlif:c:g:d:",
|
||||
while ((opt = getopt_long(argc, argv, "hluif:c:g:d:",
|
||||
long_options, &option_index)) != -1) {
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
@@ -61,6 +62,9 @@ void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
case 'h':
|
||||
field_group_ops_ = FIELD_GROUP_HELP;
|
||||
return;
|
||||
case 'u':
|
||||
use_auth_ = false;
|
||||
break;
|
||||
case 'l':
|
||||
field_group_ops_ = FIELD_GROUP_LIST;
|
||||
break;
|
||||
|
||||
@@ -40,6 +40,7 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
const struct option long_options[] = {
|
||||
{"host", required_argument, nullptr, HOST_OPTIONS },
|
||||
{"help", optional_argument, nullptr, 'h' },
|
||||
{"unauth", optional_argument, nullptr, 'u' },
|
||||
{"list", optional_argument, nullptr, 'l' },
|
||||
{"group", required_argument, nullptr, 'g'},
|
||||
{"create", required_argument, nullptr, 'c' },
|
||||
@@ -52,7 +53,7 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
int option_index = 0;
|
||||
int opt = 0;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hlic:g:a:d:",
|
||||
while ((opt = getopt_long(argc, argv, "hluic:g:a:d:",
|
||||
long_options, &option_index)) != -1) {
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
@@ -61,6 +62,9 @@ void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
case 'h':
|
||||
group_ops_ = GROUP_HELP;
|
||||
return;
|
||||
case 'u':
|
||||
use_auth_ = false;
|
||||
break;
|
||||
case 'l':
|
||||
group_ops_ = GROUP_LIST;
|
||||
break;
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user