Implement the rdci subsystem: group, fieldgroup and dmon
Add the support for rdci subsystem group create, delete and query
Add the support for rdci subsystem fieldgroup create, delete and query
Add support for the rdci dmon subsystem. The dmon subsystem may show the stats every
few seconds until Ctrl-C is pressed. To clean up the resources (for example, unwatch),
a signal handler is added.
Change-Id: Ib22a8a43b7083c7c72819ca21145e22702d9ad6c
[ROCm/rdc commit: 16bce67835]
このコミットが含まれているのは:
@@ -2,15 +2,17 @@
|
||||
Radeon Data Center
|
||||
|
||||
## To run the rdcd and rdci from the build folder without authentication
|
||||
Note: LD_LIBRARY_PATH is only required if RDC is not installed.
|
||||
```
|
||||
sudo LD_LIBRARY_PATH=$PWD/rdc_libs/ ./server/rdcd -u
|
||||
LD_LIBRARY_PATH=$PWD/rdc_libs/ ./rdci/rdci discovery -u
|
||||
LD_LIBRARY_PATH=$PWD/rdc_libs/ ./rdci/rdci discovery -u -l
|
||||
```
|
||||
|
||||
## To run the rdcd and rdci from the build folder with authentication
|
||||
Note: LD_LIBRARY_PATH is only required if RDC is not installed.
|
||||
```
|
||||
sudo LD_LIBRARY_PATH=$PWD/rdc_libs/ ./server/rdcd
|
||||
LD_LIBRARY_PATH=$PWD/rdc_libs/ ./rdci/rdci discovery
|
||||
LD_LIBRARY_PATH=$PWD/rdc_libs/ ./rdci/rdci discovery -l
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
@@ -70,6 +70,9 @@ include_directories(${INC_DIR} ${PROJECT_SOURCE_DIR}/include
|
||||
# Source files that are compiled into the rdci command line tool.
set(RDCI_SRC_LIST "${SRC_DIR}/rdci.cc")
list(APPEND RDCI_SRC_LIST
    "${SRC_DIR}/RdciDiscoverySubSystem.cc"
    "${SRC_DIR}/RdciSubSystem.cc"
    "${SRC_DIR}/RdciGroupSubSystem.cc"
    "${SRC_DIR}/RdciFieldGroupSubSystem.cc"
    "${SRC_DIR}/RdciDmonSubSystem.cc"
    "${PROJECT_SOURCE_DIR}/common/rdc_utils.cc")
message("RDCI_SRC_LIST=${RDCI_SRC_LIST}")
# Name of the executable built from the sources above.
set(RDCI_EXE "rdci")
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDCI_INCLUDE_RDCIDMONSUBSYSTEM_H_
|
||||
#define RDCI_INCLUDE_RDCIDMONSUBSYSTEM_H_
|
||||
#include <signal.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "RdciSubSystem.h"
|
||||
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdciDmonSubSystem: public RdciSubSystem {
|
||||
public:
|
||||
RdciDmonSubSystem();
|
||||
~RdciDmonSubSystem();
|
||||
void parse_cmd_opts(int argc, char ** argv) override;
|
||||
void process() override;
|
||||
|
||||
private:
|
||||
void show_help() const;
|
||||
void show_field_usage() const;
|
||||
void clean_up();
|
||||
|
||||
void create_temp_group();
|
||||
void create_temp_field_group();
|
||||
|
||||
enum OPERATIONS {
|
||||
DMON_UNKNOWN = 0,
|
||||
DMON_HELP,
|
||||
DMON_LIST_FIELDS,
|
||||
DMON_MONITOR
|
||||
} dmon_ops_;
|
||||
|
||||
enum OPTIONS {
|
||||
OPTIONS_UNKNOWN = 0,
|
||||
OPTIONS_COUNT,
|
||||
OPTIONS_DELAY,
|
||||
OPTIONS_FIELD_GROUP_ID,
|
||||
OPTIONS_GROUP_ID
|
||||
};
|
||||
|
||||
std::map<OPTIONS, uint32_t> options_;
|
||||
std::vector<uint32_t> field_ids_;
|
||||
std::vector<uint32_t> gpu_indexes_;
|
||||
bool need_cleanup_;
|
||||
|
||||
static volatile sig_atomic_t is_terminating_;
|
||||
static void set_terminating(int sig);
|
||||
};
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDCI_INCLUDE_RDCIDMONSUBSYSTEM_H_
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDCI_INCLUDE_RDCIFIELDGROUPSUBSYSTEM_H_
|
||||
#define RDCI_INCLUDE_RDCIFIELDGROUPSUBSYSTEM_H_
|
||||
|
||||
#include <string>
|
||||
#include "RdciSubSystem.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdciFieldGroupSubSystem: public RdciSubSystem {
|
||||
public:
|
||||
RdciFieldGroupSubSystem();
|
||||
void parse_cmd_opts(int argc, char ** argv) override;
|
||||
void process() override;
|
||||
private:
|
||||
void show_help() const;
|
||||
|
||||
enum OPERATIONS {
|
||||
FIELD_GROUP_UNKNOWN = 0,
|
||||
FIELD_GROUP_HELP,
|
||||
FIELD_GROUP_CREATE,
|
||||
FIELD_GROUP_DELETE,
|
||||
FIELD_GROUP_LIST,
|
||||
FIELD_GROUP_INFO
|
||||
} field_group_ops_;
|
||||
|
||||
bool is_group_set_;
|
||||
uint32_t group_id_;
|
||||
std::string group_name_;
|
||||
std::string field_ids_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDCI_INCLUDE_RDCIFIELDGROUPSUBSYSTEM_H_
|
||||
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDCI_INCLUDE_RDCIGROUPSUBSYSTEM_H_
|
||||
#define RDCI_INCLUDE_RDCIGROUPSUBSYSTEM_H_
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "RdciSubSystem.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
class RdciGroupSubSystem: public RdciSubSystem {
|
||||
public:
|
||||
RdciGroupSubSystem();
|
||||
void parse_cmd_opts(int argc, char ** argv) override;
|
||||
void process() override;
|
||||
|
||||
private:
|
||||
void show_help() const;
|
||||
|
||||
enum OPERATIONS {
|
||||
GROUP_UNKNOWN = 0,
|
||||
GROUP_HELP,
|
||||
GROUP_CREATE,
|
||||
GROUP_DELETE,
|
||||
GROUP_LIST,
|
||||
GROUP_ADD_GPUS,
|
||||
GROUP_INFO
|
||||
} group_ops_;
|
||||
|
||||
bool is_group_set_;
|
||||
uint32_t group_id_;
|
||||
std::string group_name_;
|
||||
std::string gpu_ids_;
|
||||
};
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDCI_INCLUDE_RDCIGROUPSUBSYSTEM_H_
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc/rdc.h"
|
||||
|
||||
@@ -39,6 +40,8 @@ class RdciSubSystem {
|
||||
virtual void process() = 0;
|
||||
virtual ~RdciSubSystem();
|
||||
protected:
|
||||
std::vector<std::string> split_string(const std::string& s,
|
||||
char delimiter) const;
|
||||
void show_common_usage() const;
|
||||
rdc_handle_t rdc_handle_;
|
||||
std::string ip_port_;
|
||||
|
||||
@@ -0,0 +1,395 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "RdciDmonSubSystem.h"
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <limits>
|
||||
#include <iomanip>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "common/rdc_utils.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Set to 1 by the SIGINT handler (set_terminating) when the user presses
|
||||
// Ctrl-C; process() polls it so the subsystem can clean up what it created.
|
||||
volatile sig_atomic_t RdciDmonSubSystem::is_terminating_ = 0;
|
||||
|
||||
// Monitoring is the default operation. The SIGINT handler is installed here
// so that Ctrl-C only raises a flag instead of killing the process.
RdciDmonSubSystem::RdciDmonSubSystem()
    : dmon_ops_(DMON_MONITOR),
      need_cleanup_(false) {
    signal(SIGINT, &RdciDmonSubSystem::set_terminating);
}
|
||||
|
||||
// Makes sure the watch and any temporary groups are released even when
// process() did not get to do it itself.
RdciDmonSubSystem::~RdciDmonSubSystem() { clean_up(); }
|
||||
|
||||
void RdciDmonSubSystem::set_terminating(int sig) {
|
||||
if (sig == SIGINT) {
|
||||
is_terminating_ = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
const int HOST_OPTIONS = 1000;
|
||||
const struct option long_options[] = {
|
||||
{"host", required_argument, nullptr, HOST_OPTIONS },
|
||||
{"help", optional_argument, nullptr, 'h' },
|
||||
{"list", optional_argument, nullptr, 'l' },
|
||||
{"field-group-id", required_argument, nullptr, 'f' },
|
||||
{"field-id", required_argument, nullptr, 'e' },
|
||||
{"gpu_index", required_argument, nullptr, 'i'},
|
||||
{"group-id", required_argument, nullptr, 'g' },
|
||||
{"count", required_argument, nullptr, 'c'},
|
||||
{"delay", required_argument, nullptr, 'd'},
|
||||
{ nullptr, 0 , nullptr, 0 }
|
||||
};
|
||||
|
||||
int option_index = 0;
|
||||
int opt = 0;
|
||||
std::string gpu_indexes;
|
||||
std::string field_ids;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hlf:g:c:d:e:i:",
|
||||
long_options, &option_index)) != -1) {
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
ip_port_ = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
dmon_ops_ = DMON_HELP;
|
||||
return;
|
||||
case 'l':
|
||||
dmon_ops_ = DMON_LIST_FIELDS;
|
||||
return;
|
||||
case 'f':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The field group id needs to be a number");
|
||||
}
|
||||
options_.insert({OPTIONS_FIELD_GROUP_ID, std::stoi(optarg)});
|
||||
break;
|
||||
case 'e':
|
||||
field_ids = optarg;
|
||||
break;
|
||||
case 'i':
|
||||
gpu_indexes = optarg;
|
||||
break;
|
||||
case 'g':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The group id needs to be a number");
|
||||
}
|
||||
options_.insert({OPTIONS_GROUP_ID, std::stoi(optarg)});
|
||||
break;
|
||||
case 'c':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The count needs to be a number");
|
||||
}
|
||||
options_.insert({OPTIONS_COUNT, std::stoi(optarg)});
|
||||
break;
|
||||
case 'd':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The delay needs to be a number");
|
||||
}
|
||||
options_.insert({OPTIONS_DELAY, std::stoi(optarg)});
|
||||
break;
|
||||
default:
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Unknown command line options");
|
||||
}
|
||||
}
|
||||
|
||||
if (options_.find(OPTIONS_FIELD_GROUP_ID) == options_.end()) {
|
||||
if (field_ids == "") {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the fields or field group id");
|
||||
} else {
|
||||
std::vector<std::string> vec_ids = split_string(field_ids, ',');
|
||||
for (uint32_t i = 0; i < vec_ids.size(); i++) {
|
||||
if (!IsNumber(vec_ids[i])) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER, "The field Id "
|
||||
+vec_ids[i]+" needs to be a number");
|
||||
}
|
||||
field_ids_.push_back(std::stoi(vec_ids[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (options_.find(OPTIONS_GROUP_ID) == options_.end()) {
|
||||
if (gpu_indexes == "") {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the GPUs or group id");
|
||||
} else {
|
||||
std::vector<std::string> vec_ids = split_string(gpu_indexes, ',');
|
||||
for (uint32_t i = 0; i < vec_ids.size(); i++) {
|
||||
if (!IsNumber(vec_ids[i])) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The GPU index "+vec_ids[i]+" needs to be a number");
|
||||
}
|
||||
gpu_indexes_.push_back(std::stoi(vec_ids[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Group and GPU index cannot co-exist
|
||||
if (gpu_indexes != "" &&
|
||||
options_.find(OPTIONS_GROUP_ID) != options_.end()) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Use either the group or GPU indexes");
|
||||
}
|
||||
|
||||
// Field group and field Ids cannot co-exist
|
||||
if (field_ids != "" &&
|
||||
options_.find(OPTIONS_FIELD_GROUP_ID) != options_.end()) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Use either the field group or field IDs");
|
||||
}
|
||||
|
||||
// Set default delay to 1 second
|
||||
if (options_.find(OPTIONS_DELAY) == options_.end()) {
|
||||
options_.insert({OPTIONS_DELAY, 1000});
|
||||
}
|
||||
|
||||
// Set default count to max integer
|
||||
if (options_.find(OPTIONS_COUNT) == options_.end()) {
|
||||
options_.insert({OPTIONS_COUNT, std::numeric_limits<uint32_t>::max()});
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the dmon usage text: the synopsis, then the flags shared by all
// subsystems (show_common_usage), then the dmon specific flags.
void RdciDmonSubSystem::show_help() const {
    static const char* const kSynopsis[] = {
        " dmon -- Used to monitor GPUs and their stats.\n\n",
        "Usage\n",
        " rdci dmon [--host <IP/FQDN>:port] [-u] -f <fieldGroupId>",
        " -g <groupId>\n",
        " [-d <delay>] [-c <count>]\n",
        " rdci dmon [--host <IP/FQDN>:port] [-u] -e <fieldIds>",
        " -i <gpuIndexes>\n",
        " [-d <delay>] [-c <count>]\n",
        " rdci dmon [--host <IP/FQDN>:port] [-u] -l \n",
        "\nFlags:\n",
    };
    static const char* const kFlags[] = {
        " -f --field-group-id The field group ",
        "to query on the specified host.\n",
        " -g --group-id The GPU group to query ",
        "on the specified host.\n",
        " -c --count count Integer representing How",
        " many times to loop before exiting. [default = runs forever.]\n",
        " -e --field-id fieldIds Comma-separated list ",
        "of the field ids to monitor.\n",
        " -i --gpu_index gpuIndexes Comma-separated list ",
        "of the GPU index to monitor.\n",
        " -d --delay delay How often to query RDC ",
        "in milli seconds. [default = 1000 msec, ",
        "Minimum value = 100 msec.]\n",
        " -l --list List to look up the long ",
        "names and descriptions of the field ids\n",
    };

    for (const char* piece : kSynopsis) {
        std::cout << piece;
    }
    show_common_usage();
    for (const char* piece : kFlags) {
        std::cout << piece;
    }
}
|
||||
|
||||
void RdciDmonSubSystem::create_temp_group() {
|
||||
if (gpu_indexes_.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string group_name("rdci-dmon-group");
|
||||
rdc_gpu_group_t group_id;
|
||||
rdc_status_t result = rdc_group_gpu_create(rdc_handle_,
|
||||
RDC_GROUP_EMPTY, group_name.c_str(), &group_id);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, "Fail to create the dmon group");
|
||||
}
|
||||
need_cleanup_ = true;
|
||||
|
||||
for (uint32_t i = 0; i < gpu_indexes_.size() ; i++) {
|
||||
result = rdc_group_gpu_add(rdc_handle_, group_id, gpu_indexes_[i]);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, "Fail to add " +
|
||||
std::to_string(gpu_indexes_[i])+" to the dmon group.");
|
||||
}
|
||||
}
|
||||
options_.insert({OPTIONS_GROUP_ID, group_id});
|
||||
}
|
||||
|
||||
|
||||
void RdciDmonSubSystem::create_temp_field_group() {
|
||||
if (field_ids_.size() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const std::string field_group_name("rdci-dmon-field-group");
|
||||
rdc_field_grp_t group_id;
|
||||
uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
for (uint32_t i = 0; i < field_ids_.size(); i++) {
|
||||
field_ids[i] = field_ids_[i];
|
||||
}
|
||||
|
||||
rdc_status_t result = rdc_group_field_create(rdc_handle_,
|
||||
field_ids_.size(), &field_ids[0], field_group_name.c_str(), &group_id);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, "Fail to create the dmon field group.");
|
||||
}
|
||||
|
||||
need_cleanup_ = true;
|
||||
options_.insert({OPTIONS_FIELD_GROUP_ID, group_id});
|
||||
}
|
||||
|
||||
// Prints the field ids that can be passed to dmon, one per line, with the
// symbolic name and a short description of each.
void RdciDmonSubSystem::show_field_usage() const {
    static const char* const kFieldLines[] = {
        "Supported fields Ids:\n",
        "100 RDC_FI_GPU_SM_CLOCK: Current GPU clock frequencies.\n",
        "150 RDC_FI_GPU_TEMP: GPU ",
        "temperature in millidegrees Celcius.\n",
        "155 RDC_FI_POWER_USAGE: Power usage in microwatts.\n",
        "203 RDC_FI_GPU_UTIL: GPU busy percentage.\n",
        "525 RDC_FI_GPU_MEMORY_USAGE: Memory usage of the GPU ",
        "instance in bytes.\n",
    };
    for (const char* piece : kFieldLines) {
        std::cout << piece;
    }
}
|
||||
|
||||
void RdciDmonSubSystem::process() {
|
||||
if (dmon_ops_ == DMON_HELP ||
|
||||
dmon_ops_ == DMON_UNKNOWN) {
|
||||
show_help();
|
||||
return;
|
||||
}
|
||||
|
||||
if (dmon_ops_ == DMON_LIST_FIELDS) {
|
||||
show_field_usage();
|
||||
return;
|
||||
}
|
||||
|
||||
rdc_status_t result;
|
||||
rdc_group_info_t group_info;
|
||||
rdc_field_group_info_t field_info;
|
||||
|
||||
// Create a temporary group/field if pass as GPU indexes or field ids
|
||||
create_temp_group();
|
||||
create_temp_field_group();
|
||||
|
||||
result = rdc_group_gpu_get_info(rdc_handle_,
|
||||
options_[OPTIONS_GROUP_ID], &group_info);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, rdc_status_string(result));
|
||||
}
|
||||
result = rdc_group_field_get_info(rdc_handle_,
|
||||
options_[OPTIONS_FIELD_GROUP_ID], &field_info);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, rdc_status_string(result));
|
||||
}
|
||||
|
||||
// keep extra 1 minute data
|
||||
double max_keep_age = options_[OPTIONS_DELAY]/1000.0 + 60;
|
||||
const int max_keep_samples = 10; // keep only 10 samples
|
||||
result = rdc_field_watch(rdc_handle_,
|
||||
options_[OPTIONS_GROUP_ID], options_[OPTIONS_FIELD_GROUP_ID],
|
||||
options_[OPTIONS_DELAY]*1000, max_keep_age, max_keep_samples);
|
||||
need_cleanup_ = true;
|
||||
std::cout << "GPU\t";
|
||||
for (uint32_t findex = 0; findex < field_info.count; findex++) {
|
||||
std::cout << std::left << std::setw(20)
|
||||
<< field_id_string(field_info.field_ids[findex]);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
for (uint32_t i = 0; i < options_[OPTIONS_COUNT]; i++) {
|
||||
usleep(options_[OPTIONS_DELAY]*1000);
|
||||
for (uint32_t gindex = 0; gindex < group_info.count; gindex++) {
|
||||
std::cout << group_info.entity_ids[gindex] << "\t";
|
||||
for (uint32_t findex = 0; findex < field_info.count; findex++) {
|
||||
rdc_field_value value;
|
||||
result = rdc_field_get_latest_value(rdc_handle_,
|
||||
group_info.entity_ids[gindex],
|
||||
field_info.field_ids[findex], &value);
|
||||
if (result != RDC_ST_OK) {
|
||||
std::cout << std::left << std::setw(20) << "error";
|
||||
} else {
|
||||
if (value.type == INTEGER) {
|
||||
std::cout << std::left << std::setw(20)
|
||||
<< value.value.l_int;
|
||||
} else if (value.type == DOUBLE) {
|
||||
std::cout << std::left << std::setw(20)
|
||||
<< value.value.dbl;
|
||||
} else {
|
||||
std::cout << std::left << std::setw(20)
|
||||
<< value.value.str;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_terminating_) {
|
||||
clean_up();
|
||||
return;
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
clean_up();
|
||||
}
|
||||
|
||||
|
||||
void RdciDmonSubSystem::clean_up() {
|
||||
if (!need_cleanup_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Not throw the errors in order to clean up all resources created
|
||||
if (options_.find(OPTIONS_GROUP_ID) != options_.end() &&
|
||||
options_.find(OPTIONS_FIELD_GROUP_ID) != options_.end()) {
|
||||
rdc_field_unwatch(rdc_handle_, options_[OPTIONS_GROUP_ID],
|
||||
options_[OPTIONS_FIELD_GROUP_ID]);
|
||||
}
|
||||
|
||||
if (gpu_indexes_.size() != 0) {
|
||||
auto group = options_.find(OPTIONS_GROUP_ID);
|
||||
if (group != options_.end()) {
|
||||
rdc_group_gpu_destroy(rdc_handle_, group->second);
|
||||
}
|
||||
}
|
||||
|
||||
if (field_ids_.size() != 0) {
|
||||
auto fgroup = options_.find(OPTIONS_FIELD_GROUP_ID);
|
||||
if (fgroup != options_.end()) {
|
||||
rdc_group_field_destroy(rdc_handle_, fgroup->second);
|
||||
}
|
||||
}
|
||||
|
||||
need_cleanup_ = false;
|
||||
}
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "RdciFieldGroupSubSystem.h"
|
||||
#include <getopt.h>
|
||||
#include <unistd.h>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "common/rdc_utils.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Starts with no operation selected and no group id provided.
RdciFieldGroupSubSystem::RdciFieldGroupSubSystem()
    : field_group_ops_(FIELD_GROUP_UNKNOWN),
      is_group_set_(false) {}
|
||||
|
||||
void RdciFieldGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
const int HOST_OPTIONS = 1000;
|
||||
const struct option long_options[] = {
|
||||
{"host", required_argument, nullptr, HOST_OPTIONS },
|
||||
{"help", optional_argument, nullptr, 'h' },
|
||||
{"list", optional_argument, nullptr, 'l' },
|
||||
{"group", required_argument, nullptr, 'g'},
|
||||
{"create", required_argument, nullptr, 'c' },
|
||||
{"fieldids", required_argument, nullptr, 'f'},
|
||||
{"info", optional_argument, nullptr, 'i' },
|
||||
{"delete", required_argument, nullptr, 'd' },
|
||||
{ nullptr, 0 , nullptr, 0 }
|
||||
};
|
||||
|
||||
int option_index = 0;
|
||||
int opt = 0;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hlif:c:g:d:",
|
||||
long_options, &option_index)) != -1) {
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
ip_port_ = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
field_group_ops_ = FIELD_GROUP_HELP;
|
||||
return;
|
||||
case 'l':
|
||||
field_group_ops_ = FIELD_GROUP_LIST;
|
||||
break;
|
||||
case 'f':
|
||||
field_ids_ = optarg;
|
||||
break;
|
||||
case 'g':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The group id needs to be a number");
|
||||
}
|
||||
group_id_ = std::stoi(optarg);
|
||||
is_group_set_ = true;
|
||||
break;
|
||||
case 'c':
|
||||
field_group_ops_ = FIELD_GROUP_CREATE;
|
||||
group_name_ = optarg;
|
||||
break;
|
||||
case 'i':
|
||||
field_group_ops_ = FIELD_GROUP_INFO;
|
||||
break;
|
||||
case 'd':
|
||||
field_group_ops_ = FIELD_GROUP_DELETE;
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The group id needs to be a number");
|
||||
}
|
||||
group_id_ = std::stoi(optarg);
|
||||
is_group_set_ = true;
|
||||
break;
|
||||
default:
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Unknown command line options");
|
||||
}
|
||||
}
|
||||
|
||||
if (field_group_ops_ == FIELD_GROUP_UNKNOWN) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Must specify a valid operations");
|
||||
}
|
||||
}
|
||||
|
||||
// Prints the fieldgroup usage text: the synopsis, then the flags shared by
// all subsystems (show_common_usage), then the fieldgroup specific flags.
void RdciFieldGroupSubSystem::show_help() const {
    static const char* const kSynopsis[] = {
        " fieldgroup -- Used to create and maintain groups ",
        "of field Ids.\n\n",
        "Usage\n",
        " rdci fieldgroup [--host <IP/FQDN>:port] [-u] -l\n",
        " rdci fieldgroup [--host <IP/FQDN>:port] [-u] ",
        "-c <groupName> -f <filedIds>\n",
        " rdci fieldgroup [--host <IP/FQDN>:port] [-u] ",
        "-g <groupId> -i\n",
        " rdci fieldgroup [--host <IP/FQDN>:port] [-u] ",
        "-d <groupId>\n",
        "\nFlags:\n",
    };
    static const char* const kFlags[] = {
        " -l --list ",
        "List the field groups that currently exist for a host.\n",
        " -g --group groupId ",
        "The field group to query on the specified host.\n",
        " -c --create groupName ",
        "Create a field group on the remote host.\n",
        " -f --fieldids fieldIds Comma-separated ",
        "list of the field ids to add to a field group\n",
        " -i --info ",
        "Display the information for the specified group Id\n",
        " -d --delete groupId ",
        "Delete a field group on the remote host.\n",
    };

    for (const char* piece : kSynopsis) {
        std::cout << piece;
    }
    show_common_usage();
    for (const char* piece : kFlags) {
        std::cout << piece;
    }
}
|
||||
|
||||
|
||||
void RdciFieldGroupSubSystem::process() {
|
||||
rdc_status_t result = RDC_ST_OK;
|
||||
rdc_field_group_info_t group_info;
|
||||
uint32_t count = 0;
|
||||
switch (field_group_ops_) {
|
||||
case FIELD_GROUP_HELP:
|
||||
show_help();
|
||||
break;
|
||||
case FIELD_GROUP_CREATE:
|
||||
{
|
||||
if (group_name_ == "") {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Must specify the group name when create a field group");
|
||||
}
|
||||
std::vector<std::string> fields = split_string(field_ids_, ',');
|
||||
uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
for (uint32_t i = 0; i < fields.size(); i++) {
|
||||
if (!IsNumber(fields[i])) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The field Id "+fields[i]+" needs to be a number");
|
||||
}
|
||||
field_ids[i] = std::stoi(fields[i]);
|
||||
}
|
||||
rdc_field_grp_t group_id;
|
||||
result = rdc_group_field_create(rdc_handle_, fields.size(),
|
||||
&field_ids[0], group_name_.c_str(), &group_id);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Successfully created a field group"
|
||||
<< " with a group ID " << group_id << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case FIELD_GROUP_DELETE:
|
||||
if (!is_group_set_) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the group id to delete a group");
|
||||
}
|
||||
result = rdc_group_field_destroy(rdc_handle_, group_id_);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Successfully deleted the field group "
|
||||
<< group_id_ << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case FIELD_GROUP_LIST:
|
||||
rdc_field_grp_t group_id_list[RDC_MAX_NUM_FIELD_GROUPS];
|
||||
result = rdc_group_field_get_all_ids(
|
||||
rdc_handle_, group_id_list, &count);
|
||||
if ( result != RDC_ST_OK) break;
|
||||
|
||||
std::cout << count << " field group found.\n";
|
||||
std::cout << "GroupID\t" << "GroupName\t" << "FieldIds\n";
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
result = rdc_group_field_get_info(
|
||||
rdc_handle_, group_id_list[i], &group_info);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Fail to get information for field group " +
|
||||
std::to_string(group_id_list[i]));
|
||||
}
|
||||
|
||||
std::cout << group_id_list[i] << "\t"
|
||||
<< group_info.group_name << "\t\t";
|
||||
for (uint32_t j = 0; j < group_info.count; j++) {
|
||||
std::cout << group_info.field_ids[j];
|
||||
if ( j < group_info.count -1 ) {
|
||||
std::cout << ",";
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
break;
|
||||
case FIELD_GROUP_INFO:
|
||||
if (!is_group_set_) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the group id to show field group info");
|
||||
}
|
||||
result = rdc_group_field_get_info(
|
||||
rdc_handle_, group_id_, &group_info);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Group name: " << group_info.group_name
|
||||
<< std::endl;
|
||||
std::cout << "Field Ids: ";
|
||||
for (uint32_t i = 0; i < group_info.count; i++) {
|
||||
std::cout << group_info.field_ids[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER, "Unknown command");
|
||||
}
|
||||
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, rdc_status_string(result));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
@@ -0,0 +1,260 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "RdciGroupSubSystem.h"
#include <getopt.h>
#include <unistd.h>
#include <stdexcept>
#include "common/rdc_utils.h"
#include "rdc_lib/rdc_common.h"
#include "rdc/rdc.h"
#include "rdc_lib/RdcException.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Construct the "group" subsystem with no operation selected and no group id
// supplied yet; parse_cmd_opts() fills both in from the command line.
RdciGroupSubSystem::RdciGroupSubSystem()
    : group_ops_(GROUP_UNKNOWN),
      is_group_set_(false) {}
|
||||
|
||||
void RdciGroupSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
const int HOST_OPTIONS = 1000;
|
||||
const struct option long_options[] = {
|
||||
{"host", required_argument, nullptr, HOST_OPTIONS },
|
||||
{"help", optional_argument, nullptr, 'h' },
|
||||
{"list", optional_argument, nullptr, 'l' },
|
||||
{"group", required_argument, nullptr, 'g'},
|
||||
{"create", required_argument, nullptr, 'c' },
|
||||
{"add", required_argument, nullptr, 'a' },
|
||||
{"info", optional_argument, nullptr, 'i' },
|
||||
{"delete", required_argument, nullptr, 'd' },
|
||||
{ nullptr, 0 , nullptr, 0 }
|
||||
};
|
||||
|
||||
int option_index = 0;
|
||||
int opt = 0;
|
||||
|
||||
while ((opt = getopt_long(argc, argv, "hlic:g:a:d:",
|
||||
long_options, &option_index)) != -1) {
|
||||
switch (opt) {
|
||||
case HOST_OPTIONS:
|
||||
ip_port_ = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
group_ops_ = GROUP_HELP;
|
||||
return;
|
||||
case 'l':
|
||||
group_ops_ = GROUP_LIST;
|
||||
break;
|
||||
case 'g':
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The group id needs to be a number");
|
||||
}
|
||||
group_id_ = std::stoi(optarg);
|
||||
is_group_set_ = true;
|
||||
break;
|
||||
case 'c':
|
||||
group_ops_ = GROUP_CREATE;
|
||||
group_name_ = optarg;
|
||||
break;
|
||||
case 'a':
|
||||
group_ops_ = GROUP_ADD_GPUS;
|
||||
gpu_ids_ = optarg;
|
||||
break;
|
||||
case 'i':
|
||||
group_ops_ = GROUP_INFO;
|
||||
break;
|
||||
case 'd':
|
||||
group_ops_ = GROUP_DELETE;
|
||||
if (!IsNumber(optarg)) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The group id needs to be a number");
|
||||
}
|
||||
group_id_ = std::stoi(optarg);
|
||||
is_group_set_ = true;
|
||||
break;
|
||||
default:
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Unknown command line options");
|
||||
}
|
||||
}
|
||||
|
||||
if (group_ops_ == GROUP_UNKNOWN) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Must specify a valid operations");
|
||||
}
|
||||
}
|
||||
|
||||
// Print the usage text of the "group" subsystem to stdout: the synopsis
// first, then the flags printed by show_common_usage(), then the
// group-specific flags.
void RdciGroupSubSystem::show_help() const {
    const char* const synopsis =
        " group -- Used to create and maintain groups of GPUs.\n\n"
        "Usage\n"
        " rdci group [--host <IP/FQDN>:port] [-u] -l\n"
        " rdci group [--host <IP/FQDN>:port] [-u] -c <groupName>\n"
        " rdci group [--host <IP/FQDN>:port] [-u] -g <groupId> "
        "[-a <entityId>]\n"
        " rdci group [--host <IP/FQDN>:port] [-u] "
        "-g <groupId> [-i]\n"
        " rdci group [--host <IP/FQDN>:port] [-u] -d <groupId>\n"
        "\nFlags:\n";

    const char* const group_flags =
        " -l --list "
        "List the groups that currently exist for a host.\n"
        " -g --group groupId "
        "The GPU group to query on the specified host.\n"
        " -c --create groupName "
        "Create a group on the remote host.\n"
        " -a --add gpuIndexes "
        "Comma-separated list of the GPU indexes to add to the group.\n"
        " -i --info "
        "Display the information for the specified group Id\n"
        " -d --delete groupId "
        "Delete a group on the remote host.\n";

    std::cout << synopsis;
    show_common_usage();
    std::cout << group_flags;
}
|
||||
|
||||
|
||||
void RdciGroupSubSystem::process() {
|
||||
rdc_status_t result = RDC_ST_OK;
|
||||
std::vector<std::string> gpu_ids;
|
||||
rdc_group_info_t group_info;
|
||||
uint32_t count = 0;
|
||||
switch (group_ops_) {
|
||||
case GROUP_HELP:
|
||||
show_help();
|
||||
break;
|
||||
case GROUP_CREATE:
|
||||
if (group_name_ == "") {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Must specify the group name when create a group");
|
||||
}
|
||||
rdc_gpu_group_t group_id;
|
||||
result = rdc_group_gpu_create(rdc_handle_, RDC_GROUP_EMPTY,
|
||||
group_name_.c_str(), &group_id);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Successfully created group with a group ID "
|
||||
<< group_id << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case GROUP_DELETE:
|
||||
if (!is_group_set_) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the group id to delete a group");
|
||||
}
|
||||
result = rdc_group_gpu_destroy(rdc_handle_, group_id_);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Successfully deleted the group "
|
||||
<< group_id_ << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case GROUP_LIST:
|
||||
rdc_gpu_group_t group_id_list[RDC_MAX_NUM_GROUPS];
|
||||
result = rdc_group_get_all_ids(rdc_handle_, group_id_list, &count);
|
||||
if ( result != RDC_ST_OK) break;
|
||||
|
||||
std::cout << count << " group found.\n";
|
||||
std::cout << "GroupID\t" << "GroupName\t" << "GPUIndex\n";
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
result = rdc_group_gpu_get_info(rdc_handle_,
|
||||
group_id_list[i], &group_info);
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Fail to get information for group "
|
||||
+ std::to_string(group_id_list[i]));
|
||||
}
|
||||
|
||||
std::cout << group_id_list[i] << "\t"
|
||||
<< group_info.group_name << "\t\t";
|
||||
for (uint32_t j = 0; j < group_info.count; j++) {
|
||||
std::cout << group_info.entity_ids[j];
|
||||
if (j < group_info.count -1) {
|
||||
std::cout << ",";
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
break;
|
||||
case GROUP_ADD_GPUS:
|
||||
if (!is_group_set_) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the group id to add a group");
|
||||
}
|
||||
|
||||
gpu_ids = split_string(gpu_ids_, ',');
|
||||
for (uint32_t i = 0; i < gpu_ids.size(); i++) {
|
||||
if (!IsNumber(gpu_ids[i])) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The GUP Id "+gpu_ids[i]+" needs to be a number");
|
||||
}
|
||||
result = rdc_group_gpu_add(rdc_handle_,
|
||||
group_id_, std::stoi(gpu_ids[i]));
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, "Fail to add GPU "
|
||||
+ gpu_ids[i] + " to the group");
|
||||
}
|
||||
}
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Successfully added the GPU " << gpu_ids_
|
||||
<< " to group "<< group_id_ << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case GROUP_INFO:
|
||||
if (!is_group_set_) {
|
||||
show_help();
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"Need to specify the group id to show group info");
|
||||
}
|
||||
result = rdc_group_gpu_get_info(rdc_handle_,
|
||||
group_id_, &group_info);
|
||||
if (result == RDC_ST_OK) {
|
||||
std::cout << "Group name: "
|
||||
<< group_info.group_name << std::endl;
|
||||
std::cout << "Gpu indexes: ";
|
||||
for (uint32_t i = 0; i < group_info.count; i++) {
|
||||
std::cout << group_info.entity_ids[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER, "Unknown command");
|
||||
}
|
||||
|
||||
if (result != RDC_ST_OK) {
|
||||
throw RdcException(result, rdc_status_string(result));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include "RdciSubSystem.h"
|
||||
#include <sstream>
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "common/rdc_utils.h"
|
||||
|
||||
@@ -39,6 +40,17 @@ RdciSubSystem::RdciSubSystem():
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> RdciSubSystem::split_string(const std::string& s,
|
||||
char delimiter) const {
|
||||
std::vector<std::string> tokens;
|
||||
std::string token;
|
||||
std::istringstream tokenStream(s);
|
||||
while (std::getline(tokenStream, token, delimiter)) {
|
||||
tokens.push_back(token);
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
void RdciSubSystem::connect() {
|
||||
rdc_status_t status;
|
||||
|
||||
|
||||
@@ -26,6 +26,9 @@ THE SOFTWARE.
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "RdciDiscoverySubSystem.h"
|
||||
#include "RdciDmonSubSystem.h"
|
||||
#include "RdciFieldGroupSubSystem.h"
|
||||
#include "RdciGroupSubSystem.h"
|
||||
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
@@ -43,6 +46,12 @@ int main(int argc, char ** argv) {
|
||||
amd::rdc::RdciSubSystemPtr subsystem;
|
||||
if (subsystem_name == "discovery") {
|
||||
subsystem.reset(new amd::rdc::RdciDiscoverySubSystem());
|
||||
} else if (subsystem_name == "dmon") {
|
||||
subsystem.reset(new amd::rdc::RdciDmonSubSystem());
|
||||
} else if (subsystem_name == "group") {
|
||||
subsystem.reset(new amd::rdc::RdciGroupSubSystem());
|
||||
} else if (subsystem_name == "fieldgroup") {
|
||||
subsystem.reset(new amd::rdc::RdciFieldGroupSubSystem());
|
||||
} else {
|
||||
std::cout << usage_help;
|
||||
exit(0);
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする