Add rsmi_compute_process_gpus_get()
Given a process ID, give the device indices that process is
currently using.
Also:
* made corrections to how RSMI, amdgpu (ie, "card#") and
KFD indices translate from one another
* add a few missing error codes to rsmi_status_string()
* fix some formatting
Change-Id: Icd2cae66bb4fec768da96af7cf9cf8b8b66ec7f9
[ROCm/rocm_smi_lib commit: 2d6e15190c]
Cette révision appartient à :
@@ -2301,7 +2301,7 @@ rsmi_dev_counter_group_supported(uint32_t dv_ind, rsmi_event_group_t group);
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] type the type of performance event to create
|
||||
* @param[in] type the ::rsmi_event_type_t of performance event to create
|
||||
*
|
||||
* @param[inout] evnt_handle A pointer to a ::rsmi_event_handle_t which will be
|
||||
* associated with a newly allocated counter
|
||||
@@ -2448,24 +2448,64 @@ rsmi_compute_process_info_get(rsmi_process_info_t *procs, uint32_t *num_items);
|
||||
/**
|
||||
* @brief Get process information about a specific process
|
||||
*
|
||||
* @details Given a pointer to an ::rsmi_process_info_t @p proc and a process id
|
||||
* @details Given a pointer to an ::rsmi_process_info_t @p proc and a process
|
||||
* id
|
||||
* @p pid, this function will write the process information for @p pid, if
|
||||
* available, to the memory pointed to by @p proc.
|
||||
*
|
||||
* @param[in] pid The process ID for which process information is being requested
|
||||
* @param[in] pid The process ID for which process information is being
|
||||
* requested
|
||||
*
|
||||
* @param[inout] proc a pointer to a ::rsmi_process_info_t to which
|
||||
* process information for @p pid will be written if it is found.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_NOT_FOUND is returned if there was no process information
|
||||
* @retval ::RSMI_STATUS_NOT_FOUND is returned if there was no process
|
||||
* information
|
||||
* found for the provided @p pid
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_compute_process_info_by_pid_get(uint32_t pid, rsmi_process_info_t *proc);
|
||||
|
||||
/**
|
||||
* @brief Get the device indices currently being used by a process
|
||||
*
|
||||
* @details Given a process id @p pid, a non-NULL pointer to an array of
|
||||
* uint32_t's @p dv_indices of length *@p num_devices, this function will
|
||||
* write up to @p num_devices device indices to the memory pointed to by
|
||||
* @p dv_indices. If @p dv_indices is not NULL, @p num_devices will be
|
||||
* updated with the number of gpu's currently being used by process @p pid.
|
||||
* If @p dv_indices is NULL, @p num_devices will be updated with the number of
|
||||
* gpus currently being used by @p pid. Calling this function with @p
|
||||
* dv_indices being NULL is a way to determine how much memory is required
|
||||
* for when @p dv_indices is not NULL.
|
||||
*
|
||||
* @param[in] pid The process id of the process for which the number of gpus
|
||||
* currently being used is requested
|
||||
*
|
||||
* @param[inout] dv_indices a pointer to memory provided by the caller to
|
||||
* which indices of devices currently being used by the process will be
|
||||
* written. This may be NULL in which case only @p num_devices will be
|
||||
* updated with the number of devices being used.
|
||||
*
|
||||
* @param[inout] num_devices A pointer to a uint32_t, which on input, should
|
||||
* contain the amount of memory in uint32_t's which have been provided by the
|
||||
* @p dv_indices argument. On output, if @p dv_indices is non-NULL, this will
|
||||
* be updated with the number of uint32_t's actually written. If @p dv_indices is
|
||||
* NULL, this argument will be updated with the number of devices being used.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS is returned upon successful call
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if there were more
|
||||
* gpu indices that could have been written, but not enough space was
|
||||
* provided as indicated by @p dv_indices and @p num_devices, on input.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
|
||||
uint32_t *num_devices);
|
||||
|
||||
/** @} */ // end of SysInfo
|
||||
|
||||
|
||||
@@ -178,11 +178,9 @@ class Device {
|
||||
int readDevInfo(DevInfoTypes type, std::vector<std::string> *retVec);
|
||||
int writeDevInfo(DevInfoTypes type, uint64_t val);
|
||||
int writeDevInfo(DevInfoTypes type, std::string val);
|
||||
int populateKFDNodeProperties(bool force_update = false);
|
||||
int getKFDNodeProperty(DevKFDNodePropTypes prop, uint64_t *val);
|
||||
|
||||
uint32_t index(void) const {return index_;}
|
||||
void set_index(uint32_t index) {index_ = index;}
|
||||
uint32_t index(void) const {return card_indx_;}
|
||||
void set_card_index(uint32_t index) {card_indx_ = index;}
|
||||
uint32_t drm_render_minor(void) const {return drm_render_minor_;}
|
||||
void set_drm_render_minor(uint32_t minor) {drm_render_minor_ = minor;}
|
||||
static rsmi_dev_perf_level perfLvlStrToEnum(std::string s);
|
||||
@@ -192,6 +190,8 @@ class Device {
|
||||
evt::dev_evt_grp_set_t* supported_event_groups(void) {
|
||||
return &supported_event_groups_;}
|
||||
SupportedFuncMap *supported_funcs(void) {return &supported_funcs_;}
|
||||
uint64_t kfd_gpu_id(void) const {return kfd_gpu_id_;}
|
||||
void set_kfd_gpu_id(uint64_t id) {kfd_gpu_id_ = id;}
|
||||
void fillSupportedFuncs(void);
|
||||
void DumpSupportedFunctions(void);
|
||||
bool DeviceAPISupported(std::string name, uint64_t variant,
|
||||
@@ -202,20 +202,20 @@ class Device {
|
||||
std::shared_ptr<PowerMon> power_monitor_;
|
||||
std::string path_;
|
||||
shared_mutex_t mutex_;
|
||||
uint32_t index_;
|
||||
uint32_t card_indx_; // This index corresponds to the drm index (ie, card#)
|
||||
uint32_t drm_render_minor_;
|
||||
const RocmSMI_env_vars *env_;
|
||||
template <typename T> int openSysfsFileStream(DevInfoTypes type, T *fs,
|
||||
const char *str = nullptr);
|
||||
|
||||
int readDevInfoStr(DevInfoTypes type, std::string *retStr);
|
||||
int readDevInfoMultiLineStr(DevInfoTypes type,
|
||||
std::vector<std::string> *retVec);
|
||||
int writeDevInfoStr(DevInfoTypes type, std::string valStr);
|
||||
uint64_t bdfid_;
|
||||
uint64_t kfd_gpu_id_;
|
||||
std::unordered_set<rsmi_event_group_t,
|
||||
evt::RSMIEventGrpHashFunction> supported_event_groups_;
|
||||
std::map<std::string, uint64_t> kfdNodePropMap_;
|
||||
// std::map<std::string, uint64_t> kfdNodePropMap_;
|
||||
SupportedFuncMap supported_funcs_;
|
||||
};
|
||||
|
||||
|
||||
@@ -45,18 +45,50 @@
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
class KFDNode {
|
||||
public:
|
||||
explicit KFDNode(uint32_t node_ind) : node_indx_(node_ind) {}
|
||||
~KFDNode();
|
||||
|
||||
int Initialize();
|
||||
int ReadProperties(void);
|
||||
int get_property_value(std::string property, uint64_t *value);
|
||||
uint64_t gpu_id(void) const {return gpu_id_;}
|
||||
std::string name(void) const {return name_;}
|
||||
std::shared_ptr<Device> amdgpu_device(void) const {return amdgpu_device_;}
|
||||
uint32_t amdgpu_dev_index(void) const {return amdgpu_dev_index_;}
|
||||
void set_amdgpu_dev_index(uint32_t val) {amdgpu_dev_index_ = val;}
|
||||
|
||||
private:
|
||||
uint32_t node_indx_;
|
||||
uint32_t amdgpu_dev_index_;
|
||||
uint64_t gpu_id_;
|
||||
std::string name_;
|
||||
std::map<std::string, uint64_t> properties_;
|
||||
std::shared_ptr<Device> amdgpu_device_;
|
||||
};
|
||||
|
||||
int
|
||||
DiscoverKFDNodes(std::map<uint64_t, std::shared_ptr<KFDNode>> *nodes);
|
||||
|
||||
int
|
||||
GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated,
|
||||
uint32_t *num_procs_found);
|
||||
int
|
||||
GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc);
|
||||
|
||||
int
|
||||
GetProcessGPUs(uint32_t pid, std::unordered_set<uint64_t> *gpu_count);
|
||||
int
|
||||
ReadKFDDeviceProperties(uint32_t dev_id, std::vector<std::string> *retVec);
|
||||
|
||||
|
||||
@@ -51,7 +51,10 @@
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi_monitor.h"
|
||||
#include "rocm_smi/rocm_smi_power_mon.h"
|
||||
@@ -71,7 +74,7 @@ class RocmSMI {
|
||||
|
||||
static std::vector<std::shared_ptr<amd::smi::Device>>&
|
||||
monitor_devices() {return s_monitor_devices;}
|
||||
uint32_t DiscoverDevices(void);
|
||||
uint32_t DiscoverAmdgpuDevices(void);
|
||||
uint32_t DiscoverAMDPowerMonitors(bool force_update = false);
|
||||
|
||||
// Will execute "func" for every Device object known about, or until func
|
||||
@@ -84,17 +87,21 @@ class RocmSMI {
|
||||
|
||||
uint32_t euid() const {return euid_;}
|
||||
|
||||
std::map<uint64_t, std::shared_ptr<KFDNode>> & kfd_node_map(void) {
|
||||
return kfd_node_map_;}
|
||||
|
||||
private:
|
||||
std::vector<std::shared_ptr<Device>> devices_;
|
||||
std::map<uint64_t, std::shared_ptr<KFDNode>> kfd_node_map_;
|
||||
std::vector<std::shared_ptr<Monitor>> monitors_;
|
||||
std::vector<std::shared_ptr<PowerMon>> power_mons_;
|
||||
|
||||
std::set<std::string> amd_monitor_types_;
|
||||
void AddToDeviceList(std::string dev_name);
|
||||
void GetEnvVariables(void);
|
||||
uint32_t DiscoverAMDMonitors(void);
|
||||
|
||||
static std::vector<std::shared_ptr<amd::smi::Device>> s_monitor_devices;
|
||||
|
||||
RocmSMI_env_vars env_vars_;
|
||||
uint64_t init_options_;
|
||||
uint32_t euid_;
|
||||
|
||||
@@ -53,6 +53,7 @@
|
||||
#include <bitset>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <map>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
@@ -108,6 +109,16 @@ static rsmi_status_t handleException() {
|
||||
std::shared_ptr<amd::smi::Device> dev = smi.monitor_devices()[dv_ind]; \
|
||||
assert(dev != nullptr);
|
||||
|
||||
|
||||
// Expands GET_DEV_FROM_INDX, then declares "kfd_node" and binds it to the
// KFD node registered under the device's KFD gpu id. Returns
// RSMI_INITIALIZATION_ERROR from the enclosing function when no such node
// is registered.
#define GET_DEV_AND_KFDNODE_FROM_INDX \
  GET_DEV_FROM_INDX \
  std::shared_ptr<amd::smi::KFDNode> kfd_node; \
  if (smi.kfd_node_map().count(dev->kfd_gpu_id()) == 0) { \
    return RSMI_INITIALIZATION_ERROR; \
  } \
  kfd_node = smi.kfd_node_map().at(dev->kfd_gpu_id());
|
||||
|
||||
#define REQUIRE_ROOT_ACCESS \
|
||||
if (amd::smi::RocmSMI::getInstance().euid()) { \
|
||||
return RSMI_STATUS_PERMISSION; \
|
||||
@@ -168,6 +179,7 @@ static rsmi_status_t errno_to_rsmi_status(uint32_t err) {
|
||||
case EISDIR: return RSMI_STATUS_FILE_ERROR;
|
||||
case EINTR: return RSMI_STATUS_INTERRUPT;
|
||||
case EIO: return RSMI_STATUS_UNEXPECTED_SIZE;
|
||||
case ENXIO: return RSMI_STATUS_UNEXPECTED_DATA;
|
||||
default: return RSMI_STATUS_UNKNOWN_ERROR;
|
||||
}
|
||||
}
|
||||
@@ -208,7 +220,6 @@ static uint64_t get_multiplier_from_str(char units_char) {
|
||||
*/
|
||||
static uint64_t freq_string_to_int(const std::vector<std::string> &freq_lines,
|
||||
bool *is_curr, uint32_t lanes[], uint32_t i) {
|
||||
|
||||
assert(i < freq_lines.size());
|
||||
if (i >= freq_lines.size()) {
|
||||
throw amd::smi::rsmi_exception(RSMI_STATUS_INPUT_OUT_OF_BOUNDS,
|
||||
@@ -696,26 +707,15 @@ rsmi_status_t
|
||||
rsmi_dev_pci_id_get(uint32_t dv_ind, uint64_t *bdfid) {
|
||||
TRY
|
||||
|
||||
CHK_SUPPORT_NAME_ONLY(bdfid)
|
||||
GET_DEV_AND_KFDNODE_FROM_INDX
|
||||
CHK_API_SUPPORT_ONLY(bdfid, RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT)
|
||||
DEVICE_MUTEX
|
||||
|
||||
*bdfid = dev->bdfid();
|
||||
|
||||
int32_t ret = dev->populateKFDNodeProperties();
|
||||
|
||||
if (ret) {
|
||||
return errno_to_rsmi_status(errno);
|
||||
}
|
||||
|
||||
uint64_t domain = 0;
|
||||
|
||||
ret = dev->getKFDNodeProperty(amd::smi::kDevKFDNodePropDomain, &domain);
|
||||
|
||||
if (ret == EINVAL) {
|
||||
// "domain" is not found in properties file; just go with the 16 bit
|
||||
// domain already found
|
||||
return RSMI_STATUS_SUCCESS;
|
||||
}
|
||||
kfd_node->get_property_value("domain", &domain);
|
||||
|
||||
// Replace the 16 bit domain originally set like this:
|
||||
// BDFID = ((<DOMAIN> & 0xffff) << 32) | ((<BUS> & 0xff) << 8) |
|
||||
@@ -2329,15 +2329,30 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) {
|
||||
" the call";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INTERRUPT:
|
||||
*status_string = "An interrupt occurred while executing the function";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_UNEXPECTED_SIZE:
|
||||
*status_string = "Data (usually from reading a file) was out of"
|
||||
" range from what was expected";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_NO_DATA:
|
||||
*status_string = "No data was found (usually from reading a file) "
|
||||
"where data was expected";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_UNEXPECTED_DATA:
|
||||
*status_string = "Data (usually from reading a file) was not of the "
|
||||
"type that was expected";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_UNKNOWN_ERROR:
|
||||
*status_string = "An unknown error prevented the call from completing"
|
||||
" successfully";
|
||||
break;
|
||||
|
||||
case RSMI_STATUS_INTERRUPT:
|
||||
*status_string = "An interrupt occurred while executing the function";
|
||||
break;
|
||||
|
||||
default:
|
||||
*status_string = "An unknown error occurred";
|
||||
return RSMI_STATUS_UNKNOWN_ERROR;
|
||||
@@ -2696,6 +2711,49 @@ rsmi_compute_process_info_get(rsmi_process_info_t *procs,
|
||||
CATCH
|
||||
}
|
||||
|
||||
// Writes the device indices of the gpus used by process "pid" to dv_indices.
//
// pid          process id to query
// dv_indices   caller-provided array of *num_devices entries, or nullptr to
//              only query the number of devices in use
// num_devices  in: capacity of dv_indices; out: number of devices in use by
//              the process (left untouched when RSMI_STATUS_INSUFFICIENT_SIZE
//              or an error is returned)
//
// Returns RSMI_STATUS_INVALID_ARGS if num_devices is nullptr,
// RSMI_STATUS_INSUFFICIENT_SIZE if dv_indices was too small,
// RSMI_STATUS_UNEXPECTED_DATA if a gpu id has no registered KFD node,
// RSMI_STATUS_UNEXPECTED_SIZE if more gpus were found than monitored
// devices exist, or the translated error from GetProcessGPUs().
rsmi_status_t
rsmi_compute_process_gpus_get(uint32_t pid, uint32_t *dv_indices,
                                                       uint32_t *num_devices) {
  TRY

  if (num_devices == nullptr) {
    return RSMI_STATUS_INVALID_ARGS;
  }

  std::unordered_set<uint64_t> gpu_set;
  int err = amd::smi::GetProcessGPUs(pid, &gpu_set);

  if (err) {
    return errno_to_rsmi_status(err);
  }

  amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();

  if (dv_indices != nullptr) {
    auto &node_map = smi.kfd_node_map();
    uint32_t i = 0;

    for (auto it = gpu_set.begin(); i < *num_devices && it != gpu_set.end();
                                                                 ++it, ++i) {
      // Use find() rather than operator[]: the latter would insert a null
      // entry for an unknown gpu id and then dereference it.
      auto node_it = node_map.find(*it);
      if (node_it == node_map.end() || node_it->second == nullptr) {
        return RSMI_STATUS_UNEXPECTED_DATA;
      }
      dv_indices[i] = node_it->second->amdgpu_dev_index();
    }

    if (*num_devices < gpu_set.size()) {
      // In this case, *num_devices already holds the number of items
      // written to dv_indices. We just have to let the caller know there's
      // more.
      return RSMI_STATUS_INSUFFICIENT_SIZE;
    }
  }

  *num_devices = static_cast<uint32_t>(gpu_set.size());
  if (gpu_set.size() > smi.monitor_devices().size()) {
    return RSMI_STATUS_UNEXPECTED_SIZE;
  }

  return RSMI_STATUS_SUCCESS;

  CATCH
}
|
||||
|
||||
rsmi_status_t
|
||||
rsmi_dev_memory_reserved_pages_get(uint32_t dv_ind, uint32_t *num_pages,
|
||||
rsmi_retired_page_record_t *records) {
|
||||
|
||||
@@ -368,7 +368,7 @@ amd::smi::evt::Event::stopCounter(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long
|
||||
static ssize_t
|
||||
readn(int fd, void *buf, size_t n) {
|
||||
ssize_t left = n;
|
||||
ssize_t bytes;
|
||||
|
||||
@@ -777,53 +777,6 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int Device::populateKFDNodeProperties(bool force_update) {
|
||||
int ret;
|
||||
|
||||
std::vector<std::string> propVec;
|
||||
|
||||
if (kfdNodePropMap_.size() > 0 && !force_update) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = ReadKFDDeviceProperties(index_, &propVec);
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::string key_str;
|
||||
// std::string val_str;
|
||||
uint64_t val_int; // Assume all properties are unsigned integers for now
|
||||
std::istringstream fs;
|
||||
|
||||
for (uint32_t i = 0; i < propVec.size(); ++i) {
|
||||
fs.str(propVec[i]);
|
||||
fs >> key_str;
|
||||
fs >> val_int;
|
||||
|
||||
kfdNodePropMap_[key_str] = val_int;
|
||||
|
||||
fs.str("");
|
||||
fs.clear();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int Device::getKFDNodeProperty(DevKFDNodePropTypes prop, uint64_t *val) {
|
||||
assert(val != nullptr);
|
||||
assert(kDevKFDPropNameMap.find(prop) != kDevKFDPropNameMap.end());
|
||||
|
||||
const char *prop_name = kDevKFDPropNameMap.at(prop);
|
||||
if (kfdNodePropMap_.find(prop_name) == kfdNodePropMap_.end()) {
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
*val = kfdNodePropMap_.at(prop_name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Device::DumpSupportedFunctions(void) {
|
||||
SupportedFuncMapIt func_iter = supported_funcs_.begin();
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <fstream>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
@@ -57,6 +57,7 @@
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
@@ -66,25 +67,75 @@ static const char *kKFDNodesPathRoot = "/sys/class/kfd/kfd/topology/nodes";
|
||||
// Sysfs file names
|
||||
static const char *kKFDPasidFName = "pasid";
|
||||
|
||||
|
||||
|
||||
// KFD Node Property strings
|
||||
// static const char *kKFDNodePropCPU_CORES_COUNTStr = "cpu_cores_count";
|
||||
// static const char *kKFDNodePropSIMD_COUNTStr = "simd_count";
|
||||
// static const char *kKFDNodePropMEM_BANKS_COUNTStr = "mem_banks_count";
|
||||
// static const char *kKFDNodePropCACHES_COUNTStr = "caches_count";
|
||||
// static const char *kKFDNodePropIO_LINKS_COUNTStr = "io_links_count";
|
||||
// static const char *kKFDNodePropCPU_CORE_ID_BASEStr = "cpu_core_id_base";
|
||||
// static const char *kKFDNodePropSIMD_ID_BASEStr = "simd_id_base";
|
||||
// static const char *kKFDNodePropMAX_WAVES_PER_SIMDStr = "max_waves_per_simd";
|
||||
// static const char *kKFDNodePropLDS_SIZE_IN_KBStr = "lds_size_in_kb";
|
||||
// static const char *kKFDNodePropGDS_SIZE_IN_KBStr = "gds_size_in_kb";
|
||||
// static const char *kKFDNodePropNUM_GWSStr = "num_gws";
|
||||
// static const char *kKFDNodePropWAVE_FRONT_SIZEStr = "wave_front_size";
|
||||
// static const char *kKFDNodePropARRAY_COUNTStr = "array_count";
|
||||
// static const char *kKFDNodePropSIMD_ARRAYS_PER_ENGINEStr =
|
||||
// "simd_arrays_per_engine";
|
||||
// static const char *kKFDNodePropCU_PER_SIMD_ARRAYStr = "cu_per_simd_array";
|
||||
// static const char *kKFDNodePropSIMD_PER_CUStr = "simd_per_cu";
|
||||
// static const char *kKFDNodePropMAX_SLOTS_SCRATCH_CUStr =
|
||||
// "max_slots_scratch_cu";
|
||||
// static const char *kKFDNodePropVENDOR_IDStr = "vendor_id";
|
||||
// static const char *kKFDNodePropDEVICE_IDStr = "device_id";
|
||||
static const char *kKFDNodePropLOCATION_IDStr = "location_id";
|
||||
static const char *kKFDNodePropDOMAINStr = "domain";
|
||||
// static const char *kKFDNodePropDRM_RENDER_MINORStr = "drm_render_minor";
|
||||
// static const char *kKFDNodePropHIVE_IDStr = "hive_id";
|
||||
// static const char *kKFDNodePropNUM_SDMA_ENGINESStr = "num_sdma_engines";
|
||||
// static const char *kKFDNodePropNUM_SDMA_XGMI_ENGINESStr =
|
||||
// "num_sdma_xgmi_engines";
|
||||
// static const char *kKFDNodePropNUM_SDMA_QUEUES_PER_ENGINEStr =
|
||||
// "num_sdma_queues_per_engine";
|
||||
// static const char *kKFDNodePropNUM_CP_QUEUESStr = "num_cp_queues";
|
||||
// static const char *kKFDNodePropMAX_ENGINE_CLK_FCOMPUTEStr =
|
||||
// "max_engine_clk_fcompute";
|
||||
// static const char *kKFDNodePropLOCAL_MEM_SIZEStr = "local_mem_size";
|
||||
// static const char *kKFDNodePropFW_VERSIONStr = "fw_version";
|
||||
// static const char *kKFDNodePropCAPABILITYStr = "capability";
|
||||
// static const char *kKFDNodePropDEBUG_PROPStr = "debug_prop";
|
||||
// static const char *kKFDNodePropSDMA_FW_VERSIOStr = "sdma_fw_versio";
|
||||
// static const char *kKFDNodePropMAX_ENGINE_CLK_CCOMPUTEStr =
|
||||
// "max_engine_clk_ccompute";
|
||||
|
||||
// Returns true when s is non-empty and consists only of decimal digits.
static bool is_number(const std::string &s) {
  // ::isdigit has undefined behaviour for negative values other than EOF,
  // so each character is converted to unsigned char before the call.
  return !s.empty() &&
         std::all_of(s.begin(), s.end(),
                     [](unsigned char c) { return ::isdigit(c) != 0; });
}
|
||||
|
||||
int ReadKFDDeviceProperties(uint32_t dev_id,
|
||||
std::vector<std::string> *retVec) {
|
||||
static std::string KFDDevicePath(uint32_t dev_id) {
|
||||
std::string node_path = kKFDNodesPathRoot;
|
||||
node_path += '/';
|
||||
node_path += std::to_string(dev_id);
|
||||
return node_path;
|
||||
}
|
||||
|
||||
static int OpenKFDNodeFile(uint32_t dev_id, std::string node_file,
|
||||
std::ifstream *fs) {
|
||||
std::string line;
|
||||
int ret;
|
||||
std::ifstream fs;
|
||||
std::string properties_path = kKFDNodesPathRoot;
|
||||
std::string f_path;
|
||||
bool reg_file;
|
||||
|
||||
assert(retVec != nullptr);
|
||||
assert(fs != nullptr);
|
||||
|
||||
properties_path += '/';
|
||||
properties_path += std::to_string(dev_id);
|
||||
properties_path += "/properties";
|
||||
f_path = KFDDevicePath(dev_id);
|
||||
f_path += "/";
|
||||
f_path += node_file;
|
||||
|
||||
ret = isRegularFile(properties_path, ®_file);
|
||||
ret = isRegularFile(f_path, ®_file);
|
||||
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
@@ -93,26 +144,104 @@ int ReadKFDDeviceProperties(uint32_t dev_id,
|
||||
return ENOENT;
|
||||
}
|
||||
|
||||
fs.open(properties_path);
|
||||
fs->open(f_path);
|
||||
|
||||
if (!fs.is_open()) {
|
||||
if (!fs->is_open()) {
|
||||
return errno;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reads the "properties" file of KFD node kfd_node_id and appends each line
// to *retVec. Trailing lines that are empty or whitespace-only are removed.
//
// Returns 0 on success, or the error reported by OpenKFDNodeFile().
int ReadKFDDeviceProperties(uint32_t kfd_node_id,
                                           std::vector<std::string> *retVec) {
  assert(retVec != nullptr);

  std::ifstream fs;
  int ret = OpenKFDNodeFile(kfd_node_id, "properties", &fs);

  if (ret) {
    return ret;
  }

  std::string line;
  while (std::getline(fs, line)) {
    retVec->push_back(line);
  }
  fs.close();

  // Remove any *trailing* empty (whitespace) lines. The emptiness check must
  // be part of the loop condition: if every line is blank the vector drains
  // completely and back() on an empty vector is undefined.
  while (!retVec->empty() &&
         retVec->back().find_first_not_of(" \t\n\v\f\r") == std::string::npos) {
    retVec->pop_back();
  }

  return 0;
}
|
||||
|
||||
static int ReadKFDGpuId(uint32_t kfd_node_id, uint64_t *gpu_id) {
|
||||
std::string line;
|
||||
int ret;
|
||||
std::ifstream fs;
|
||||
std::string gpu_id_str;
|
||||
|
||||
assert(gpu_id != nullptr);
|
||||
|
||||
ret = OpenKFDNodeFile(kfd_node_id, "gpu_id", &fs);
|
||||
|
||||
if (ret) {
|
||||
fs.close();
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << fs.rdbuf();
|
||||
fs.close();
|
||||
|
||||
gpu_id_str = ss.str();
|
||||
|
||||
gpu_id_str.erase(std::remove(gpu_id_str.begin(), gpu_id_str.end(), '\n'),
|
||||
gpu_id_str.end());
|
||||
|
||||
if (!is_number(gpu_id_str)) {
|
||||
return ENXIO;
|
||||
}
|
||||
|
||||
*gpu_id = std::stoi(gpu_id_str);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reads the "name" file of KFD node kfd_node_id into *gpu_name with all
// newline characters removed. Returns 0 on success, or the error reported by
// OpenKFDNodeFile().
static int ReadKFDGpuName(uint32_t kfd_node_id, std::string *gpu_name) {
  assert(gpu_name != nullptr);

  std::ifstream name_file;
  const int open_err = OpenKFDNodeFile(kfd_node_id, "name", &name_file);
  if (open_err) {
    name_file.close();
    return open_err;
  }

  // Slurp the whole file, then drop every '\n' from the result.
  std::stringstream contents;
  contents << name_file.rdbuf();
  name_file.close();

  *gpu_name = contents.str();
  auto new_end = std::remove(gpu_name->begin(), gpu_name->end(), '\n');
  gpu_name->erase(new_end, gpu_name->end());

  return 0;
}
|
||||
|
||||
int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated,
|
||||
uint32_t *num_procs_found) {
|
||||
@@ -128,7 +257,7 @@ int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated,
|
||||
}
|
||||
auto dentry = readdir(proc_dir);
|
||||
|
||||
std::string prod_id_str;
|
||||
std::string proc_id_str;
|
||||
std::string tmp;
|
||||
|
||||
while (dentry != nullptr) {
|
||||
@@ -137,29 +266,32 @@ int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated,
|
||||
continue;
|
||||
}
|
||||
|
||||
prod_id_str = dentry->d_name;
|
||||
assert(is_number(prod_id_str) && "Unexpected file name in kfd/proc dir");
|
||||
if (!is_number(prod_id_str)) {
|
||||
proc_id_str = dentry->d_name;
|
||||
assert(is_number(proc_id_str) && "Unexpected file name in kfd/proc dir");
|
||||
if (!is_number(proc_id_str)) {
|
||||
dentry = readdir(proc_dir);
|
||||
continue;
|
||||
}
|
||||
if (procs && *num_procs_found < num_allocated) {
|
||||
int err;
|
||||
std::string tmp;
|
||||
|
||||
procs[*num_procs_found].process_id = std::stoi(prod_id_str);
|
||||
procs[*num_procs_found].process_id = std::stoi(proc_id_str);
|
||||
|
||||
std::string pasid_str_path = kKFDProcPathRoot;
|
||||
pasid_str_path += "/";
|
||||
pasid_str_path += prod_id_str;
|
||||
pasid_str_path += proc_id_str;
|
||||
pasid_str_path += "/";
|
||||
pasid_str_path += kKFDPasidFName;
|
||||
|
||||
err = ReadSysfsStr(pasid_str_path, &tmp);
|
||||
if (err) {
|
||||
return err;
|
||||
dentry = readdir(proc_dir);
|
||||
continue;
|
||||
}
|
||||
assert(is_number(tmp) && "Unexpected value in pasid file");
|
||||
if (!is_number(tmp)) {
|
||||
closedir(proc_dir);
|
||||
return EINVAL;
|
||||
}
|
||||
procs[*num_procs_found].pasid = std::stoi(tmp);
|
||||
@@ -176,6 +308,81 @@ int GetProcessInfo(rsmi_process_info_t *procs, uint32_t num_allocated,
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read the gpuid files found in all the <queue id> dirs and put them in
|
||||
// gpus_found.
|
||||
// Directory structure:
|
||||
// /sys/class/kfd/kfd/proc/<pid>/queues/<queue id>/gpuid
|
||||
|
||||
int GetProcessGPUs(uint32_t pid, std::unordered_set<uint64_t> *gpu_set) {
|
||||
int err;
|
||||
|
||||
assert(gpu_set != nullptr);
|
||||
if (gpu_set == nullptr) {
|
||||
return RSMI_STATUS_INVALID_ARGS;
|
||||
}
|
||||
errno = 0;
|
||||
|
||||
std::string queues_dir = kKFDProcPathRoot;
|
||||
queues_dir += "/";
|
||||
queues_dir += std::to_string(pid);
|
||||
queues_dir += "/queues";
|
||||
|
||||
auto queues_dir_hd = opendir(queues_dir.c_str());
|
||||
|
||||
if (queues_dir_hd == nullptr) {
|
||||
std::string err_str = "Unable to open queues directory for process ";
|
||||
err_str += std::to_string(pid);
|
||||
perror(err_str.c_str());
|
||||
return ESRCH;
|
||||
}
|
||||
|
||||
auto q_dentry = readdir(queues_dir_hd);
|
||||
|
||||
std::string q_gpu_id_str;
|
||||
std::string q_dir;
|
||||
|
||||
std::string tmp;
|
||||
|
||||
while (q_dentry != nullptr) {
|
||||
if (q_dentry->d_name[0] == '.') {
|
||||
q_dentry = readdir(queues_dir_hd);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_number(q_dentry->d_name)) {
|
||||
q_dentry = readdir(queues_dir_hd);
|
||||
continue;
|
||||
}
|
||||
|
||||
q_gpu_id_str = queues_dir + '/' + q_dentry->d_name + "/gpuid";
|
||||
|
||||
err = ReadSysfsStr(q_gpu_id_str, &tmp);
|
||||
if (err) {
|
||||
q_dentry = readdir(queues_dir_hd);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t val;
|
||||
try {
|
||||
val = std::stoi(tmp);
|
||||
} catch (...) {
|
||||
std::cerr << "Error; read invalid data: " << tmp << " from " <<
|
||||
q_gpu_id_str << std::endl;
|
||||
closedir(queues_dir_hd);
|
||||
return ENXIO; // Return "no such device" if we read an invalid gpu id
|
||||
}
|
||||
gpu_set->insert(val);
|
||||
|
||||
q_dentry = readdir(queues_dir_hd);
|
||||
}
|
||||
|
||||
errno = 0;
|
||||
if (closedir(queues_dir_hd)) {
|
||||
return errno;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc) {
|
||||
assert(proc != nullptr);
|
||||
int err;
|
||||
@@ -208,5 +415,138 @@ int GetProcessInfoForPID(uint32_t pid, rsmi_process_info_t *proc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int DiscoverKFDNodes(std::map<uint64_t, std::shared_ptr<KFDNode>> *nodes) {
|
||||
assert(nodes != nullptr);
|
||||
|
||||
if (nodes == nullptr) {
|
||||
return EINVAL;
|
||||
}
|
||||
assert(nodes->size() == 0);
|
||||
|
||||
nodes->clear();
|
||||
|
||||
std::shared_ptr<KFDNode> node;
|
||||
uint32_t node_indx;
|
||||
|
||||
auto kfd_node_dir = opendir(kKFDNodesPathRoot);
|
||||
assert(kfd_node_dir != nullptr);
|
||||
|
||||
auto dentry = readdir(kfd_node_dir);
|
||||
while (dentry != nullptr) {
|
||||
if (dentry->d_name[0] == '.') {
|
||||
dentry = readdir(kfd_node_dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_number(dentry->d_name)) {
|
||||
dentry = readdir(kfd_node_dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
node_indx = std::stoi(dentry->d_name);
|
||||
node = std::shared_ptr<KFDNode>(new KFDNode(node_indx));
|
||||
|
||||
node->Initialize();
|
||||
|
||||
if (node->gpu_id() == 0) {
|
||||
// Don't add; this is a cpu node.
|
||||
dentry = readdir(kfd_node_dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t kfd_gpu_node_bus_fn;
|
||||
uint64_t kfd_gpu_node_domain;
|
||||
int ret;
|
||||
ret =
|
||||
node->get_property_value(kKFDNodePropLOCATION_IDStr,
|
||||
&kfd_gpu_node_bus_fn);
|
||||
if (ret != 0) {
|
||||
closedir(kfd_node_dir);
|
||||
return ret;
|
||||
}
|
||||
ret =
|
||||
node->get_property_value(kKFDNodePropDOMAINStr, &kfd_gpu_node_domain);
|
||||
if (ret != 0) {
|
||||
closedir(kfd_node_dir);
|
||||
return ret;
|
||||
}
|
||||
|
||||
uint64_t kfd_bdfid =
|
||||
(kfd_gpu_node_domain << 32) | (kfd_gpu_node_bus_fn);
|
||||
(*nodes)[kfd_bdfid] = node;
|
||||
|
||||
dentry = readdir(kfd_node_dir);
|
||||
}
|
||||
|
||||
if (closedir(kfd_node_dir)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Nothing to release explicitly; the members clean up after themselves.
KFDNode::~KFDNode() = default;
|
||||
|
||||
// Populates properties_ from this node's "properties" file. Each line is
// read as "<name> <unsigned integer>". Does nothing when properties_ is
// already populated.
//
// Returns 0 on success, or the error from ReadKFDDeviceProperties().
int KFDNode::ReadProperties(void) {
  assert(properties_.size() == 0);
  if (properties_.size() > 0) {
    return 0;
  }

  std::vector<std::string> propVec;
  int ret = ReadKFDDeviceProperties(node_indx_, &propVec);

  if (ret) {
    return ret;
  }

  std::string key_str;
  // Assume all properties are unsigned integers for now. Initialised so that
  // a blank first line (where no value is extracted at all) does not store an
  // indeterminate value.
  uint64_t val_int = 0;

  for (const std::string &prop_line : propVec) {
    std::istringstream fs(prop_line);
    fs >> key_str;
    fs >> val_int;

    properties_[key_str] = val_int;
  }

  return 0;
}
|
||||
|
||||
int
|
||||
KFDNode::Initialize(void) {
|
||||
int ret = 0;
|
||||
ret = ReadProperties();
|
||||
if (ret) {return ret;}
|
||||
|
||||
ret = ReadKFDGpuId(node_indx_, &gpu_id_);
|
||||
if (ret) {return ret;}
|
||||
|
||||
ret = ReadKFDGpuName(node_indx_, &name_);
|
||||
|
||||
return ret;
|
||||
}
|
||||
int
|
||||
KFDNode::get_property_value(std::string property, uint64_t *value) {
|
||||
assert(value != nullptr);
|
||||
if (value == nullptr) {
|
||||
return EINVAL;
|
||||
}
|
||||
if (properties_.find(property) == properties_.end()) {
|
||||
return EINVAL;
|
||||
}
|
||||
*value = properties_[property];
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
@@ -55,12 +55,14 @@
|
||||
#include <utility>
|
||||
#include <functional>
|
||||
#include <cerrno>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_device.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
#include "rocm_smi/rocm_smi_exception.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
#include "rocm_smi/rocm_smi_kfd.h"
|
||||
|
||||
static const char *kPathDRMRoot = "/sys/class/drm";
|
||||
static const char *kPathHWMonRoot = "/sys/class/hwmon";
|
||||
@@ -253,9 +255,9 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
++i;
|
||||
}
|
||||
|
||||
// DiscoverDevices() will seach for devices and monitors and update internal
|
||||
// data structures.
|
||||
DiscoverDevices();
|
||||
// DiscoverAmdgpuDevices() will search for devices and monitors and update
|
||||
// internal data structures.
|
||||
DiscoverAmdgpuDevices();
|
||||
|
||||
// IterateSMIDevices will iterate through all the known devices and apply
|
||||
// the provided call-back to each device found.
|
||||
@@ -264,7 +266,34 @@ RocmSMI::Initialize(uint64_t flags) {
|
||||
|
||||
if (ret != 0) {
|
||||
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
|
||||
"Failed to initialize rocm_smi library.");
|
||||
"Failed to initialize rocm_smi library (amdgpu node discovery.");
|
||||
}
|
||||
|
||||
std::map<uint64_t, std::shared_ptr<KFDNode>> tmp_map;
|
||||
ret = DiscoverKFDNodes(&tmp_map);
|
||||
if (ret != 0) {
|
||||
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
|
||||
"Failed to initialize rocm_smi library (KFD node discovery).");
|
||||
}
|
||||
|
||||
std::shared_ptr<amd::smi::Device> dev;
|
||||
|
||||
// 1. construct kfd_node_map_ with gpu_id as key and *Device as value
|
||||
// 2. for each kfd node, write the corresponding dv_ind
|
||||
// 3. for each amdgpu device, write the corresponding gpu_id
|
||||
for (uint32_t dv_ind = 0; dv_ind < s_monitor_devices.size(); ++dv_ind) {
|
||||
dev = s_monitor_devices[dv_ind];
|
||||
uint64_t bdfid = dev->bdfid();
|
||||
assert(tmp_map.find(bdfid) != tmp_map.end());
|
||||
if (tmp_map.find(bdfid) == tmp_map.end()) {
|
||||
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
|
||||
"amdgpu device bdfid has no KFD matching node");
|
||||
}
|
||||
|
||||
tmp_map[bdfid]->set_amdgpu_dev_index(dv_ind);
|
||||
uint64_t gpu_id = tmp_map[bdfid]->gpu_id();
|
||||
dev->set_kfd_gpu_id(gpu_id);
|
||||
kfd_node_map_[gpu_id] = tmp_map[bdfid];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -345,10 +374,11 @@ RocmSMI::AddToDeviceList(std::string dev_name) {
|
||||
}
|
||||
|
||||
std::string d_name = dev_name;
|
||||
uint32_t d_index = GetDeviceIndex(d_name);
|
||||
uint32_t card_indx = GetDeviceIndex(d_name);
|
||||
dev->set_drm_render_minor(GetDrmRenderMinor(dev_path));
|
||||
dev->set_index(d_index);
|
||||
GetSupportedEventGroups(d_index, dev->supported_event_groups());
|
||||
dev->set_card_index(card_indx);
|
||||
GetSupportedEventGroups(card_indx, dev->supported_event_groups());
|
||||
|
||||
devices_.push_back(dev);
|
||||
|
||||
return;
|
||||
@@ -381,7 +411,7 @@ static bool isAMDGPU(std::string dev_path) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t RocmSMI::DiscoverDevices(void) {
|
||||
uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
auto ret = 0;
|
||||
|
||||
// If this gets called more than once, clear previous findings.
|
||||
|
||||
@@ -303,6 +303,7 @@ static int get_supported_sensors(std::string dir_path, std::string fn_reg_ex,
|
||||
std::string::size_type pos = fn_reg_ex.find('#');
|
||||
|
||||
if (pos == std::string::npos) {
|
||||
closedir(hwmon_dir);
|
||||
return -1;
|
||||
}
|
||||
fn_reg_ex.erase(pos, 1);
|
||||
@@ -326,12 +327,16 @@ static int get_supported_sensors(std::string dir_path, std::string fn_reg_ex,
|
||||
assert(errno == 0);
|
||||
assert(*endptr == '\0');
|
||||
if (errno) {
|
||||
closedir(hwmon_dir);
|
||||
return -2;
|
||||
}
|
||||
sensors->push_back(mon_val);
|
||||
}
|
||||
dentry = readdir(hwmon_dir);
|
||||
}
|
||||
if (closedir(hwmon_dir)) {
|
||||
return errno;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -96,7 +96,8 @@ void TestIdInfoRead::Run(void) {
|
||||
|
||||
for (uint32_t i = 0; i < num_monitor_devs(); ++i) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Device index: " << id << std::endl;
|
||||
std::cout << "\t*************************" << std::endl;
|
||||
std::cout << "\t**Device index: " << i << std::endl;
|
||||
}
|
||||
|
||||
// Get the device ID, name, vendor ID and vendor name for the device
|
||||
|
||||
@@ -98,6 +98,10 @@ void TestProcInfoRead::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
|
||||
uint32_t num_devices;
|
||||
err = rsmi_num_monitor_devices(&num_devices);
|
||||
CHK_ERR_ASRT(err)
|
||||
|
||||
err = rsmi_compute_process_info_get(nullptr, &num_proc_found);
|
||||
if (err != RSMI_STATUS_SUCCESS) {
|
||||
if (err == RSMI_STATUS_NOT_SUPPORTED) {
|
||||
@@ -119,6 +123,7 @@ void TestProcInfoRead::Run(void) {
|
||||
if (num_proc_found == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
procs = new rsmi_process_info_t[num_proc_found];
|
||||
|
||||
val_ui32 = num_proc_found;
|
||||
@@ -150,8 +155,32 @@ void TestProcInfoRead::Run(void) {
|
||||
// Reset to the number we actually read
|
||||
num_proc_found = val_ui32;
|
||||
if (num_proc_found) {
|
||||
rsmi_process_info_t proc_info;
|
||||
// Allocate the max we expect to get
|
||||
uint32_t *dev_inds = new uint32_t[num_devices];
|
||||
uint32_t amt_allocd = num_devices;
|
||||
|
||||
for (uint32_t j = 0; j < num_proc_found; j++) {
|
||||
err = rsmi_compute_process_gpus_get(procs[j].process_id, dev_inds,
|
||||
&amt_allocd);
|
||||
CHK_ERR_ASRT(err)
|
||||
ASSERT_LE(amt_allocd, num_devices);
|
||||
|
||||
std::cout << "\t**Process " << procs[j].process_id <<
|
||||
" is using devices with indices: ";
|
||||
uint32_t i;
|
||||
if (amt_allocd > 0) {
|
||||
for (i = 0; i < amt_allocd - 1; ++i) {
|
||||
std::cout << dev_inds[i] << ", ";
|
||||
}
|
||||
std::cout << dev_inds[i] << std::endl;
|
||||
}
|
||||
// Reset amt_allocd back to the amount actually allocated
|
||||
amt_allocd = num_devices;
|
||||
}
|
||||
|
||||
delete []dev_inds;
|
||||
|
||||
rsmi_process_info_t proc_info;
|
||||
err = rsmi_compute_process_info_by_pid_get(procs[0].process_id,
|
||||
&proc_info);
|
||||
if (err == RSMI_STATUS_NOT_FOUND) {
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur