Use the same mutex as rocm-smi
Share the same mutex as the rocm-smi implementation. Handle the crash when a user is not in the render group. Change-Id: I486b26569f9b523b41bbdaf95d51f4a730978cfd
Цей коміт міститься в:
зафіксовано
Shuzhou Liu
джерело
d74be3120e
коміт
5a6b5d2a0a
@@ -63,7 +63,6 @@ class AMDSmiGPUDevice: public AMDSmiProcessor {
|
||||
if (check_if_drm_is_supported()) this->get_drm_data();
|
||||
}
|
||||
~AMDSmiGPUDevice() {
|
||||
if (check_if_drm_is_supported()) shared_mutex_close(mutex_);
|
||||
}
|
||||
|
||||
amdsmi_status_t get_drm_data();
|
||||
@@ -91,7 +90,6 @@ class AMDSmiGPUDevice: public AMDSmiProcessor {
|
||||
amdsmi_bdf_t bdf_;
|
||||
uint32_t vendor_id_;
|
||||
AMDSmiDrm& drm_;
|
||||
shared_mutex_t mutex_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -131,6 +131,7 @@ amdsmi_status_t AMDSmiDrm::init() {
|
||||
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
|
||||
auto devices = smi.devices();
|
||||
|
||||
bool has_valid_fds = false;
|
||||
for (uint32_t i=0; i < devices.size(); i++) {
|
||||
auto rocm_smi_device = devices[i];
|
||||
std::string render_file_name;
|
||||
@@ -171,6 +172,7 @@ amdsmi_status_t AMDSmiDrm::init() {
|
||||
continue;
|
||||
}
|
||||
|
||||
has_valid_fds = true;
|
||||
bdf.fields.function_number = device->businfo.pci->func;
|
||||
bdf.fields.device_number = device->businfo.pci->dev;
|
||||
bdf.fields.bus_number = device->businfo.pci->bus;
|
||||
@@ -182,6 +184,12 @@ amdsmi_status_t AMDSmiDrm::init() {
|
||||
drm_free_device(&device);
|
||||
}
|
||||
|
||||
// cannot find any valid fds.
|
||||
if (!has_valid_fds) {
|
||||
drm_bdfs_.clear();
|
||||
return AMDSMI_STATUS_INIT_ERROR;
|
||||
}
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -315,7 +323,7 @@ std::vector<std::string>& AMDSmiDrm::get_drm_paths() {
|
||||
}
|
||||
|
||||
bool AMDSmiDrm::check_if_drm_is_supported() {
|
||||
return drm_cmd_write_ != NULL ? true : false;
|
||||
return (drm_cmd_write_ != NULL && drm_bdfs_.size() >0) ? true : false;
|
||||
}
|
||||
|
||||
std::vector<amdsmi_bdf_t> AMDSmiDrm::get_bdfs() {
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
|
||||
#include <functional>
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
#include "rocm_smi/rocm_smi_utils.h"
|
||||
|
||||
|
||||
namespace amd {
|
||||
@@ -80,11 +81,6 @@ amdsmi_status_t AMDSmiGPUDevice::get_drm_data() {
|
||||
ret = drm_.get_bdf_by_index(gpu_id_, &bdf);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
mutex_ = shared_mutex_init(path.c_str(), 0777);
|
||||
if (mutex_.ptr == nullptr) {
|
||||
printf("Failed to create shared mem. mutex.");
|
||||
return AMDSMI_STATUS_INIT_ERROR;
|
||||
}
|
||||
bdf_ = bdf, path_ = path, fd_ = fd;
|
||||
vendor_id_ = drm_.get_vendor_id();
|
||||
|
||||
@@ -92,7 +88,7 @@ amdsmi_status_t AMDSmiGPUDevice::get_drm_data() {
|
||||
}
|
||||
|
||||
pthread_mutex_t* AMDSmiGPUDevice::get_mutex() {
|
||||
return mutex_.ptr;
|
||||
return amd::smi::GetMutex(gpu_id_);
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_info(unsigned info_id,
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача