diff --git a/include/amd_smi/impl/amd_smi_gpu_device.h b/include/amd_smi/impl/amd_smi_gpu_device.h index b512ce3e76..527d52774f 100644 --- a/include/amd_smi/impl/amd_smi_gpu_device.h +++ b/include/amd_smi/impl/amd_smi_gpu_device.h @@ -63,7 +63,6 @@ class AMDSmiGPUDevice: public AMDSmiProcessor { if (check_if_drm_is_supported()) this->get_drm_data(); } ~AMDSmiGPUDevice() { - if (check_if_drm_is_supported()) shared_mutex_close(mutex_); } amdsmi_status_t get_drm_data(); @@ -91,7 +90,6 @@ class AMDSmiGPUDevice: public AMDSmiProcessor { amdsmi_bdf_t bdf_; uint32_t vendor_id_; AMDSmiDrm& drm_; - shared_mutex_t mutex_; }; diff --git a/src/amd_smi/amd_smi_drm.cc b/src/amd_smi/amd_smi_drm.cc index a3b2fbc79e..3d0f4558c3 100644 --- a/src/amd_smi/amd_smi_drm.cc +++ b/src/amd_smi/amd_smi_drm.cc @@ -131,6 +131,7 @@ amdsmi_status_t AMDSmiDrm::init() { amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance(); auto devices = smi.devices(); + bool has_valid_fds = false; for (uint32_t i=0; i < devices.size(); i++) { auto rocm_smi_device = devices[i]; std::string render_file_name; @@ -171,6 +172,7 @@ amdsmi_status_t AMDSmiDrm::init() { continue; } + has_valid_fds = true; bdf.fields.function_number = device->businfo.pci->func; bdf.fields.device_number = device->businfo.pci->dev; bdf.fields.bus_number = device->businfo.pci->bus; @@ -182,6 +184,12 @@ amdsmi_status_t AMDSmiDrm::init() { drm_free_device(&device); } + // cannot find any valid fds. + if (!has_valid_fds) { + drm_bdfs_.clear(); + return AMDSMI_STATUS_INIT_ERROR; + } + return AMDSMI_STATUS_SUCCESS; } @@ -315,7 +323,7 @@ std::vector& AMDSmiDrm::get_drm_paths() { } bool AMDSmiDrm::check_if_drm_is_supported() { - return drm_cmd_write_ != NULL ? true : false; + return (drm_cmd_write_ != NULL && drm_bdfs_.size() >0) ? true : false; } std::vector AMDSmiDrm::get_bdfs() { diff --git a/src/amd_smi/amd_smi_gpu_device.cc b/src/amd_smi/amd_smi_gpu_device.cc index b303e82ea3..45d419f2e8 100644 --- a/src/amd_smi/amd_smi_gpu_device.cc +++ b/src/amd_smi/amd_smi_gpu_device.cc @@ -43,6 +43,7 @@ #include #include "amd_smi/impl/amd_smi_gpu_device.h" +#include "rocm_smi/rocm_smi_utils.h" namespace amd { @@ -80,11 +81,6 @@ amdsmi_status_t AMDSmiGPUDevice::get_drm_data() { ret = drm_.get_bdf_by_index(gpu_id_, &bdf); if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED; - mutex_ = shared_mutex_init(path.c_str(), 0777); - if (mutex_.ptr == nullptr) { - printf("Failed to create shared mem. mutex."); - return AMDSMI_STATUS_INIT_ERROR; - } bdf_ = bdf, path_ = path, fd_ = fd; vendor_id_ = drm_.get_vendor_id(); @@ -92,7 +88,7 @@ amdsmi_status_t AMDSmiGPUDevice::get_drm_data() { } pthread_mutex_t* AMDSmiGPUDevice::get_mutex() { - return mutex_.ptr; + return amd::smi::GetMutex(gpu_id_); } amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_info(unsigned info_id,