diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc
index 12e269590f..fd4ace7f9f 100755
--- a/rocm_smi/src/rocm_smi.cc
+++ b/rocm_smi/src/rocm_smi.cc
@@ -3435,7 +3435,8 @@ rsmi_dev_gpu_reset(uint32_t dv_ind) {
   ss << __PRETTY_FUNCTION__ << "| ======= start =======";
   LOG_TRACE(ss);
   REQUIRE_ROOT_ACCESS
-  DEVICE_MUTEX
+  // No longer using DEVICE_MUTEX as it blocks long running processes
+  // DEVICE_MUTEX
 
   rsmi_status_t ret;
   uint64_t status_code = 0;
diff --git a/third_party/shared_mutex/shared_mutex.cc b/third_party/shared_mutex/shared_mutex.cc
index 414faa1352..59504ee1fc 100755
--- a/third_party/shared_mutex/shared_mutex.cc
+++ b/third_party/shared_mutex/shared_mutex.cc
@@ -255,10 +255,13 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) {
     // When process crash before unlock the mutex, the mutex is in bad status.
     // reset the mutex if no process is using it, and then retry lock
     if (!retried) {
-      std::vector ids = lsof(name);
+      // NOTE(review): assumes name starts with '/' (shm_open convention) so
+      // this yields /dev/shm/<name> -- confirm against callers.
+      std::string shared_mutex_filename = "/dev/shm" + std::string(name);
+      std::vector ids = lsof(shared_mutex_filename.c_str());
       if (ids.size() == 0) {  // no process is using it
         fprintf(stderr, "%d re-init the mutex %s since no one use it. ret:%d ptr:%p\n",
-             cur_pid, name, ret, reinterpret_cast(addr)->ptr);
+             // %s needs a C string: a std::string object must not be passed through varargs.
+             cur_pid, shared_mutex_filename.c_str(), ret, reinterpret_cast(addr)->ptr);
         memset(mutex_ptr, 0, sizeof(pthread_mutex_t));
         // Set mutex.created == 1 so that it can be initialized latter.
         mutex.created = 1;