From 750704720b5e4a51e27c1ace2d0845045805b92c Mon Sep 17 00:00:00 2001 From: "Bill(Shuzhou) Liu" Date: Thu, 21 Mar 2024 15:25:35 -0500 Subject: [PATCH] Unlock the mutex when process is dead After the dead process is detected, pthread_mutex_consistent() will be called. After that, the pthread_mutex_unlock() should also be called to unlock it: "It is the responsibility of the application to recover the state so it can be reused." Change-Id: I45d3e2e68c3b06779f3acb1e908dbec0c6a39297 --- third_party/shared_mutex/shared_mutex.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/third_party/shared_mutex/shared_mutex.cc b/third_party/shared_mutex/shared_mutex.cc index b1b8dade38..a1f711fd12 100755 --- a/third_party/shared_mutex/shared_mutex.cc +++ b/third_party/shared_mutex/shared_mutex.cc @@ -48,10 +48,13 @@ static std::vector lsof(const char* filename) { DIR *dp = nullptr; std::vector process_id; + pid_t cur_pid = getpid(); dp = opendir("/proc"); if (dp != nullptr) { while ((entry = readdir(dp))) { std::string id(entry->d_name); + // ignore current process + if (id == std::to_string(cur_pid)) continue; // the process id should be a number if (std::all_of(id.begin(), id.end(), ::isdigit)) { process_id.push_back(entry->d_name); @@ -132,6 +135,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) { int ret; ret = pthread_mutex_timedlock(mutex_ptr, &expireTime); + pid_t cur_pid = getpid(); if (ret == EOWNERDEAD) { ret = pthread_mutex_consistent(mutex_ptr); @@ -149,6 +153,12 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) { throw amd::smi::rsmi_exception(RSMI_STATUS_BUSY, __FUNCTION__); return mutex; } + + fprintf(stderr, "%s: %d detected dead process, and make mutex consistent.\n", name, cur_pid); + // The mutex is locked even if EOWNERDEAD was returned,and need to unlock it. + if (pthread_mutex_unlock(mutex_ptr)) { + perror("pthread_mutex_unlock"); + } } else if (ret || (mutex.created == 0 && reinterpret_cast(addr)->ptr == NULL)) { // Something is out of sync. @@ -158,6 +168,7 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) { if (!retried) { std::vector ids = lsof(name); if (ids.size() == 0) { // no process is using it + fprintf(stderr, "%s: %d re-init the mutex since no one use it.\n", name, cur_pid); memset(mutex_ptr, 0, sizeof(pthread_mutex_t)); // Set mutex.created == 1 so that it can be initialized latter. mutex.created = 1; @@ -181,7 +192,8 @@ shared_mutex_t shared_mutex_init(const char *name, mode_t mode, bool retried) { } } - if (mutex.created) { + // also need to set the attribute when retried as the mutex is re-initialized. + if (mutex.created || retried) { pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr)) { perror("pthread_mutexattr_init");