diff --git a/hipamd/src/hip_internal.hpp b/hipamd/src/hip_internal.hpp index 3352feacec..e9ff22f984 100644 --- a/hipamd/src/hip_internal.hpp +++ b/hipamd/src/hip_internal.hpp @@ -144,15 +144,6 @@ const char* ihipGetErrorName(hipError_t hip_error); __func__, hip::ihipGetErrorName(err), ToString( __VA_ARGS__ ).c_str()); #define HIP_INIT_API_INTERNAL(noReturn, cid, ...) \ - amd::Thread* thread = amd::Thread::current(); \ - if (!VDI_CHECK_THREAD(thread)) { \ - ClPrint(amd::LOG_NONE, amd::LOG_ALWAYS, \ - "An internal error has occurred." \ - " This may be due to insufficient memory."); \ - if (!noReturn) { \ - return hipErrorOutOfMemory; \ - } \ - } \ HIP_INIT(noReturn) \ HIP_API_PRINT(__VA_ARGS__) \ HIP_CB_SPAWNER_OBJECT(cid); diff --git a/hipamd/src/hip_runtime.cpp b/hipamd/src/hip_runtime.cpp index 41edff8b34..50b98ec237 100644 --- a/hipamd/src/hip_runtime.cpp +++ b/hipamd/src/hip_runtime.cpp @@ -51,18 +51,11 @@ extern "C" BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) { #endif // DEBUG hip::PlatformState::instance().setDynamicLibraryHandle(static_cast(hinst)); break; - case DLL_PROCESS_DETACH: { - amd::Thread* thread = amd::Thread::current(); - if (!(thread != nullptr || - ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) { - return true; - } + case DLL_PROCESS_DETACH: hip::ihipDestroyDevice(); - } break; - case DLL_THREAD_DETACH: { - amd::Thread* thread = amd::Thread::current(); - delete thread; - } break; + break; + case DLL_THREAD_DETACH: + break; default: break; } diff --git a/hipamd/src/hiprtc/hiprtcInternal.hpp b/hipamd/src/hiprtc/hiprtcInternal.hpp index 1ef807a1e5..79879c8ec0 100644 --- a/hipamd/src/hiprtc/hiprtcInternal.hpp +++ b/hipamd/src/hiprtc/hiprtcInternal.hpp @@ -80,13 +80,6 @@ template inline std::string ToString(T first, Arg // hiprtcInit lock static amd::Monitor g_hiprtcInitlock{}; #define HIPRTC_INIT_API_INTERNAL(...) \ - amd::Thread* thread = amd::Thread::current(); \ - if (!VDI_CHECK_THREAD(thread)) { \ - ClPrint(amd::LOG_NONE, amd::LOG_ALWAYS, \ - "An internal error has occurred." \ - " This may be due to insufficient memory."); \ - HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ - } \ amd::ScopedLock lock(g_hiprtcInitlock); \ if (!amd::Flag::init()) { \ HIPRTC_RETURN(HIPRTC_ERROR_INTERNAL_ERROR); \ diff --git a/opencl/amdocl/cl_runtime.cpp b/opencl/amdocl/cl_runtime.cpp index c4b9f42502..6ce0507ca2 100644 --- a/opencl/amdocl/cl_runtime.cpp +++ b/opencl/amdocl/cl_runtime.cpp @@ -48,10 +48,8 @@ extern "C" BOOL WINAPI DllMain(HINSTANCE hinst, DWORD reason, LPVOID reserved) { case DLL_PROCESS_DETACH: amd::Runtime::setLibraryDetached(); break; - case DLL_THREAD_DETACH: { - amd::Thread* thread = amd::Thread::current(); - delete thread; - } break; + case DLL_THREAD_DETACH: + break; default: break; } diff --git a/rocclr/cmake/ROCclr.cmake b/rocclr/cmake/ROCclr.cmake index 2241b6e924..8b49a9a4f4 100644 --- a/rocclr/cmake/ROCclr.cmake +++ b/rocclr/cmake/ROCclr.cmake @@ -86,8 +86,6 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/platform/program.cpp ${ROCCLR_SRC_DIR}/platform/runtime.cpp ${ROCCLR_SRC_DIR}/platform/interop_gl.cpp - ${ROCCLR_SRC_DIR}/thread/monitor.cpp - ${ROCCLR_SRC_DIR}/thread/semaphore.cpp ${ROCCLR_SRC_DIR}/thread/thread.cpp ${ROCCLR_SRC_DIR}/utils/debug.cpp ${ROCCLR_SRC_DIR}/utils/flags.cpp) diff --git a/rocclr/device/rocm/rocvirtual.cpp b/rocclr/device/rocm/rocvirtual.cpp index 23b3102ca7..f51fd35e52 100644 --- a/rocclr/device/rocm/rocvirtual.cpp +++ b/rocclr/device/rocm/rocvirtual.cpp @@ -186,12 +186,6 @@ void Timestamp::checkGpuTime() { bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { Timestamp* ts = reinterpret_cast(arg); - amd::Thread* thread = amd::Thread::current(); - if (!(thread != nullptr || - ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) { - return false; - } - if (amd::activity_prof::IsEnabled(OP_ID_DISPATCH)) { amd::Command* head = ts->getParsedCommand(); if (head == nullptr) { diff --git a/rocclr/include/vdi_common.hpp b/rocclr/include/vdi_common.hpp index 7e234f546e..ba09e721ce 100644 --- a/rocclr/include/vdi_common.hpp +++ b/rocclr/include/vdi_common.hpp @@ -76,46 +76,25 @@ not_null(T* ptrOrNull) return amd::NotNullReference(ptrOrNull); } -#define VDI_CHECK_THREAD(thread) \ - (thread != NULL || ((thread = new amd::HostThread()) != NULL \ - && thread == amd::Thread::current())) - #define RUNTIME_ENTRY_RET(ret, func, args) \ CL_API_ENTRY ret CL_API_CALL \ func args \ { \ - amd::Thread* thread = amd::Thread::current(); \ - if (!VDI_CHECK_THREAD(thread)) { \ - *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; \ - return (ret) 0; \ - } #define RUNTIME_ENTRY_RET_NOERRCODE(ret, func, args) \ CL_API_ENTRY ret CL_API_CALL \ func args \ { \ - amd::Thread* thread = amd::Thread::current(); \ - if (!VDI_CHECK_THREAD(thread)) { \ - return (ret) 0; \ - } #define RUNTIME_ENTRY(ret, func, args) \ CL_API_ENTRY ret CL_API_CALL \ func args \ { \ - amd::Thread* thread = amd::Thread::current(); \ - if (!VDI_CHECK_THREAD(thread)) { \ - return CL_OUT_OF_HOST_MEMORY; \ - } #define RUNTIME_ENTRY_VOID(ret, func, args) \ CL_API_ENTRY ret CL_API_CALL \ func args \ { \ - amd::Thread* thread = amd::Thread::current(); \ - if (!VDI_CHECK_THREAD(thread)) { \ - return; \ - } #define RUNTIME_EXIT \ /* FIXME_lmoriche: we should check to thread->lastError here! */ \ diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index f9df497add..5382f2e1e7 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -51,7 +51,10 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti if (thread_.state() >= Thread::INITIALIZED) { ScopedLock sl(queueLock_); thread_.start(this); - queueLock_.wait(); + // wait for HostQueue::loop() to update acceptingCommands_ as true + while (!thread_.acceptingCommands_) { + queueLock_.wait(); + } } } } @@ -204,6 +207,7 @@ void HostQueue::loop(device::VirtualDevice* virtualDevice) { // Notify the caller that the queue is ready to accept commands. { ScopedLock sl(queueLock_); + // Notify HostQueue() that acceptingCommands_ is updated to true thread_.acceptingCommands_ = true; queueLock_.notify(); } diff --git a/rocclr/thread/monitor.cpp b/rocclr/thread/monitor.cpp deleted file mode 100644 index 6ebecb7a53..0000000000 --- a/rocclr/thread/monitor.cpp +++ /dev/null @@ -1,397 +0,0 @@ -/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include "thread/monitor.hpp" -#include "thread/semaphore.hpp" -#include "thread/thread.hpp" -#include "utils/util.hpp" - -#include -#include -#include -#include - -namespace amd { - -namespace legacy_monitor { - -Monitor::Monitor(bool recursive) - : contendersList_(0), onDeck_(0), waitersList_(NULL), owner_(NULL), recursive_(recursive) {} - -bool Monitor::trySpinLock() { - if (tryLock()) { - return true; - } - - for (int s = kMaxSpinIter; s > 0; --s) { - // First, be SMT friendly - if (s >= (kMaxSpinIter - kMaxReadSpinIter)) { - Os::spinPause(); - } - // and then SMP friendly - else { - Thread::yield(); - } - if (!isLocked()) { - return tryLock(); - } - } - - // We could not acquire the lock in the spin loop. - return false; -} - -void Monitor::finishLock() { - Thread* thread = Thread::current(); - assert(thread != NULL && "cannot lock() from (null)"); - - if (trySpinLock()) { - return; // We succeeded, we are done. - } - - /* The lock is contended. Push the thread's semaphore onto - * the contention list. - */ - Semaphore& semaphore = thread->lockSemaphore(); - semaphore.reset(); - - LinkedNode newHead; - newHead.setItem(&semaphore); - - intptr_t head = contendersList_.load(std::memory_order_acquire); - for (;;) { - // The assumption is that lockWord is locked. Make sure we do not - // continue unless the lock bit is set. - if ((head & kLockBit) == 0) { - if (tryLock()) { - return; - } - continue; - } - - // Set the new contention list head if lockWord is unchanged. - newHead.setNext(reinterpret_cast(head & ~kLockBit)); - if (contendersList_.compare_exchange_weak(head, reinterpret_cast(&newHead) | kLockBit, - std::memory_order_acq_rel, - std::memory_order_acquire)) { - break; - } - - // We failed the CAS. yield/pause before trying again. - Thread::yield(); - } - - int32_t spinCount = 0; - // Go to sleep until we become the on-deck thread. - while ((onDeck_ & ~kLockBit) != reinterpret_cast(&semaphore)) { - // First, be SMT friendly - if (spinCount < kMaxReadSpinIter) { - Os::spinPause(); - } - // and then SMP friendly - else if (spinCount < kMaxSpinIter) { - Thread::yield(); - } - // now go to sleep - else { - semaphore.wait(); - } - spinCount++; - } - - spinCount = 0; - // - // From now-on, we are the on-deck thread. It will stay that way until - // we successfuly acquire the lock. - // - for (;;) { - assert((onDeck_ & ~kLockBit) == reinterpret_cast(&semaphore) && "just checking"); - if (tryLock()) { - break; - } - - // Somebody beat us to it. Since we are on-deck, we can just go - // back to sleep. - // First, be SMT friendly - if (spinCount < kMaxReadSpinIter) { - Os::spinPause(); - } - // and then SMP friendly - else if (spinCount < kMaxSpinIter) { - Thread::yield(); - } - // now go to sleep - else { - semaphore.wait(); - } - spinCount++; - } - - assert(newHead.next() == NULL && "Should not be linked"); - onDeck_ = 0; -} - -void Monitor::finishUnlock() { - // If we get here, it means that there might be a thread in the contention - // list waiting to acquire the lock. We need to select a successor and - // place it on-deck. - - for (;;) { - // Grab the onDeck_ microlock to protect the next loop (make sure only - // one semaphore is removed from the contention list). - // - intptr_t ptr = 0; - if (!onDeck_.compare_exchange_strong(ptr, ptr | kLockBit, std::memory_order_acq_rel, - std::memory_order_acquire)) { - return; // Somebody else has the microlock, let him select onDeck_ - } - - intptr_t head = contendersList_.load(std::memory_order_acquire); - for (;;) { - if (head == 0) { - break; // There's nothing else to do. - } - - if ((head & kLockBit) != 0) { - // Somebody could have acquired then released the lock - // and failed to grab the onDeck_ microlock. - head = 0; - break; - } - - if (contendersList_.compare_exchange_weak( - head, reinterpret_cast(reinterpret_cast(head)->next()), - std::memory_order_acq_rel, std::memory_order_acquire)) { -#ifdef ASSERT - reinterpret_cast(head)->setNext(NULL); -#endif // ASSERT - break; - } - } - - Semaphore* semaphore = (head != 0) ? reinterpret_cast(head)->item() : NULL; - - onDeck_.store(reinterpret_cast(semaphore), std::memory_order_release); - // - // Release the onDeck_ microlock (end of critical region); - - if (semaphore != NULL) { - semaphore->post(); - return; - } - - // A StoreLoad barrier is required to make sure the onDeck_ store is published before - // the contendersList_ micro-lock check. - std::atomic_thread_fence(std::memory_order_seq_cst); - - // We do not have an on-deck thread (semaphore == NULL). Return if - // the contention list is empty or if the lock got acquired again. - head = contendersList_; - if (head == 0 || (head & kLockBit) != 0) { - return; - } - } -} - -void Monitor::wait() { - Thread* thread = Thread::current(); - assert(isLocked() && owner_ == thread && "just checking"); - - // Add the thread's resume semaphore to the list. - Semaphore& suspend = thread->suspendSemaphore(); - suspend.reset(); - - LinkedNode newHead; - newHead.setItem(&suspend); - newHead.setNext(waitersList_); - waitersList_ = &newHead; - - // Preserve the lock count (for recursive mutexes) - uint32_t lockCount = lockCount_; - lockCount_ = 1; - - // Release the lock and go to sleep. - unlock(); - - // Go to sleep until we become the on-deck thread. - int32_t spinCount = 0; - while ((onDeck_ & ~kLockBit) != reinterpret_cast(&suspend)) { - // First, be SMT friendly - if (spinCount < kMaxReadSpinIter) { - Os::spinPause(); - } - // and then SMP friendly - else if (spinCount < kMaxSpinIter) { - Thread::yield(); - } - // now go to sleep - else { - suspend.timedWait(10); - } - spinCount++; - } - - spinCount = 0; - for (;;) { - assert((onDeck_ & ~kLockBit) == reinterpret_cast(&suspend) && "just checking"); - - if (trySpinLock()) { - break; - } - - // Somebody beat us to it. Since we are on-deck, we can just go - // back to sleep. - // First, be SMT friendly - if (spinCount < kMaxReadSpinIter) { - Os::spinPause(); - } - // and then SMP friendly - else if (spinCount < kMaxSpinIter) { - Thread::yield(); - } - // now go to sleep - else { - suspend.wait(); - } - spinCount++; - } - - // Restore the lock count (for recursive mutexes) - lockCount_ = lockCount; - - onDeck_.store(0, std::memory_order_release); -} - -void Monitor::notify() { - assert(isLocked() && owner_ == Thread::current() && "just checking"); - - LinkedNode* waiter = waitersList_; - if (waiter == NULL) { - return; - } - - // Dequeue a waiter from the wait list and add it to the contention list. - waitersList_ = waiter->next(); - - intptr_t node = contendersList_.load(std::memory_order_acquire); - for (;;) { - waiter->setNext(reinterpret_cast(node & ~kLockBit)); - if (contendersList_.compare_exchange_weak(node, reinterpret_cast(waiter) | kLockBit, - std::memory_order_acq_rel, - std::memory_order_acquire)) { - break; - } - } -} - -void Monitor::notifyAll() { - // NOTE: We could CAS the whole list in 1 shot but this is - // not critical code. Optimize this if it becomes hot. - while (waitersList_ != NULL) { - notify(); - } -} - -bool Monitor::tryLock() { - Thread* thread = Thread::current(); - assert(thread != NULL && "cannot lock() from (null)"); - - intptr_t ptr = contendersList_.load(std::memory_order_acquire); - - if (unlikely((ptr & kLockBit) != 0)) { - if (recursive_ && thread == owner_) { - // Recursive lock: increment the lock count and return. - ++lockCount_; - return true; - } - return false; // Already locked! - } - - if (unlikely(!contendersList_.compare_exchange_weak( - ptr, ptr | kLockBit, std::memory_order_acq_rel, std::memory_order_acquire))) { - return false; // We failed the CAS from unlocked to locked. - } - - setOwner(thread); // cannot move above the CAS. - lockCount_ = 1; - - return true; -} - -void Monitor::lock() { - if (unlikely(!tryLock())) { - // The lock is contented. - finishLock(); - } - - // This is the beginning of the critical region. From now-on, everything - // executes single-threaded! - // -} - -void Monitor::unlock() { - assert(isLocked() && owner_ == Thread::current() && "invariant"); - - if (recursive_ && --lockCount_ > 0) { - // was a recursive lock case, simply return. - return; - } - - setOwner(NULL); - - // Clear the lock bit. - intptr_t ptr = contendersList_.load(std::memory_order_acquire); - while (!contendersList_.compare_exchange_weak(ptr, ptr & ~kLockBit, std::memory_order_acq_rel, - std::memory_order_acquire)) - ; - - // A StoreLoad barrier is required to make sure future loads do not happen before the - // contendersList_ store is published. - std::atomic_thread_fence(std::memory_order_seq_cst); - - // - // We succeeded the CAS from locked to unlocked. - // This is the end of the critical region. - - // Check if we have an on-deck thread that needs signaling. - intptr_t onDeck = onDeck_; - if (onDeck != 0) { - if ((onDeck & kLockBit) == 0) { - // Only signal if it is unmarked. - reinterpret_cast(onDeck)->post(); - } - return; // We are done. - } - - // We do not have an on-deck thread yet, we might have to walk the list in - // order to select the next onDeck_. Only one thread needs to fill onDeck_, - // so return if the list is empty or if the lock got acquired again (it's - // somebody else's problem now!) - - intptr_t head = contendersList_; - if (head == 0 || (head & kLockBit) != 0) { - return; - } - - // Finish the unlock operation: find a thread to wake up. - finishUnlock(); -} -} // namespace legacy_monitor -} // namespace amd diff --git a/rocclr/thread/monitor.hpp b/rocclr/thread/monitor.hpp index 2c6b7f6a30..e538625e6c 100644 --- a/rocclr/thread/monitor.hpp +++ b/rocclr/thread/monitor.hpp @@ -23,141 +23,16 @@ #include "top.hpp" #include "utils/flags.hpp" -#include "thread/semaphore.hpp" -#include "thread/thread.hpp" #include #include #include #include #include #include +#include "os/os.hpp" namespace amd { -/*! \addtogroup Threads - * @{ - * - * \addtogroup Synchronization - * @{ - */ - -namespace details { - -template struct SimplyLinkedNode : public AllocClass { - typedef SimplyLinkedNode Node; - - protected: - std::atomic next_; /*!< \brief The next element. */ - T volatile item_; - - public: - //! \brief Return the next element in the linked-list. - Node* next() const { return next_; } - //! \brief Return the item. - T item() const { return item_; } - - //! \brief Set the next element pointer. - void setNext(Node* next) { next_ = next; } - //! \brief Set the item. - void setItem(T item) { item_ = item; } - - //! \brief Swap the next element pointer. - Node* swapNext(Node* next) { return next_.swap(next); } - - //! \brief Compare and set the next element pointer. - bool compareAndSetNext(Node* compare, Node* next) { - return next_.compare_exchange_strong(compare, next); - } -}; - -} // namespace details - -namespace legacy_monitor { -class Monitor { - typedef details::SimplyLinkedNode LinkedNode; - - private: - static constexpr intptr_t kLockBit = 0x1; - - static constexpr int kMaxSpinIter = 55; //!< Total number of spin iterations. - static constexpr int kMaxReadSpinIter = 50; //!< Read iterations before yielding - - /*! Linked list of semaphores the contending threads are waiting on - * and main lock. - */ - std::atomic_intptr_t contendersList_; - - //! Semaphore of the next thread to contend for the lock. - std::atomic_intptr_t onDeck_; - //! Linked list of the suspended threads resume semaphores. - LinkedNode* volatile waitersList_; - - //! Thread owning this monitor. - Thread* volatile owner_; - //! The amount of times this monitor was acquired by the owner. - uint32_t lockCount_; - //! True if this is a recursive mutex, false otherwise. - const bool recursive_; - - private: - //! Finish locking the mutex (contented case). - void finishLock(); - //! Finish unlocking the mutex (contented case). - void finishUnlock(); - - protected: - //! Try to spin-acquire the lock, return true if successful. - bool trySpinLock(); - - /*! \brief Return true if the lock is owned. - * - * \note The user is responsible for the memory ordering. - */ - bool isLocked() const { return (contendersList_ & kLockBit) != 0; } - - //! Return this monitor's owner thread (NULL if unlocked). - Thread* owner() const { return owner_; } - - //! Set the owner. - void setOwner(Thread* thread) { owner_ = thread; } - - public: - explicit Monitor(bool recursive = false); - ~Monitor() {} - - //! Try to acquire the lock, return true if successful. - bool tryLock(); - - //! Acquire the lock or suspend the calling thread. - void lock(); - - //! Release the lock and wake a single waiting thread if any. - void unlock(); - - /*! \brief Give up the lock and go to sleep. - * - * Calling wait() causes the current thread to go to sleep until - * another thread calls notify()/notifyAll(). - * - * \note The monitor must be owned before calling wait(). - */ - void wait(); - /*! \brief Wake up a single thread waiting on this monitor. - * - * \note The monitor must be owned before calling notify(). - */ - void notify(); - /*! \brief Wake up all threads that are waiting on this monitor. - * - * \note The monitor must be owned before calling notifyAll(). - */ - void notifyAll(); -}; - - -} // namespace legacy_monitor - -namespace mutex_monitor { class Monitor { public: explicit Monitor(bool recursive = false) : recursive_(recursive) { @@ -212,14 +87,13 @@ class Monitor { } // and then SMP friendly else { - Thread::yield(); + Os::yield(); } lk.lock(); } waits_.fetch_add(1, std::memory_order_acq_rel); lk.unlock(); - notifyState expextedNotifyState = notifyState::oneNotified; // expect that notify() is called // fast path c = 0; while (c < maxCount_ && @@ -230,10 +104,9 @@ class Monitor { } // and then SMP friendly else { - Thread::yield(); + Os::yield(); } c++; - expextedNotifyState = notifyState::oneNotified; } assert(c <= maxCount_ && "Error: c > maxCount_"); @@ -241,7 +114,7 @@ class Monitor { if (c == maxCount_) { // In case notify() is called between loop and here - expextedNotifyState = notifyState::oneNotified; + notifyState expextedNotifyState = notifyState::oneNotified; if (notifyState_.load(std::memory_order_acquire) != notifyState::allNotified && !notifyState_.compare_exchange_strong(expextedNotifyState, notifyState::notNotified, std::memory_order_acq_rel, std::memory_order_acquire)) { @@ -309,47 +182,6 @@ class Monitor { const int maxCount_{ 55 }; //!< Max count of spins in wait() const int maxReadSpinIter_{ 50 }; }; -} // namespace mutex_monitor - -// Monitor API wrapper to user -class Monitor { -public: - explicit Monitor(bool recursive = false){ - if (mode_) { - monitor_.emplace(recursive); - } else { - monitor_.emplace(recursive); - } - } - inline bool tryLock() { - return mode_ ? std::get(monitor_).tryLock() : - std::get(monitor_).tryLock(); - } - inline void lock() { - mode_ ? std::get(monitor_).lock() : - std::get(monitor_).lock(); - } - inline void unlock() { - mode_ ? std::get(monitor_).unlock() : - std::get(monitor_).unlock(); - } - inline void wait() { - mode_ ? std::get(monitor_).wait() : - std::get(monitor_).wait(); - } - inline void notify() { - mode_ ? std::get(monitor_).notify() : - std::get(monitor_).notify(); - } - inline void notifyAll() { - mode_ ? std::get(monitor_).notifyAll() : - std::get(monitor_).notifyAll(); - } - -private: - std::variant monitor_; - const bool mode_{DEBUG_CLR_USE_STDMUTEX_IN_AMD_MONITOR}; -}; class ScopedLock : StackObject { public: diff --git a/rocclr/thread/semaphore.cpp b/rocclr/thread/semaphore.cpp deleted file mode 100644 index 78f8addb03..0000000000 --- a/rocclr/thread/semaphore.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#include "thread/semaphore.hpp" -#include "thread/thread.hpp" - -#if defined(_WIN32) || defined(__CYGWIN__) -#include -#else // !_WIN32 -#include -#include -#include -#endif // !_WIN32 - -namespace amd { - -Semaphore::Semaphore() : state_(0) { -#ifdef _WIN32 - handle_ = static_cast(CreateSemaphore(NULL, 0, LONG_MAX, NULL)); - assert(handle_ != NULL && "CreateSemaphore failed"); -#else // !_WIN32 - if (sem_init(&sem_, 0, 0) != 0) { - fatal("sem_init() failed"); - } -#endif // !_WIN32 -} - -Semaphore::~Semaphore() { -#ifdef _WIN32 - if (!CloseHandle(static_cast(handle_))) { - fatal("CloseHandle() failed"); - } -#else // !_WIN32 - if (sem_destroy(&sem_) != 0) { - fatal("sem_destroy() failed"); - } -#endif // !WIN32 -} - -void Semaphore::post() { - int state = state_.load(std::memory_order_relaxed); - for (;;) { - if (state > 0) { - int newstate = state_.load(std::memory_order_acquire); - if (state == newstate) { - return; - } - state = newstate; - continue; - } - if (state_.compare_exchange_weak(state, state + 1, std::memory_order_acq_rel, - std::memory_order_acquire)) { - break; - } - } - - if (state < 0) { -// We have threads waiting on this event. -#ifdef _WIN32 - ReleaseSemaphore(static_cast(handle_), 1, NULL); -#else // !_WIN32 - if (0 != sem_post(&sem_)) { - fatal("sem_post() failed"); - } -#endif // !_WIN32 - } -} - -void Semaphore::wait() { - if (state_-- > 0) { - return; - } - -#ifdef _WIN32 - if (WAIT_OBJECT_0 != WaitForSingleObject(static_cast(handle_), INFINITE)) { - fatal("WaitForSingleObject failed"); - } -#else // !_WIN32 - while (0 != sem_wait(&sem_)) { - if (EINTR != errno) { - fatal("sem_wait() failed"); - } - } -#endif // !_WIN32 -} - -void Semaphore::timedWait(int millis) { - if (state_-- > 0) { - return; - } - -#ifdef _WIN32 - DWORD status = WaitForSingleObject(static_cast(handle_), millis); - if (WAIT_OBJECT_0 != status && WAIT_TIMEOUT != status) { - fatal("WaitForSingleObject failed"); - } -#else // !_WIN32 - struct timespec ts; - - if (clock_gettime(CLOCK_REALTIME, &ts) == -1) { - fatal("clock_gettime() failed"); - } - - ts.tv_sec += millis / 1000; - ts.tv_nsec += ((long)millis % 1000) * 1000000; - - if (ts.tv_nsec >= 1000000000) { - ts.tv_sec += 1; - ts.tv_nsec -= 1000000000; - } - - int status; - while ((status = sem_timedwait(&sem_, &ts)) != 0) { - if (ETIMEDOUT == errno) { - break; - } else if (EINTR != errno) { - fatal("sem_wait() failed"); - } - } -#endif // !_WIN32 -} - -} // namespace amd diff --git a/rocclr/thread/semaphore.hpp b/rocclr/thread/semaphore.hpp deleted file mode 100644 index 0615af0f83..0000000000 --- a/rocclr/thread/semaphore.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2008 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -#ifndef SEMAPHORE_HPP_ -#define SEMAPHORE_HPP_ - -#include "top.hpp" -#include "utils/util.hpp" - -#include -#if defined(__linux__) -#include -#endif /*linux*/ - - -namespace amd { - -/*! \addtogroup Threads - * @{ - * - * \addtogroup Synchronization - * @{ - */ - -class Thread; - -//! \brief Counting semaphore -class Semaphore : public HeapObject { - private: - std::atomic_int state_; //!< This semaphore's value. - -#ifdef _WIN32 - void* handle_; //!< The semaphore object's handle. -#else // !_WIN32 - sem_t sem_; //!< The semaphore object's identifier. -#endif /*!_WIN32*/ - -public: - Semaphore(); - ~Semaphore(); - - //! \brief Decrement this semaphore - void wait(); - void timedWait(int millis); - - //! \brief Increment this semaphore - void post(); - - //! \brief Reset this semaphore. - void reset() { state_.store(0, std::memory_order_release); } -}; - -/*! @} - * @} - */ - -} // namespace amd - -#endif /*SEMAPHORE_HPP_*/ diff --git a/rocclr/thread/thread.cpp b/rocclr/thread/thread.cpp index c7a6c35b90..cc429b56b4 100644 --- a/rocclr/thread/thread.cpp +++ b/rocclr/thread/thread.cpp @@ -19,7 +19,6 @@ THE SOFTWARE. */ #include "thread/thread.hpp" -#include "thread/semaphore.hpp" #include "thread/monitor.hpp" #include "os/os.hpp" @@ -29,19 +28,8 @@ namespace amd { -HostThread::HostThread() : Thread("HostThread", 0, false) { - setCurrent(); - Os::currentStackInfo(&stackBase_, &stackSize_); - setState(RUNNABLE); -} - void Thread::create() { - created_ = new Semaphore(); - lock_ = new Semaphore(); - suspend_ = new Semaphore(); - selfSuspendLock_ = new Monitor(); - data_ = NULL; handle_ = NULL; setState(CREATED); @@ -54,9 +42,11 @@ Thread::Thread(const std::string& name, size_t stackSize, bool spawn) if (!spawn) return; if ((handle_ = Os::createOsThread(this))) { - // Now we need to wait for Thread::main to report back. + // Now we need to wait for Thread::main() to report back. + ScopedLock sl(selfSuspendLock_); + // Wait for main() to update state as INITIALIZED while (state() != Thread::INITIALIZED) { - created_->wait(); + selfSuspendLock_->wait(); } } } @@ -67,10 +57,6 @@ Thread::~Thread() { ::CloseHandle((HANDLE)handle_); } #endif - delete created_; - delete lock_; - delete suspend_; - delete selfSuspendLock_; } @@ -84,11 +70,21 @@ void* Thread::main() { // Notify the parent thread that we are up and running. { ScopedLock sl(selfSuspendLock_); + // Notify parent thread that the state is update as INITIALIZED setState(INITIALIZED); - created_->post(); - selfSuspendLock_->wait(); + selfSuspendLock_->notify(); } + // Now we need to wait for Thread::start() to report back. + { + ScopedLock sl(selfSuspendLock_); + // wait for start() to update state as RUNNABLE + while (state() != Thread::RUNNABLE) { + selfSuspendLock_->wait(); + } + } + + if (state() == RUNNABLE) { run(data_); } @@ -103,8 +99,10 @@ bool Thread::start(void* data) { } data_ = data; + // Notify the thread that the parent thread are up and running. { ScopedLock sl(selfSuspendLock_); + // Notify main() that the state is updated as RUNNABLE setState(RUNNABLE); selfSuspendLock_->notify(); } @@ -166,8 +164,7 @@ bool Thread::init() { } initialized_ = true; - // Register the main thread - return NULL != new HostThread(); + return true; } void Thread::tearDown() { diff --git a/rocclr/thread/thread.hpp b/rocclr/thread/thread.hpp index ad88f7ffaa..f773c87b5b 100644 --- a/rocclr/thread/thread.hpp +++ b/rocclr/thread/thread.hpp @@ -22,7 +22,6 @@ #define THREAD_HPP_ #include "top.hpp" -#include "thread/semaphore.hpp" #include "os/os.hpp" #include @@ -61,12 +60,6 @@ class Thread : public HeapObject { //! The argument passed to run() void* data_; - //! \cond ignore - Semaphore* created_; //!< To notify the parent thread. - Semaphore* lock_; //!< For mutex support (during contention). - Semaphore* suspend_; //!< For wait/suspend support. - //! \endcond - Monitor* selfSuspendLock_; //!< For self suspend/resume. protected: @@ -148,11 +141,6 @@ class Thread : public HeapObject { //! Return this thread's stack bottom. address stackBottom() const { return stackBase() - stackSize(); } - //! Return this thread's contend semaphore. - Semaphore& lockSemaphore() const { return *lock_; } - //! Return this thread's resume semaphore. - Semaphore& suspendSemaphore() const { return *suspend_; } - //! Set this thread's affinity to the given cpu. void setAffinity(uint cpu_id) const { Os::setThreadAffinity(handle_, cpu_id); } @@ -160,22 +148,6 @@ class Thread : public HeapObject { void setAffinity(const Os::ThreadAffinityMask& mask) const { Os::setThreadAffinity(handle_, mask); } - - //! Yield to threads of the same priority of higher - static void yield() { Os::yield(); } -}; - -class HostThread : public Thread { - private: - //! A HostThread does not have a run function - virtual void run(void* data) { ShouldNotCallThis(); } - - public: - //! Construct a new HostThread - HostThread(); - - //! Return true is this is the host thread. - bool isHostThread() const { return true; }; }; /*! @} diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index b607e73154..c2009b13a9 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -267,8 +267,6 @@ release(bool, DEBUG_CLR_SYSMEM_POOL, false, \ "Use sysmem pool implementation in runtime for amd commands") \ release(bool, DEBUG_HIP_KERNARG_COPY_OPT, true, \ "Enable/Disable multiple kern arg copies") \ -release(bool, DEBUG_CLR_USE_STDMUTEX_IN_AMD_MONITOR, true, \ - "Use std::mutex in amd::monitor") \ release(bool, DEBUG_CLR_KERNARG_HDP_FLUSH_WA, false, \ "Toggle kernel arg copy workaround") \ release(bool, DEBUG_CLR_SKIP_RELEASE_SCOPE, false, \