From 61307442f0021572d2bd724c4b752b604f44a81e Mon Sep 17 00:00:00 2001 From: "Meserve, Mark" Date: Sun, 27 Apr 2025 19:55:32 -0500 Subject: [PATCH] [SDK] Fix for hang in hsa_barrier (#316) * SWDEV-515895: Fix for hang in hsa_barrier - Root cause of hang is enqueue_packet using a completed barrier - This occurs because set_barrier does not correctly mark the barrier as complete. - Fixed by consolidating duplicate code for how a barrier is: - Marked as complete - Added common function and trace message - Checked as complete - Removed internal completion tracking variable in favor of signal - Added some log messages which were useful in tracking this hang - Destroying an hsa_barrier now calls the provided completion callback * formatting * Fix hsa_barrier test - Removes assumption that enqueue_packet always generates packets - Add assertion that a complete barrier does not generate packets * formatting * Address review comments --- .../lib/rocprofiler-sdk/hsa/hsa_barrier.cpp | 64 ++++++++++++++----- .../lib/rocprofiler-sdk/hsa/hsa_barrier.hpp | 23 +++++-- .../lib/rocprofiler-sdk/tests/hsa_barrier.cpp | 12 +++- 3 files changed, 75 insertions(+), 24 deletions(-) diff --git a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp index 84f9d620b9..a99d0acb01 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.cpp @@ -31,7 +31,7 @@ namespace rocprofiler namespace hsa { hsa_barrier::hsa_barrier(std::function&& finished, CoreApiTable core_api) -: _barried_finished(std::move(finished)) +: _barrier_finished(std::move(finished)) , _core_api(core_api) { // Create the barrier signal @@ -40,18 +40,33 @@ hsa_barrier::hsa_barrier(std::function&& finished, CoreApiTable core_api hsa_barrier::~hsa_barrier() { - // Destroy the barrier signal - if(registration::get_fini_status() < 1) + if(registration::get_fini_status() > 0) { - _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 0); - _core_api.hsa_signal_destroy_fn(_barrier_signal); + return; } + + // If this barrier is destroyed and has queued packets, a future signal creation may + // inadvertently block these packets if the same handle is re-used by HSA. + if(!complete()) + { + _barrier_enqueued.rlock([&](auto& barrier_enqueued) { + if(!barrier_enqueued.empty()) + { + ROCP_WARNING << "An incomplete hsa_barrier which was enqueued is being destroyed."; + } + }); + } + + // Clear and destroy the barrier signal + clear_barrier(); + _core_api.hsa_signal_destroy_fn(_barrier_signal); } void hsa_barrier::set_barrier(const queue_map_ptr_t& q) { _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 1); + ROCP_TRACE << "Barrier (handle: " << _barrier_signal.handle << ") is now set."; _queue_waiting.wlock([&](auto& queue_waiting) { for(const auto& [_, queue] : q) { @@ -64,8 +79,7 @@ hsa_barrier::set_barrier(const queue_map_ptr_t& q) } if(queue_waiting.empty()) { - _barried_finished(); - _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 0); + clear_barrier(); } }); } @@ -73,7 +87,7 @@ hsa_barrier::set_barrier(const queue_map_ptr_t& q) std::optional hsa_barrier::enqueue_packet(const Queue* queue) { - if(_complete) return std::nullopt; + if(complete()) return std::nullopt; bool return_block = false; _barrier_enqueued.wlock([&](auto& barrier_enqueued) { if(barrier_enqueued.find(queue->get_id().handle) == barrier_enqueued.end()) @@ -88,7 +102,8 @@ hsa_barrier::enqueue_packet(const Queue* queue) rocprofiler_packet barrier{}; barrier.barrier_and.header = HSA_PACKET_TYPE_BARRIER_AND << HSA_PACKET_HEADER_TYPE; barrier.barrier_and.dep_signal[0] = _barrier_signal; - ROCP_INFO << "Barrier Added: " << _barrier_signal.handle; + ROCP_INFO << "Barrier (handle: " << _barrier_signal.handle + << ") added to queue (handle: " << queue->get_id().handle << ")"; return barrier; } @@ -100,9 +115,7 @@ hsa_barrier::remove_queue(const Queue* queue) queue_waiting.erase(queue->get_id().handle); if(queue_waiting.empty()) { - _barried_finished(); - _complete = true; - _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 0); + clear_barrier(); } }); } @@ -120,14 +133,33 @@ hsa_barrier::register_completion(const Queue* queue) queue_waiting.erase(queue->get_id().handle); if(queue_waiting.empty()) { - _barried_finished(); - // We are done, release the barrier - _complete = true; - _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 0); + clear_barrier(); } } }); return found; } + +bool +hsa_barrier::complete() const +{ + return _core_api.hsa_signal_load_scacquire_fn(_barrier_signal) == 0; +} + +void +hsa_barrier::clear_barrier() +{ + if(complete()) + { + return; + } + if(_barrier_finished) + { + _barrier_finished(); + } + _core_api.hsa_signal_store_screlease_fn(_barrier_signal, 0); + ROCP_TRACE << "Barrier (handle: " << _barrier_signal.handle << ") is now cleared."; +} + } // namespace hsa } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp index b23b92f298..9c78fe75b4 100644 --- a/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp +++ b/source/lib/rocprofiler-sdk/hsa/hsa_barrier.hpp @@ -50,22 +50,35 @@ public: hsa_barrier(std::function&& finished, CoreApiTable core_api); ~hsa_barrier(); + // If a queue in q currently has active async packets, sets the barrier and saves how many + // packets are currently in each queue to track barrier completion. If there are no queues with + // packets, clears the barrier and marks it as complete. void set_barrier(const queue_map_ptr_t& q); + // Returns the packets needed to place this barrier in a queue. + // If the barrier has already been enqueued in the queue given or the barrier is complete, no + // packets are returned. std::optional enqueue_packet(const Queue* queue); - bool register_completion(const Queue* queue); - bool complete() const { return _core_api.hsa_signal_load_scacquire_fn(_barrier_signal) == 0; } + // To be called when 1 async packet is finished in the given queue to decrement the remaining + // packets counter. If all packets in all queues given in set_barrier are completed, clears the + // barrier and marks it as complete. Returns true if the given queue had async packets waiting. + bool register_completion(const Queue* queue); - // Removes a queue from the barrier dependency list + // Checks if this barrier is complete + bool complete() const; + + // Removes a queue from the barrier dependency list. + // If this is the last queue waiting, clears the barrier and marks it as complete. void remove_queue(const Queue* queue); private: - std::function _barried_finished = {}; + std::function _barrier_finished = {}; CoreApiTable _core_api = {}; common::Synchronized> _queue_waiting = {}; common::Synchronized> _barrier_enqueued = {}; - std::atomic _complete = {false}; + + void clear_barrier(); // Blocks all queues from executing until the barrier is lifted hsa_signal_t _barrier_signal = {}; diff --git a/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp b/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp index 76cc9dfd88..619f8526aa 100644 --- a/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp +++ b/source/lib/rocprofiler-sdk/tests/hsa_barrier.cpp @@ -142,10 +142,16 @@ inject_barriers(hsa_barrier& barrier, QueueController::queue_map_t& queues) for(auto& [hsa_queue, fq] : queues) { - auto _pkt = barrier.enqueue_packet(fq.get()); - ASSERT_EQ(_pkt.has_value(), true); + auto complete = barrier.complete(); + auto _pkt = barrier.enqueue_packet(fq.get()); + // If barrier is complete, no packets should be generated + ASSERT_NE(complete, _pkt.has_value()); + hsa_barrier_and_packet_t* _packets = (hsa_barrier_and_packet_t*) hsa_queue->base_address; - enqueue_pkt(hsa_queue, _packets, _pkt->barrier_and); + if(_pkt.has_value()) + { + enqueue_pkt(hsa_queue, _packets, _pkt->barrier_and); + } // Construct packet that will trigger async handler after barrier is released rocprofiler_packet post_barrier{};