From e4e1e4857ee91fbd49a71d4d0e0831677da10fe0 Mon Sep 17 00:00:00 2001 From: Giovanni LB Date: Fri, 5 Jul 2024 15:17:40 -0300 Subject: [PATCH] SWDEV-470113: Fixing hang on navi33 Change-Id: I5c1d30547039a6f23ab5974f43ea63f971573108 [ROCm/rocprofiler commit: 7e199e6b7ae1922eed8abf32853c73a2f1ee08db] --- .../rocprofiler/src/core/hsa/queues/queue.cpp | 17 +++++++++++------ .../featuretests/profiler/apps/hip_kernels.cpp | 2 ++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/projects/rocprofiler/src/core/hsa/queues/queue.cpp b/projects/rocprofiler/src/core/hsa/queues/queue.cpp index b0570a2989..7208dced61 100644 --- a/projects/rocprofiler/src/core/hsa/queues/queue.cpp +++ b/projects/rocprofiler/src/core/hsa/queues/queue.cpp @@ -365,6 +365,7 @@ bool AsyncSignalReadyHandler(hsa_signal_value_t signal_value, void* data) { queue->cv_ready_signal.notify_all(); return false; } + queue->cv_ready_signal.notify_all(); hsasupport_singleton.GetCoreApiTable().hsa_signal_store_screlease_fn(queue->GetReadySignal(), 1); if (serializer.dispatch_queue == nullptr) enable_dispatch(queue); @@ -473,7 +474,6 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) auto* profile = pending->profile ? pending->profile->profile.get() : nullptr; if (pending->counters_count > 0 && profile && profile->events) { - ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_sub(1); Packet::AQLPacketProfile::MoveToCache(queue_info_session->agent, std::move(pending->profile)); profiler_serializer_t& serializer = @@ -483,10 +483,12 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) hsasupport_singleton.GetCoreApiTable().hsa_signal_store_screlease_fn( queue_info_session->block_signal, 1); serializer.dispatch_queue = nullptr; - if (serializer.dispatch_ready.empty()) return false; - Queue* queue = serializer.dispatch_ready.front(); - serializer.dispatch_ready.erase(serializer.dispatch_ready.begin()); - enable_dispatch(queue); + if (!serializer.dispatch_ready.empty()) + { + Queue* queue = serializer.dispatch_ready.front(); + serializer.dispatch_ready.erase(serializer.dispatch_ready.begin()); + enable_dispatch(queue); + } } if (pending->new_signal.handle) @@ -494,6 +496,9 @@ bool AsyncSignalHandler(hsa_signal_value_t signal_value, void* data) if (queue_info_session->interrupt_signal.handle) hsasupport_singleton.GetCoreApiTable().hsa_signal_destroy_fn( queue_info_session->interrupt_signal); + + if (pending->counters_count > 0 && profile && profile->events) + ACTIVE_INTERRUPT_SIGNAL_COUNT.fetch_sub(1); } delete queue_info_session; return false; @@ -641,7 +646,7 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u continue; } - std::unique_ptr profile_packet; + std::unique_ptr profile_packet{nullptr}; // If counters found in the session if (session_data_count > 0 && is_counter_collection_mode) { // Get the PM4 Packets using packets_generator diff --git a/projects/rocprofiler/tests-v2/featuretests/profiler/apps/hip_kernels.cpp b/projects/rocprofiler/tests-v2/featuretests/profiler/apps/hip_kernels.cpp index d1df5f0745..5b49cf00ae 100644 --- a/projects/rocprofiler/tests-v2/featuretests/profiler/apps/hip_kernels.cpp +++ b/projects/rocprofiler/tests-v2/featuretests/profiler/apps/hip_kernels.cpp @@ -83,9 +83,11 @@ void LaunchMultiStreamKernels() { int blockSize = 64; // This Kernel will always be launched with one wave int numBlocks = 1; + for(int i = 0; i < 100; i++) { for(int j = 0; j < hip_streams.size(); j++) hipLaunchKernelGGL(add, numBlocks, blockSize, 0, hip_streams[j], N, d_x, d_y); + HIP_ASSERT(hipDeviceSynchronize()); } //Wait for GPU to finish before accessing on host