From eeea586a2a1ca00a1891aa6b1f631e828300d940 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Tue, 11 May 2021 16:55:15 -0700 Subject: [PATCH] SWDEV-283726 - Workaround for rocprof hang This addresses the rocprof hang seen with direct dispatch. The workaround queues the handler back if any of the signal values in the batch is not decremented. To remember the last position in the list, we save the parsed command in the current timestamp struct. Change-Id: I02959e463cfe3cee83c54808ffd6e6f48f43b4e8 [ROCm/clr commit: e5e635f9bf9fc721b47d135cf7030a1aadf62692] --- projects/clr/rocclr/device/device.hpp | 2 ++ projects/clr/rocclr/device/gpu/gpuvirtual.hpp | 3 ++ projects/clr/rocclr/device/pal/palvirtual.hpp | 2 ++ .../clr/rocclr/device/rocm/rocvirtual.cpp | 29 +++++++++++++++++++ .../clr/rocclr/device/rocm/rocvirtual.hpp | 14 ++++++++- 5 files changed, 49 insertions(+), 1 deletion(-) diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index badb62d1e8..9adac471b6 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1192,6 +1192,8 @@ class VirtualDevice : public amd::HeapObject { ShouldNotReachHere(); } virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); } + + virtual void profilerAttach(bool enable) = 0; //! Get the blit manager object device::BlitManager& blitMgr() const { return *blitMgr_; } diff --git a/projects/clr/rocclr/device/gpu/gpuvirtual.hpp b/projects/clr/rocclr/device/gpu/gpuvirtual.hpp index 63cb99285b..a4700f91b5 100644 --- a/projects/clr/rocclr/device/gpu/gpuvirtual.hpp +++ b/projects/clr/rocclr/device/gpu/gpuvirtual.hpp @@ -309,6 +309,9 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext { //! End the command profiling void profilingEnd(amd::Command& command); + //! Attach profiler + void profilerAttach(bool enable = false) {} + //! 
Collect the profiling results bool profilingCollectResults(CommandBatch* cb, //!< Command batch const amd::Event* waitingEvent //!< Waiting event diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp index ed15a64930..82098b1ade 100644 --- a/projects/clr/rocclr/device/pal/palvirtual.hpp +++ b/projects/clr/rocclr/device/pal/palvirtual.hpp @@ -355,6 +355,8 @@ class VirtualGPU : public device::VirtualDevice { void flush(amd::Command* list = nullptr, bool wait = false); + void profilerAttach(bool enable = false) {} + //! Returns GPU device object associated with this kernel const Device& dev() const { return gpuDevice_; } diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 1c68a93022..5b361beaf1 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -147,12 +147,41 @@ void Timestamp::checkGpuTime() { // ================================================================================================ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { Timestamp* ts = reinterpret_cast(arg); + amd::Thread* thread = amd::Thread::current(); if (!(thread != nullptr || ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) { return false; } amd::ScopedLock sl(ts->gpu()->execution()); + if (ts->gpu()->isProfilerAttached()) { + amd::Command* head = ts->getParsedCommand(); + if (head == nullptr) { + head = ts->command().GetBatchHead(); + } + while (head != nullptr) { + if (head->data() != nullptr) { + Timestamp* headTs = reinterpret_cast(head->data()); + ts->setParsedCommand(head); + for (auto it : headTs->Signals()) { + if (int64_t val = hsa_signal_load_relaxed(it->signal_) > 0) { + hsa_status_t result = hsa_amd_signal_async_handler(headTs->Signals()[0]->signal_, + HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne, + &HsaAmdSignalHandler, ts); + if (HSA_STATUS_SUCCESS 
!= result) { + LogError("hsa_amd_signal_async_handler() failed to requeue the handler!"); + } else { + ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Requeue handler : value(%d), timestamp(%p)," + "handle(0x%lx)", static_cast(val), headTs, + headTs->HwProfiling() ? headTs->Signals()[0]->signal_.handle : 0); + } + return false; + } + } + } + head = head->getNext(); + } + } ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Handler: value(%d), timestamp(%p), handle(0x%lx)", static_cast(value), arg, ts->HwProfiling() ? ts->Signals()[0]->signal_.handle : 0); diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 739e1a20fa..bfe3dceb39 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -89,6 +89,7 @@ class Timestamp : public amd::HeapObject { uint64_t end_; VirtualGPU* gpu_; //!< Virtual GPU, associated with this timestamp const amd::Command& command_; //!< Command, associated with this timestamp + amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head std::vector signals_; public: @@ -96,7 +97,8 @@ class Timestamp : public amd::HeapObject { : start_(std::numeric_limits::max()) , end_(0) , gpu_(gpu) - , command_(command) {} + , command_(command) + , parsedCommand_(nullptr) {} ~Timestamp() {} @@ -131,6 +133,12 @@ class Timestamp : public amd::HeapObject { //! Returns amd::command assigned to this timestamp const amd::Command& command() const { return command_; } + //! Sets the parsed command + void setParsedCommand(amd::Command* command) { parsedCommand_ = command; } + + //! Gets the parsed command + amd::Command* getParsedCommand() const { return parsedCommand_; } + //! 
Returns virtual GPU device, used with this timestamp VirtualGPU* gpu() const { return gpu_; } }; @@ -344,6 +352,9 @@ class VirtualGPU : public device::VirtualDevice { Timestamp* timestamp() const { return timestamp_; } + void profilerAttach(bool enable = false) { profilerAttached_ = enable; } + + bool isProfilerAttached() { return profilerAttached_; } // } roc OpenCL integration private: bool dispatchAqlPacket(hsa_kernel_dispatch_packet_t* packet, uint16_t header, @@ -413,6 +424,7 @@ class VirtualGPU : public device::VirtualDevice { uint32_t cooperative_ : 1; //!< Cooperative launch is enabled uint32_t addSystemScope_ : 1; //!< Insert a system scope to the next aql uint32_t tracking_created_ : 1; //!< Enabled if tracking object was properly initialized + uint32_t profilerAttached_ : 1; //!< Indicates if profiler is attached }; uint32_t state_; };