diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp
index badb62d1e8..9adac471b6 100644
--- a/projects/clr/rocclr/device/device.hpp
+++ b/projects/clr/rocclr/device/device.hpp
@@ -1192,6 +1192,9 @@ class VirtualDevice : public amd::HeapObject {
     ShouldNotReachHere();
   }
   virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); }
+
+  //! Updates the profiler-attached state; every backend has to provide an implementation
+  virtual void profilerAttach(bool enable) = 0;
 
   //! Get the blit manager object
   device::BlitManager& blitMgr() const { return *blitMgr_; }
diff --git a/projects/clr/rocclr/device/gpu/gpuvirtual.hpp b/projects/clr/rocclr/device/gpu/gpuvirtual.hpp
index 63cb99285b..a4700f91b5 100644
--- a/projects/clr/rocclr/device/gpu/gpuvirtual.hpp
+++ b/projects/clr/rocclr/device/gpu/gpuvirtual.hpp
@@ -309,6 +309,9 @@ class VirtualGPU : public device::VirtualDevice, public CALGSLContext {
   //! End the command profiling
   void profilingEnd(amd::Command& command);
 
+  //! Attach profiler (the flag is ignored by this backend, hence a no-op)
+  void profilerAttach(bool enable = false) {}
+
   //! Collect the profiling results
   bool profilingCollectResults(CommandBatch* cb,                //!< Command batch
                                const amd::Event* waitingEvent  //!< Waiting event
diff --git a/projects/clr/rocclr/device/pal/palvirtual.hpp b/projects/clr/rocclr/device/pal/palvirtual.hpp
index ed15a64930..82098b1ade 100644
--- a/projects/clr/rocclr/device/pal/palvirtual.hpp
+++ b/projects/clr/rocclr/device/pal/palvirtual.hpp
@@ -355,6 +355,9 @@ class VirtualGPU : public device::VirtualDevice {
 
   void flush(amd::Command* list = nullptr, bool wait = false);
 
+  //! Attach profiler (the flag is ignored by this backend, hence a no-op)
+  void profilerAttach(bool enable = false) {}
+
   //! Returns GPU device object associated with this kernel
   const Device& dev() const { return gpuDevice_; }
 
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
index 1c68a93022..5b361beaf1 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
@@ -147,12 +147,47 @@ void Timestamp::checkGpuTime() {
 // ================================================================================================
 bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
   Timestamp* ts = reinterpret_cast<Timestamp*>(arg);
+
   amd::Thread* thread = amd::Thread::current();
   if (!(thread != nullptr ||
         ((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) {
     return false;
   }
   amd::ScopedLock sl(ts->gpu()->execution());
+  // With a profiler attached, walk the command list (resuming from the last parsed command)
+  // and register the handler again while any signal of a visited command is still above zero
+  if (ts->gpu()->isProfilerAttached()) {
+    amd::Command* head = ts->getParsedCommand();
+    if (head == nullptr) {
+      head = ts->command().GetBatchHead();
+    }
+    while (head != nullptr) {
+      if (head->data() != nullptr) {
+        Timestamp* headTs = reinterpret_cast<Timestamp*>(head->data());
+        // Remember the position, so the next invocation resumes here instead of at the head
+        ts->setParsedCommand(head);
+        for (auto it : headTs->Signals()) {
+          // Keep the load apart from the comparison: "int64_t val = load() > 0" binds as
+          // "val = (load() > 0)" and would store (and log) the boolean instead of the value
+          const hsa_signal_value_t val = hsa_signal_load_relaxed(it->signal_);
+          if (val > 0) {
+            hsa_status_t result = hsa_amd_signal_async_handler(headTs->Signals()[0]->signal_,
+                                      HSA_SIGNAL_CONDITION_LT, kInitSignalValueOne,
+                                      &HsaAmdSignalHandler, ts);
+            if (HSA_STATUS_SUCCESS != result) {
+              LogError("hsa_amd_signal_async_handler() failed to requeue the handler!");
+            } else {
+              ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Requeue handler : value(%d), timestamp(%p),"
+                      "handle(0x%lx)", static_cast<uint32_t>(val), headTs,
+                      headTs->HwProfiling() ? headTs->Signals()[0]->signal_.handle : 0);
+            }
+            return false;
+          }
+        }
+      }
+      head = head->getNext();
+    }
+  }
   ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Handler: value(%d), timestamp(%p), handle(0x%lx)",
           static_cast<uint32_t>(value), arg,
           ts->HwProfiling() ? ts->Signals()[0]->signal_.handle : 0);
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp
index 739e1a20fa..bfe3dceb39 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp
@@ -89,6 +89,7 @@ class Timestamp : public amd::HeapObject {
   uint64_t end_;
   VirtualGPU* gpu_;              //!< Virtual GPU, associated with this timestamp
   const amd::Command& command_;  //!< Command, associated with this timestamp
+  amd::Command* parsedCommand_;  //!< Command reached so far in the command list; nullptr until set
   std::vector<ProfilingSignal*> signals_;
 
  public:
@@ -96,7 +97,8 @@ class Timestamp : public amd::HeapObject {
       : start_(std::numeric_limits<uint64_t>::max())
       , end_(0)
       , gpu_(gpu)
-      , command_(command) {}
+      , command_(command)
+      , parsedCommand_(nullptr) {}
 
   ~Timestamp() {}
 
@@ -131,6 +133,12 @@ class Timestamp : public amd::HeapObject {
   //! Returns amd::command assigned to this timestamp
   const amd::Command& command() const { return command_; }
 
+  //! Records the command at which a later walk of the command list should resume
+  void setParsedCommand(amd::Command* command) { parsedCommand_ = command; }
+
+  //! Returns the recorded resume command, or nullptr if none was recorded yet
+  amd::Command* getParsedCommand() const { return parsedCommand_; }
+
   //! Returns virtual GPU device, used with this timestamp
   VirtualGPU* gpu() const { return gpu_; }
 };
@@ -344,6 +352,11 @@ class VirtualGPU : public device::VirtualDevice {
 
   Timestamp* timestamp() const { return timestamp_; }
 
+  //! Records whether a profiler is attached to this virtual GPU
+  void profilerAttach(bool enable = false) { profilerAttached_ = enable; }
+
+  //! Returns true if a profiler was recorded as attached
+  bool isProfilerAttached() const { return profilerAttached_; }
   // } roc OpenCL integration
  private:
   bool dispatchAqlPacket(hsa_kernel_dispatch_packet_t* packet, uint16_t header,
@@ -413,6 +426,7 @@ class VirtualGPU : public device::VirtualDevice {
       uint32_t cooperative_ : 1;       //!< Cooperative launch is enabled
       uint32_t addSystemScope_ : 1;    //!< Insert a system scope to the next aql
       uint32_t tracking_created_ : 1;  //!< Enabled if tracking object was properly initialized
+      uint32_t profilerAttached_ : 1;  //!< Indicates if profiler is attached
     };
     uint32_t state_;
   };