// Patch summary (preamble text ahead of the first "diff --git" line; the patch itself is unchanged).
//
// Files touched: projects/clr/rocclr/device/rocm/rocvirtual.cpp and rocvirtual.hpp.
//
// rocvirtual.cpp
//   * VirtualGPU::profilingBegin: right after `timestamp_ = new Timestamp(this, command)` the
//     patch adds `command.setData(timestamp_)`, i.e. the pointer is stored in the command before
//     `timestamp_->start()` is called. The function's header comment is updated to say so.
//   * VirtualGPU::profilingEnd: the `command.setData(timestamp_)` call is removed (it now happens
//     in profilingBegin), and the sentence describing it is dropped from the header comment.
//     `timestamp_` is still reset to nullptr here. The condition `!timestamp_->HwProfiling()` is
//     rewritten as `timestamp_->HwProfiling() == false`, which is the same test.
//
// rocvirtual.hpp (class Timestamp)
//   * Adds the member `bool hasHwProfiling_ = false`.
//   * AddProfilingSignal() now sets `hasHwProfiling_ = true` after pushing the signal.
//   * HwProfiling() returns `hasHwProfiling_` instead of `!signals_.empty()`.
//
// NOTE(review): with the flag, HwProfiling() keeps returning true once any signal was added;
//   nothing in the visible hunks resets hasHwProfiling_. Whether signals_ can be emptied later
//   (which is when old and new behaviour would differ) is not visible here -- confirm intended.
// NOTE(review): the command's data now points at the Timestamp before start()/end() have run;
//   presumably readers of the command data tolerate an unfinished timestamp -- verify callers.
// NOTE(review): `const std::vector& Signals()` appears below without a template argument; this
//   looks like an extraction artifact (angle-bracket text dropped) -- check the real header.
// NOTE(review): `== false` is a style change only, and the top-level `const` on the `bool`
//   return type of HwProfiling() has no effect.
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 9d98ca7f3d..6ea2a09dab 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -1386,8 +1386,8 @@ address VirtualGPU::allocKernelArguments(size_t size, size_t alignment) { // ================================================================================================ /* profilingBegin, when profiling is enabled, creates a timestamp to save in -* virtualgpu's timestamp_, and calls start() to get the current host -* timestamp. +* virtualgpu's timestamp_, saves the pointer timestamp_ to the command's data +* and then calls start() to get the current host timestamp. */ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) { if (command.profilingInfo().enabled_) { @@ -1398,6 +1398,7 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) { } // Without barrier profiling will wait for each individual signal timestamp_ = new Timestamp(this, command); + command.setData(timestamp_); timestamp_->start(); } @@ -1423,15 +1424,13 @@ void VirtualGPU::profilingBegin(amd::Command& command, bool drmProfiling) { // ================================================================================================ /* profilingEnd, when profiling is enabled, checks to see if a signal was * created for whatever command we are running and calls end() to get the -* current host timestamp if no signal is available. It then saves the pointer -* timestamp_ to the command's data. +* current host timestamp if no signal is available. 
*/ void VirtualGPU::profilingEnd(amd::Command& command) { if (command.profilingInfo().enabled_) { - if (!timestamp_->HwProfiling()) { + if (timestamp_->HwProfiling() == false) { timestamp_->end(); } - command.setData(timestamp_); timestamp_ = nullptr; } if (AMD_DIRECT_DISPATCH) { diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index 7bed796803..beeca99794 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -111,6 +111,7 @@ class Timestamp : public amd::ReferenceCountedObject { hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update amd::Monitor lock_; //!< Serialize timestamp update bool accum_ena_ = false; //!< If TRUE then the accumulation of execution times has started + bool hasHwProfiling_ = false; //!< If TRUE then HwProfiling is enabled for the command Timestamp(const Timestamp&) = delete; Timestamp& operator=(const Timestamp&) = delete; @@ -133,11 +134,14 @@ class Timestamp : public amd::ReferenceCountedObject { *end = end_; } - void AddProfilingSignal(ProfilingSignal* signal) { signals_.push_back(signal); } + void AddProfilingSignal(ProfilingSignal* signal) { + signals_.push_back(signal); + hasHwProfiling_ = true; + } const std::vector& Signals() const { return signals_; } - const bool HwProfiling() const { return !signals_.empty(); } + const bool HwProfiling() const { return hasHwProfiling_; } //! Finds execution ticks on GPU void checkGpuTime();