From 6b27cc35a1cbd27bbcf2eb475d01e17fbece23f0 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 28 Jul 2017 10:35:46 -0400 Subject: [PATCH] P4 to Git Change 1440849 by jatang@jatang-opencl-hsa-stg1 on 2017/07/28 10:17:34 SWDEV-95919 - Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd are in the same cmdBuffer. Affected files ... ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#27 edit --- rocclr/runtime/device/pal/palvirtual.cpp | 12 +++++++++--- rocclr/runtime/device/pal/palvirtual.hpp | 7 ++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp index a40d2f1e44..cd03ade47d 100644 --- a/rocclr/runtime/device/pal/palvirtual.cpp +++ b/rocclr/runtime/device/pal/palvirtual.cpp @@ -1909,9 +1909,13 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const } bool needFlush = false; - dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize()); - if (dmaFlushMgmt().dispatchSplitSize() != 0) { - needFlush = true; + + // Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd are in the same cmdBuffer + if (!state_.perfCounterEnabled_) { + dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize()); + if (dmaFlushMgmt().dispatchSplitSize() != 0) { + needFlush = true; + } } size_t newOffset[3] = {0, 0, 0}; @@ -2347,6 +2351,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) { if (vcmd.getState() == amd::PerfCounterCommand::Begin) { Pal::SetClockModeInput input; Pal::SetClockModeOutput output = {}; + state_.perfCounterEnabled_ = true; input.clockMode = Pal::DeviceClockMode::Profiling; dev().iDev()->SetClockMode(input, &output); GpuEvent event; @@ -2360,6 +2365,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& 
vcmd) { iCmd()->CmdEndPerfExperiment(palPerf); eventEnd(MainEngine, event); setGpuEvent(event); + state_.perfCounterEnabled_ = false; } else { LogError("Unsupported performance counter state"); vcmd.setStatus(CL_INVALID_OPERATION); diff --git a/rocclr/runtime/device/pal/palvirtual.hpp b/rocclr/runtime/device/pal/palvirtual.hpp index 65ee2f5ad1..4eabc1a902 100644 --- a/rocclr/runtime/device/pal/palvirtual.hpp +++ b/rocclr/runtime/device/pal/palvirtual.hpp @@ -180,9 +180,10 @@ class VirtualGPU : public device::VirtualDevice { //! The virtual GPU states union State { struct { - uint profiling_ : 1; //!< Profiling is enabled - uint forceWait_ : 1; //!< Forces wait in flush() - uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter + uint profiling_ : 1; //!< Profiling is enabled + uint forceWait_ : 1; //!< Forces wait in flush() + uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter + uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled }; uint value_; State() : value_(0) {}