From 6b27cc35a1cbd27bbcf2eb475d01e17fbece23f0 Mon Sep 17 00:00:00 2001
From: foreman
Date: Fri, 28 Jul 2017 10:35:46 -0400
Subject: [PATCH] P4 to Git Change 1440849 by jatang@jatang-opencl-hsa-stg1 on
2017/07/28 10:17:34
SWDEV-95919 - Avoid flushing when PerfCounter is enabled.
This ensures that PerfStart/dispatch/PerfEnd are recorded in the same cmdBuffer.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#27 edit
---
rocclr/runtime/device/pal/palvirtual.cpp | 12 +++++++++---
rocclr/runtime/device/pal/palvirtual.hpp | 7 ++++---
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/rocclr/runtime/device/pal/palvirtual.cpp b/rocclr/runtime/device/pal/palvirtual.cpp
index a40d2f1e44..cd03ade47d 100644
--- a/rocclr/runtime/device/pal/palvirtual.cpp
+++ b/rocclr/runtime/device/pal/palvirtual.cpp
@@ -1909,9 +1909,13 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
}
bool needFlush = false;
- dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
- if (dmaFlushMgmt().dispatchSplitSize() != 0) {
- needFlush = true;
+
+ // Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd are in the same cmdBuffer
+ if (!state_.perfCounterEnabled_) {
+ dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
+ if (dmaFlushMgmt().dispatchSplitSize() != 0) {
+ needFlush = true;
+ }
}
size_t newOffset[3] = {0, 0, 0};
@@ -2347,6 +2351,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) {
if (vcmd.getState() == amd::PerfCounterCommand::Begin) {
Pal::SetClockModeInput input;
Pal::SetClockModeOutput output = {};
+ state_.perfCounterEnabled_ = true;
input.clockMode = Pal::DeviceClockMode::Profiling;
dev().iDev()->SetClockMode(input, &output);
GpuEvent event;
@@ -2360,6 +2365,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) {
iCmd()->CmdEndPerfExperiment(palPerf);
eventEnd(MainEngine, event);
setGpuEvent(event);
+ state_.perfCounterEnabled_ = false;
} else {
LogError("Unsupported performance counter state");
vcmd.setStatus(CL_INVALID_OPERATION);
diff --git a/rocclr/runtime/device/pal/palvirtual.hpp b/rocclr/runtime/device/pal/palvirtual.hpp
index 65ee2f5ad1..4eabc1a902 100644
--- a/rocclr/runtime/device/pal/palvirtual.hpp
+++ b/rocclr/runtime/device/pal/palvirtual.hpp
@@ -180,9 +180,10 @@ class VirtualGPU : public device::VirtualDevice {
//! The virtual GPU states
union State {
struct {
- uint profiling_ : 1; //!< Profiling is enabled
- uint forceWait_ : 1; //!< Forces wait in flush()
- uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
+ uint profiling_ : 1; //!< Profiling is enabled
+ uint forceWait_ : 1; //!< Forces wait in flush()
+ uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
+ uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled
};
uint value_;
State() : value_(0) {}