P4 to Git Change 1440849 by jatang@jatang-opencl-hsa-stg1 on 2017/07/28 10:17:34

SWDEV-95919 - Avoid flushing when PerfCounter is enabled.

	This ensures that PerfStart, the dispatch, and PerfEnd are all recorded in the same cmdBuffer.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#27 edit


[ROCm/clr commit: 6b27cc35a1]
이 커밋은 다음에 포함됨:
foreman
2017-07-28 10:35:46 -04:00
부모 e4157876e9
커밋 c1846c32fa
2개의 변경된 파일, 13개의 추가 그리고 6개의 삭제
+9 -3
파일 보기
@@ -1909,9 +1909,13 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
}
bool needFlush = false;
dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
if (dmaFlushMgmt().dispatchSplitSize() != 0) {
needFlush = true;
// Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd are in the same cmdBuffer
if (!state_.perfCounterEnabled_) {
dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
if (dmaFlushMgmt().dispatchSplitSize() != 0) {
needFlush = true;
}
}
size_t newOffset[3] = {0, 0, 0};
@@ -2347,6 +2351,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) {
if (vcmd.getState() == amd::PerfCounterCommand::Begin) {
Pal::SetClockModeInput input;
Pal::SetClockModeOutput output = {};
state_.perfCounterEnabled_ = true;
input.clockMode = Pal::DeviceClockMode::Profiling;
dev().iDev()->SetClockMode(input, &output);
GpuEvent event;
@@ -2360,6 +2365,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) {
iCmd()->CmdEndPerfExperiment(palPerf);
eventEnd(MainEngine, event);
setGpuEvent(event);
state_.perfCounterEnabled_ = false;
} else {
LogError("Unsupported performance counter state");
vcmd.setStatus(CL_INVALID_OPERATION);
+4 -3
파일 보기
@@ -180,9 +180,10 @@ class VirtualGPU : public device::VirtualDevice {
//! The virtual GPU states
union State {
struct {
uint profiling_ : 1; //!< Profiling is enabled
uint forceWait_ : 1; //!< Forces wait in flush()
uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
uint profiling_ : 1; //!< Profiling is enabled
uint forceWait_ : 1; //!< Forces wait in flush()
uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled
};
uint value_;
State() : value_(0) {}