P4 to Git Change 1440849 by jatang@jatang-opencl-hsa-stg1 on 2017/07/28 10:17:34
SWDEV-95919 - Avoid flushing when PerfCounter is enabled,
to make sure that PerfStart/dispatch/PerfEnd are recorded in the same cmdBuffer.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#27 edit
[ROCm/clr commit: 6b27cc35a1]
이 커밋은 다음에 포함됨:
@@ -1909,9 +1909,13 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
}
|
||||
|
||||
bool needFlush = false;
|
||||
dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
|
||||
if (dmaFlushMgmt().dispatchSplitSize() != 0) {
|
||||
needFlush = true;
|
||||
|
||||
// Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd are in the same cmdBuffer
|
||||
if (!state_.perfCounterEnabled_) {
|
||||
dmaFlushMgmt_.findSplitSize(dev(), sizes.global().product(), hsaKernel.aqlCodeSize());
|
||||
if (dmaFlushMgmt().dispatchSplitSize() != 0) {
|
||||
needFlush = true;
|
||||
}
|
||||
}
|
||||
|
||||
size_t newOffset[3] = {0, 0, 0};
|
||||
@@ -2347,6 +2351,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) {
|
||||
if (vcmd.getState() == amd::PerfCounterCommand::Begin) {
|
||||
Pal::SetClockModeInput input;
|
||||
Pal::SetClockModeOutput output = {};
|
||||
state_.perfCounterEnabled_ = true;
|
||||
input.clockMode = Pal::DeviceClockMode::Profiling;
|
||||
dev().iDev()->SetClockMode(input, &output);
|
||||
GpuEvent event;
|
||||
@@ -2360,6 +2365,7 @@ void VirtualGPU::submitPerfCounter(amd::PerfCounterCommand& vcmd) {
|
||||
iCmd()->CmdEndPerfExperiment(palPerf);
|
||||
eventEnd(MainEngine, event);
|
||||
setGpuEvent(event);
|
||||
state_.perfCounterEnabled_ = false;
|
||||
} else {
|
||||
LogError("Unsupported performance counter state");
|
||||
vcmd.setStatus(CL_INVALID_OPERATION);
|
||||
|
||||
@@ -180,9 +180,10 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
//! The virtual GPU states
|
||||
union State {
|
||||
struct {
|
||||
uint profiling_ : 1; //!< Profiling is enabled
|
||||
uint forceWait_ : 1; //!< Forces wait in flush()
|
||||
uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
|
||||
uint profiling_ : 1; //!< Profiling is enabled
|
||||
uint forceWait_ : 1; //!< Forces wait in flush()
|
||||
uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
|
||||
uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled
|
||||
};
|
||||
uint value_;
|
||||
State() : value_(0) {}
|
||||
|
||||
새 이슈에서 참조
사용자 차단