P4 to Git Change 2035516 by gandryey@gera-hip-lnx on 2019/11/22 15:51:48
SWDEV-79445 - OCL generic changes and code clean-up
- Don't sync on the scratch buffer if the executed kernel is unchanged, since the number of scratch regs remains the same
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.cpp#158 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/pal/palvirtual.hpp#65 edit
[ROCm/clr commit: dd5459c7a1]
This commit is contained in:
@@ -2379,9 +2379,6 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add ISA memory object to the resource tracking list
|
||||
AddKernel(kernel);
|
||||
|
||||
uint64_t vmDefQueue = 0;
|
||||
VirtualGPU* gpuDefQueue = nullptr;
|
||||
if (hsaKernel.dynamicParallelism()) {
|
||||
@@ -2400,6 +2397,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
return false;
|
||||
}
|
||||
|
||||
// Add ISA memory object to the resource tracking list
|
||||
AddKernel(kernel);
|
||||
|
||||
bool needFlush = false;
|
||||
// Avoid flushing when PerfCounter is enabled, to make sure PerfStart/dispatch/PerfEnd
|
||||
// are in the same cmdBuffer
|
||||
@@ -3507,7 +3507,11 @@ bool VirtualGPU::processMemObjectsHSA(const amd::Kernel& kernel, const_address p
|
||||
const Device::ScratchBuffer* scratch = dev().scratch(hwRing());
|
||||
// Validate scratch buffer to force sync mode, because
|
||||
// the current scratch logic is optimized for size and performance
|
||||
memoryDependency().validate(*this, scratch->memObj_, IsReadOnly);
|
||||
// Note: runtime can skip sync if the same kernel is used,
|
||||
// since the number of scratch regs remains the same
|
||||
if (!IsSameKernel(kernel)) {
|
||||
memoryDependency().validate(*this, scratch->memObj_, IsReadOnly);
|
||||
}
|
||||
addVmMemory(scratch->memObj_);
|
||||
}
|
||||
|
||||
|
||||
@@ -377,6 +377,10 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
inline void AddKernel(const amd::Kernel& kernel //!< AMD kernel object
|
||||
) const;
|
||||
|
||||
//! Checks if runtime dispatches the same kernel as previously
|
||||
inline bool IsSameKernel(const amd::Kernel& kernel //!< AMD kernel object
|
||||
) const;
|
||||
|
||||
//! Adds a dopp desktop texture reference
|
||||
void addDoppRef(const Memory* memory, //!< GPU memory object
|
||||
bool lastDoopCmd, //!< is the last submission for the pre-present primary
|
||||
@@ -652,6 +656,10 @@ inline void VirtualGPU::AddKernel(const amd::Kernel& kernel) const {
|
||||
queues_[MainEngine]->last_kernel_ = &kernel;
|
||||
}
|
||||
|
||||
//! Reports whether \p kernel is the very same object (compared by address)
//! as the one currently stored in last_kernel_ of the main engine queue.
inline bool VirtualGPU::IsSameKernel(const amd::Kernel& kernel) const {
  // Pointer identity check; the comparison already yields a bool.
  const bool unchanged = (&kernel == queues_[MainEngine]->last_kernel_);
  return unchanged;
}
|
||||
|
||||
template <bool avoidBarrierSubmit> uint VirtualGPU::Queue::submit(bool forceFlush) {
|
||||
cmdCnt_++;
|
||||
uint id = cmdBufIdCurrent_;
|
||||
|
||||
Reference in new issue
Block a user