SWDEV-273265 - Fix to wait on a pending dispatch in PAL.
Change-Id: I431cedfef5d5cb727c35ba8e294528017bfe2088
[ROCm/clr commit: 21ea81d5b9]
Этот коммит содержится в:
коммит произвёл
Sourabh Betigeri
родитель
2f8170b85d
Коммит
6ec57b845c
@@ -47,6 +47,8 @@ inline Memory& DmaBlitManager::gpuMem(device::Memory& mem) const {
|
||||
bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory** xferBuf,
|
||||
size_t origin, size_t& offset, size_t& totalSize,
|
||||
size_t xferSize) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
amd::Coord3D dst(0, 0, 0);
|
||||
size_t tmpSize;
|
||||
uint idxWrite = 0;
|
||||
@@ -118,6 +120,8 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory**
|
||||
bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableReadBuffer_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
@@ -206,6 +210,8 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableReadBufferRect_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
@@ -258,6 +264,8 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin,
|
||||
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch,
|
||||
bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableReadImage_) {
|
||||
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
@@ -324,6 +332,8 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
|
||||
bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
// Use host copy if memory has direct access or it's persistent
|
||||
if (setup_.disableWriteBuffer_ ||
|
||||
(gpuMem(dstMemory).isHostMemDirectAccess() &&
|
||||
@@ -412,6 +422,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
const amd::BufferRect& hostRect,
|
||||
const amd::BufferRect& bufRect, const amd::Coord3D& size,
|
||||
bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
// Use host copy if memory has direct access or it's persistent
|
||||
if (setup_.disableWriteBufferRect_ ||
|
||||
(dstMemory.isHostMemDirectAccess() &&
|
||||
@@ -468,6 +480,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
|
||||
bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
const amd::Coord3D& origin, const amd::Coord3D& size,
|
||||
size_t rowPitch, size_t slicePitch, bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableWriteImage_) {
|
||||
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
|
||||
entire);
|
||||
@@ -483,6 +497,8 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
|
||||
bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableCopyBuffer_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
|
||||
!dev().settings().apuSystem_ && gpuMem(dstMemory).isHostMemDirectAccess())) {
|
||||
@@ -497,6 +513,8 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
|
||||
bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
|
||||
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
if (setup_.disableCopyBufferRect_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
|
||||
gpuMem(dstMemory).isHostMemDirectAccess())) {
|
||||
@@ -573,6 +591,8 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
|
||||
const amd::Coord3D& size, bool entire, size_t rowPitch,
|
||||
size_t slicePitch) const {
|
||||
bool result = false;
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
|
||||
if (setup_.disableCopyImageToBuffer_) {
|
||||
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
@@ -597,6 +617,8 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
|
||||
const amd::Coord3D& size, bool entire, size_t rowPitch,
|
||||
size_t slicePitch) const {
|
||||
bool result = false;
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
|
||||
if (setup_.disableCopyBufferToImage_) {
|
||||
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
|
||||
@@ -620,6 +642,8 @@ bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMem
|
||||
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
|
||||
const amd::Coord3D& size, bool entire) const {
|
||||
bool result = false;
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
|
||||
if (setup_.disableCopyImage_) {
|
||||
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
|
||||
@@ -1655,6 +1679,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
|
||||
// Use host copy if memory has direct access or it's persistent
|
||||
if (setup_.disableReadImage_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result =
|
||||
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
|
||||
synchronize();
|
||||
@@ -1702,6 +1728,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
|
||||
// Use host copy if memory has direct access or it's persistent
|
||||
if (setup_.disableWriteImage_ || gpuMem(dstMemory).isHostMemDirectAccess() ||
|
||||
gpuMem(dstMemory).isPersistentDirectMap()) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result =
|
||||
HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
|
||||
synchronize();
|
||||
@@ -1872,9 +1900,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
|
||||
bool entire) const {
|
||||
amd::ScopedLock k(lockXferOps_);
|
||||
bool result = false;
|
||||
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableReadBuffer_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -1923,6 +1954,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
|
||||
// Use host copy if memory has direct access
|
||||
if (setup_.disableReadBufferRect_ ||
|
||||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -1971,6 +2004,8 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
|
||||
(gpuMem(dstMemory).isHostMemDirectAccess() &&
|
||||
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
|
||||
(gpuMem(dstMemory).memoryType() == Resource::Persistent)) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -2022,6 +2057,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
|
||||
(gpuMem(dstMemory).isHostMemDirectAccess() &&
|
||||
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
|
||||
gpuMem(dstMemory).isPersistentDirectMap()) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -2071,6 +2108,8 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
|
||||
|
||||
// Use host fill if memory has direct access
|
||||
if (setup_.disableFillBuffer_ || (!forceBlit && gpuMem(memory).isHostMemDirectAccess())) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, origin, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
@@ -2212,6 +2251,8 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
|
||||
|
||||
// Use host fill if memory has direct access
|
||||
if (setup_.disableFillImage_ || gpuMem(memory).isHostMemDirectAccess()) {
|
||||
gpu().releaseGpuMemoryFence();
|
||||
|
||||
result = HostBlitManager::fillImage(memory, pattern, origin, size, entire);
|
||||
synchronize();
|
||||
return result;
|
||||
|
||||
@@ -1216,7 +1216,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
|
||||
if (size[0] <= dev().settings().pinnedMinXferSize_) {
|
||||
partial = size[0];
|
||||
}
|
||||
// Make first step transfer
|
||||
// Make first step transfer
|
||||
if (partial > 0) {
|
||||
result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial);
|
||||
}
|
||||
@@ -2596,6 +2596,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
|
||||
dev().rgpCaptureMgr()->PostDispatch(this);
|
||||
}
|
||||
|
||||
// Mark the flag indicating if a dispatch is outstanding.
|
||||
state_.hasPendingDispatch_ = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -3833,4 +3836,13 @@ void* VirtualGPU::getOrCreateHostcallBuffer() {
|
||||
}
|
||||
return hostcallBuffer_;
|
||||
}
|
||||
|
||||
void VirtualGPU::releaseGpuMemoryFence() {
|
||||
if (isPendingDispatch() && amd::IS_HIP) {
|
||||
WaitForIdleCompute();
|
||||
// Reset the status.
|
||||
state_.hasPendingDispatch_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace pal
|
||||
|
||||
@@ -226,6 +226,8 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
|
||||
uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled
|
||||
uint rgpCaptureEnabled_ : 1; //!< RGP capture is enabled in the runtime
|
||||
uint imageBufferWrtBack_ : 1; //!< Enable image buffer write back
|
||||
uint hasPendingDispatch_ : 1; //!< A kernel dispatch is outstanding
|
||||
};
|
||||
uint value_;
|
||||
State() : value_(0) {}
|
||||
@@ -553,6 +555,13 @@ class VirtualGPU : public device::VirtualDevice {
|
||||
|
||||
void* getOrCreateHostcallBuffer();
|
||||
|
||||
//! Waits on an outstanding kernel.
|
||||
//! If a kernel dispatch is flagged as pending (and amd::IS_HIP is set), calls
//! WaitForIdleCompute() and then clears the pending flag; otherwise a no-op.
void releaseGpuMemoryFence();
|
||||
|
||||
//! Returns true if a dispatch is pending.
|
||||
bool isPendingDispatch() const {
  // Reads the one-bit hasPendingDispatch_ field of state_ and reports it as a bool.
  const bool dispatchOutstanding = (state_.hasPendingDispatch_ != 0);
  return dispatchOutstanding;
}
|
||||
|
||||
|
||||
protected:
|
||||
void profileEvent(EngineType engine, bool type) const;
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user