SWDEV-273265 - Fix to wait on a pending dispatch in PAL.

Change-Id: I431cedfef5d5cb727c35ba8e294528017bfe2088


[ROCm/clr commit: 21ea81d5b9]
Этот коммит содержится в:
Sourabh Betigeri
2021-03-23 11:12:37 -07:00
коммит произвёл Sourabh Betigeri
родитель 2f8170b85d
Коммит 6ec57b845c
3 изменённых файлов: 63 добавлений и 1 удалений
+41
Просмотреть файл
@@ -47,6 +47,8 @@ inline Memory& DmaBlitManager::gpuMem(device::Memory& mem) const {
bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory** xferBuf,
size_t origin, size_t& offset, size_t& totalSize,
size_t xferSize) const {
gpu().releaseGpuMemoryFence();
amd::Coord3D dst(0, 0, 0);
size_t tmpSize;
uint idxWrite = 0;
@@ -118,6 +120,8 @@ bool DmaBlitManager::readMemoryStaged(Memory& srcMemory, void* dstHost, Memory**
bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
gpu().releaseGpuMemoryFence();
// Use host copy if memory has direct access
if (setup_.disableReadBuffer_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
@@ -206,6 +210,8 @@ bool DmaBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
const amd::BufferRect& bufRect, const amd::BufferRect& hostRect,
const amd::Coord3D& size, bool entire) const {
gpu().releaseGpuMemoryFence();
// Use host copy if memory has direct access
if (setup_.disableReadBufferRect_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
@@ -258,6 +264,8 @@ bool DmaBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
bool DmaBlitManager::readImage(device::Memory& srcMemory, void* dstHost, const amd::Coord3D& origin,
const amd::Coord3D& size, size_t rowPitch, size_t slicePitch,
bool entire) const {
gpu().releaseGpuMemoryFence();
if (setup_.disableReadImage_) {
return HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch,
entire);
@@ -324,6 +332,8 @@ bool DmaBlitManager::writeMemoryStaged(const void* srcHost, Memory& dstMemory, M
bool DmaBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
bool entire) const {
gpu().releaseGpuMemoryFence();
// Use host copy if memory has direct access or it's persistent
if (setup_.disableWriteBuffer_ ||
(gpuMem(dstMemory).isHostMemDirectAccess() &&
@@ -412,6 +422,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
const amd::BufferRect& hostRect,
const amd::BufferRect& bufRect, const amd::Coord3D& size,
bool entire) const {
gpu().releaseGpuMemoryFence();
// Use host copy if memory has direct access or it's persistent
if (setup_.disableWriteBufferRect_ ||
(dstMemory.isHostMemDirectAccess() &&
@@ -468,6 +480,8 @@ bool DmaBlitManager::writeBufferRect(const void* srcHost, device::Memory& dstMem
bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
const amd::Coord3D& origin, const amd::Coord3D& size,
size_t rowPitch, size_t slicePitch, bool entire) const {
gpu().releaseGpuMemoryFence();
if (setup_.disableWriteImage_) {
return HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch,
entire);
@@ -483,6 +497,8 @@ bool DmaBlitManager::writeImage(const void* srcHost, device::Memory& dstMemory,
bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
gpu().releaseGpuMemoryFence();
if (setup_.disableCopyBuffer_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
!dev().settings().apuSystem_ && gpuMem(dstMemory).isHostMemDirectAccess())) {
@@ -497,6 +513,8 @@ bool DmaBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& dstMe
bool DmaBlitManager::copyBufferRect(device::Memory& srcMemory, device::Memory& dstMemory,
const amd::BufferRect& srcRect, const amd::BufferRect& dstRect,
const amd::Coord3D& size, bool entire) const {
gpu().releaseGpuMemoryFence();
if (setup_.disableCopyBufferRect_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable() &&
gpuMem(dstMemory).isHostMemDirectAccess())) {
@@ -573,6 +591,8 @@ bool DmaBlitManager::copyImageToBuffer(device::Memory& srcMemory, device::Memory
const amd::Coord3D& size, bool entire, size_t rowPitch,
size_t slicePitch) const {
bool result = false;
gpu().releaseGpuMemoryFence();
if (setup_.disableCopyImageToBuffer_) {
result = HostBlitManager::copyImageToBuffer(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
@@ -597,6 +617,8 @@ bool DmaBlitManager::copyBufferToImage(device::Memory& srcMemory, device::Memory
const amd::Coord3D& size, bool entire, size_t rowPitch,
size_t slicePitch) const {
bool result = false;
gpu().releaseGpuMemoryFence();
if (setup_.disableCopyBufferToImage_) {
result = HostBlitManager::copyBufferToImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size,
@@ -620,6 +642,8 @@ bool DmaBlitManager::copyImage(device::Memory& srcMemory, device::Memory& dstMem
const amd::Coord3D& srcOrigin, const amd::Coord3D& dstOrigin,
const amd::Coord3D& size, bool entire) const {
bool result = false;
gpu().releaseGpuMemoryFence();
if (setup_.disableCopyImage_) {
return HostBlitManager::copyImage(srcMemory, dstMemory, srcOrigin, dstOrigin, size, entire);
@@ -1655,6 +1679,8 @@ bool KernelBlitManager::readImage(device::Memory& srcMemory, void* dstHost,
// Use host copy if memory has direct access or it's persistent
if (setup_.disableReadImage_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
result =
HostBlitManager::readImage(srcMemory, dstHost, origin, size, rowPitch, slicePitch, entire);
synchronize();
@@ -1702,6 +1728,8 @@ bool KernelBlitManager::writeImage(const void* srcHost, device::Memory& dstMemor
// Use host copy if memory has direct access or it's persistent
if (setup_.disableWriteImage_ || gpuMem(dstMemory).isHostMemDirectAccess() ||
gpuMem(dstMemory).isPersistentDirectMap()) {
gpu().releaseGpuMemoryFence();
result =
HostBlitManager::writeImage(srcHost, dstMemory, origin, size, rowPitch, slicePitch, entire);
synchronize();
@@ -1872,9 +1900,12 @@ bool KernelBlitManager::readBuffer(device::Memory& srcMemory, void* dstHost,
bool entire) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
// Use host copy if memory has direct access
if (setup_.disableReadBuffer_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBuffer(srcMemory, dstHost, origin, size, entire);
synchronize();
return result;
@@ -1923,6 +1954,8 @@ bool KernelBlitManager::readBufferRect(device::Memory& srcMemory, void* dstHost,
// Use host copy if memory has direct access
if (setup_.disableReadBufferRect_ ||
(gpuMem(srcMemory).isHostMemDirectAccess() && gpuMem(srcMemory).isCacheable())) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::readBufferRect(srcMemory, dstHost, bufRect, hostRect, size, entire);
synchronize();
return result;
@@ -1971,6 +2004,8 @@ bool KernelBlitManager::writeBuffer(const void* srcHost, device::Memory& dstMemo
(gpuMem(dstMemory).isHostMemDirectAccess() &&
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
(gpuMem(dstMemory).memoryType() == Resource::Persistent)) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBuffer(srcHost, dstMemory, origin, size, entire);
synchronize();
return result;
@@ -2022,6 +2057,8 @@ bool KernelBlitManager::writeBufferRect(const void* srcHost, device::Memory& dst
(gpuMem(dstMemory).isHostMemDirectAccess() &&
(gpuMem(dstMemory).memoryType() != Resource::ExternalPhysical)) ||
gpuMem(dstMemory).isPersistentDirectMap()) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::writeBufferRect(srcHost, dstMemory, hostRect, bufRect, size, entire);
synchronize();
return result;
@@ -2071,6 +2108,8 @@ bool KernelBlitManager::fillBuffer(device::Memory& memory, const void* pattern,
// Use host fill if memory has direct access
if (setup_.disableFillBuffer_ || (!forceBlit && gpuMem(memory).isHostMemDirectAccess())) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::fillBuffer(memory, pattern, patternSize, origin, size, entire);
synchronize();
return result;
@@ -2212,6 +2251,8 @@ bool KernelBlitManager::fillImage(device::Memory& memory, const void* pattern,
// Use host fill if memory has direct access
if (setup_.disableFillImage_ || gpuMem(memory).isHostMemDirectAccess()) {
gpu().releaseGpuMemoryFence();
result = HostBlitManager::fillImage(memory, pattern, origin, size, entire);
synchronize();
return result;
+13 -1
Просмотреть файл
@@ -1216,7 +1216,7 @@ void VirtualGPU::submitReadMemory(amd::ReadMemoryCommand& vcmd) {
if (size[0] <= dev().settings().pinnedMinXferSize_) {
partial = size[0];
}
// Make first step transfer
// Make first step transfer
if (partial > 0) {
result = blitMgr().readBuffer(*memory, vcmd.destination(), origin, partial);
}
@@ -2596,6 +2596,9 @@ bool VirtualGPU::submitKernelInternal(const amd::NDRangeContainer& sizes, const
dev().rgpCaptureMgr()->PostDispatch(this);
}
// Mark the flag indicating if a dispatch is outstanding.
state_.hasPendingDispatch_ = true;
return true;
}
@@ -3833,4 +3836,13 @@ void* VirtualGPU::getOrCreateHostcallBuffer() {
}
return hostcallBuffer_;
}
//! Waits on an outstanding kernel dispatch, if any, and clears the pending flag.
//! The wait is only performed when amd::IS_HIP is set; otherwise this is a no-op
//! and the pending flag is left untouched.
void VirtualGPU::releaseGpuMemoryFence() {
  // Bail out early when there is nothing to wait for or the runtime is not HIP.
  if (!isPendingDispatch() || !amd::IS_HIP) {
    return;
  }

  // NOTE(review): presumably blocks until the compute engine is idle - confirm
  // against the WaitForIdleCompute() definition.
  WaitForIdleCompute();

  // The outstanding dispatch has been waited on; clear the marker.
  state_.hasPendingDispatch_ = false;
}
} // namespace pal
+9
Просмотреть файл
@@ -226,6 +226,8 @@ class VirtualGPU : public device::VirtualDevice {
uint profileEnabled_ : 1; //!< Profiling is enabled for WaveLimiter
uint perfCounterEnabled_ : 1; //!< PerfCounter is enabled
uint rgpCaptureEnabled_ : 1; //!< RGP capture is enabled in the runtime
uint imageBufferWrtBack_ : 1; //!< Enable image buffer write back
uint hasPendingDispatch_ : 1; //!< A kernel dispatch is outstanding
};
uint value_;
State() : value_(0) {}
@@ -553,6 +555,13 @@ class VirtualGPU : public device::VirtualDevice {
void* getOrCreateHostcallBuffer();
//! Waits on an outstanding kernel dispatch, if one is pending.
void releaseGpuMemoryFence();
//! Reports whether the hasPendingDispatch_ state bit is currently set,
//! i.e. a kernel dispatch is still marked as outstanding.
bool isPendingDispatch() const {
  return state_.hasPendingDispatch_ != 0;
}
protected:
void profileEvent(EngineType engine, bool type) const;