SWDEV-519596 - Avoid passing dep signal to SDMA

- For D2H cases avoid passing dependent signals to SDMA, the signals
  take a while to resolve on SDMA engine

Change-Id: I569635228af977847f201c82ca897002f8f2f4a8


[ROCm/clr commit: 78d0ff2dbc]
This commit is contained in:
Saleel Kudchadker
2025-03-06 19:43:52 +00:00
bovenliggende 021ca96766
commit c94c02a2e6
@@ -561,7 +561,7 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
address src = reinterpret_cast<address>(srcMemory.getDeviceMemory());
address dst = reinterpret_cast<address>(dstMemory.getDeviceMemory());
gpu().releaseGpuMemoryFence(kSkipCpuWait);
bool skipCpuWait = true;
src += srcOrigin[0];
dst += dstOrigin[0];
@@ -581,6 +581,15 @@ bool DmaBlitManager::hsaCopy(const Memory& srcMemory, const Memory& dstMemory,
dstAgent = dstMemory.dev().getBackendDevice();
}
// Blocking D2H copies need a wait anyways so better wait here
// than having to wait on the device for dependent signals for SDMA which is slow
if (!copyMetadata.isAsync_ && !srcMemory.isHostMemDirectAccess()
&& dstMemory.isHostMemDirectAccess()) {
skipCpuWait = false;
}
gpu().releaseGpuMemoryFence(skipCpuWait);
return rocrCopyBuffer(dst, dstAgent, src, srcAgent, size[0], copyMetadata);
}
@@ -636,7 +645,8 @@ void DmaBlitManager::releaseBuffer(BufferState &buffer) const {
bool DmaBlitManager::hsaCopyStagedOrPinned(const_address hostSrc, address hostDst,
size_t size, bool hostToDev, amd::CopyMetadata& copyMetadata,
bool enablePin) const {
gpu().releaseGpuMemoryFence(kSkipCpuWait);
// Do not skip wait here for D2H. Resolving dependent signals for SDMA engine is slow
gpu().releaseGpuMemoryFence(hostToDev);
// If Pinning is enabled, Pin host Memory for copy size > MinSizeForPinnedTransfer
// For 16KB < size <= MinSizeForPinnedTransfer Use staging buffer without pinning
bool status = true;
@@ -698,8 +708,6 @@ bool DmaBlitManager::hsaCopyStagedOrPinned(const_address hostSrc, address hostDs
return false;
}
gpu().addSystemScope();
return true;
}
// ================================================================================================