From c9dd95bf6cfd9bc4522123d4524c4e260da8bfe5 Mon Sep 17 00:00:00 2001 From: Saleel Kudchadker Date: Thu, 9 Jan 2025 21:36:07 +0000 Subject: [PATCH] SWDEV-504494 - Use system scope for D2H - When using shader copy, make sure to use release scope for the AQL packet. This is a potential bug but is hidden as hipMemcpyAsync always needs synchronization (which inserts a barrier with release scope). For hipMemcpy we use a barrier packet to make sure it's blocking. Either way, a barrier is always used, which in some ways hides a potential bug. Change-Id: I57fb7f769c3179e76d712471c0905104c801d7ba --- rocclr/device/rocm/rocblit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rocclr/device/rocm/rocblit.cpp b/rocclr/device/rocm/rocblit.cpp index 1fcd23c6fe..ad7db8cf9c 100644 --- a/rocclr/device/rocm/rocblit.cpp +++ b/rocclr/device/rocm/rocblit.cpp @@ -2261,7 +2261,8 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds // Check CL_MEM_SVM_ATOMICS flag to see if we used system_coarse_segment_ auto memFlags = srcMemory.owner()->getMemFlags(); bool srcSvmAtomics = (memFlags & CL_MEM_SVM_ATOMICS) != 0; - if (!srcSvmAtomics && srcMemory.isHostMemDirectAccess()) { + if ((!srcSvmAtomics && srcMemory.isHostMemDirectAccess()) || + (dstMemory.isHostMemDirectAccess())) { gpu().addSystemScope(); } result = shaderCopyBuffer(reinterpret_cast
(dstMemory.virtualAddress()),