From c8e91b3f3e6cd7c52ecaf4ec8fa59888f0ad89af Mon Sep 17 00:00:00 2001 From: SaleelK Date: Tue, 9 Sep 2025 13:13:29 -0700 Subject: [PATCH] clr: Fix condition for taking shader path (#884) * SWDEV-551080 * Fix the condition for taking the shader path; the size check had been moved incorrectly * Also handle the preferred-engine value being a bitmask by selecting its lowest set bit --- projects/clr/rocclr/device/rocm/rocblit.cpp | 34 ++++++++++++++------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index 842d322554..2937c4ceee 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -526,7 +526,7 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c // If requested engine is valid and available, use it if (recIdMask != 0 && (freeEngineMask & recIdMask) != 0) { - copyMask = recIdMask; + copyMask = recIdMask - (recIdMask & (recIdMask - 1)); } else { // Otherwise use first available engine copyMask = freeEngineMask - (freeEngineMask & (freeEngineMask - 1)); @@ -2252,17 +2252,29 @@ bool KernelBlitManager::copyBuffer(device::Memory& srcMemory, device::Memory& ds } } - bool ipcShared = srcMemory.owner()->ipcShared() || dstMemory.owner()->ipcShared(); + // Determine if we should use shader copy path based on various conditions + bool hwlCopyDisabled = setup_.disableHwlCopyBuffer_; - bool useShaderCopyPath = setup_.disableHwlCopyBuffer_ || - (copyMetadata.copyEnginePreference_ == - amd::CopyMetadata::CopyEnginePreference::BLIT) || - (sizeIn[0] <= dev().settings().sdmaCopyThreshold_ && - !(p2p || ipcShared) && - !srcMemory.isHostMemDirectAccess() && - !dstMemory.isHostMemDirectAccess() && - copyMetadata.copyEnginePreference_ != - amd::CopyMetadata::CopyEnginePreference::SDMA); + // Check copy engine preferences + bool isSdmaPreference = + copyMetadata.copyEnginePreference_ == amd::CopyMetadata::CopyEnginePreference::SDMA; + bool
isBlitPreference = + copyMetadata.copyEnginePreference_ == amd::CopyMetadata::CopyEnginePreference::BLIT; + + // Check memory access patterns + bool isP2pOrIpc = p2p || srcMemory.owner()->ipcShared() || dstMemory.owner()->ipcShared(); + bool neitherMemoryIsHostDirectAccess = + !srcMemory.isHostMemDirectAccess() && !dstMemory.isHostMemDirectAccess(); + + // Determine shader copy path conditions + bool smallSizeWithNonSdmaPreference = + sizeIn[0] <= dev().settings().sdmaCopyThreshold_ && !isSdmaPreference; + + bool nonP2PIpcOrDirectAccess = + !isP2pOrIpc && neitherMemoryIsHostDirectAccess && !isSdmaPreference; + + const bool useShaderCopyPath = hwlCopyDisabled || smallSizeWithNonSdmaPreference || + nonP2PIpcOrDirectAccess || isBlitPreference; if (!useShaderCopyPath) { if (amd::IS_HIP) {