From ec5e9673adf0eb3cbcec0cac98d904ba36a2271d Mon Sep 17 00:00:00 2001 From: SaleelK Date: Tue, 16 Sep 2025 12:56:07 -0700 Subject: [PATCH] clr: Use current device copy engine for inter-dev copy (#945) * For inter-device copies, always use the SDMA engine of the current device * ROCr uses the srcAgent's SDMA engine, which could be a remote device --- projects/clr/rocclr/device/rocm/rocblit.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index 2937c4ceee..7b8495eacb 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -539,6 +539,13 @@ inline bool DmaBlitManager::rocrCopyBuffer(address dst, hsa_agent_t& dstAgent, c // Copy on the first available free engine if ROCr returns a valid mask hsa_amd_sdma_engine_id_t copyEngine = static_cast(copyMask); + // Check if engine type is SdmaInter and adjust agents accordingly + // ROCr copy API always chooses the SDMA engine of the srcAgent if it's a GPU + if (engine == HwQueueEngine::SdmaInter) { + srcAgent = dev().getBackendDevice(); + forceSDMA = true; + } + ClPrint(amd::LOG_DEBUG, amd::LOG_COPY2, "HSA Copy copy_engine=0x%x, dst=0x%zx, src=0x%zx, " "size=%ld, forceSDMA=%d, engineType=%d, wait_event=0x%zx, completion_signal=0x%zx",