From 230a22b395286ddfd958d6785e5a0089937638a4 Mon Sep 17 00:00:00 2001 From: SaleelK Date: Wed, 3 Sep 2025 06:33:29 -0700 Subject: [PATCH] rocr: Workaround for peak SDMA bandwidth on gfx94x (#626) * Ideally, SDMA0/1/2 are the engines to use for H2D/D2H copies due to their physical PCIe proximity * Allow using the same src/dst agent for the SDMA query APIs --- .../core/runtime/amd_gpu_agent.cpp | 27 ++++++++++++++++--- .../hsa-runtime/core/runtime/runtime.cpp | 8 ------ .../runtime/hsa-runtime/inc/hsa_ext_amd.h | 4 --- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 1b6ecb8b86..2edfce416f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -1244,11 +1244,30 @@ hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_ag hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent, uint32_t *recommended_ids_mask) { - assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) || - (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) && - ("Both devices are CPU agents which is not expected")); + // From the collected data, gfx94x performance is better only for the first 3 SDMA engines + bool isGfx94x = (isa_->GetMajorVersion() == 9 && + (isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5)); - *recommended_ids_mask = rec_sdma_eng_id_peers_info_[dst_agent.public_handle().handle]; + if (isGfx94x && + ((src_agent.device_type() == core::Agent::kAmdCpuDevice && + dst_agent.device_type() == core::Agent::kAmdGpuDevice) || + (src_agent.device_type() == core::Agent::kAmdGpuDevice && + dst_agent.device_type() == core::Agent::kAmdCpuDevice))) { + + if (src_agent.device_type() == core::Agent::kAmdCpuDevice) { + // Host to Device: Use SDMA engine 0 if available + 
*recommended_ids_mask = HSA_AMD_SDMA_ENGINE_0; + } else { + // Device to Host: Use SDMA engine 1, plus engine 2 when more than two SDMA engines exist + *recommended_ids_mask = HSA_AMD_SDMA_ENGINE_1; + + if (properties_.NumSdmaEngines + properties_.NumSdmaXgmiEngines > 2) { + *recommended_ids_mask |= HSA_AMD_SDMA_ENGINE_2; + } + } + } else { + *recommended_ids_mask = rec_sdma_eng_id_peers_info_[dst_agent.public_handle().handle]; + } return HSA_STATUS_SUCCESS; } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index 8451095527..180063d14a 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -605,10 +605,6 @@ hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_ const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice); core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent; - if (dst_agent == src_agent) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - return copy_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask); } @@ -617,10 +613,6 @@ hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* sr const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice); core::Agent* copy_agent = (src_gpu) ? 
src_agent : dst_agent; - if (dst_agent == src_agent) { - return HSA_STATUS_ERROR_INVALID_AGENT; - } - return copy_agent->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask); } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h index 1c590946b3..4578fbce43 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -1810,8 +1810,6 @@ hsa_status_t HSA_API * * @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Agent does not have available SDMA engines. * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as - * dst_agent == src_agent is generally used for shader copies. */ hsa_status_t HSA_API hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent, @@ -1828,8 +1826,6 @@ hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent, * * @retval ::HSA_STATUS_SUCCESS For mask returned * - * @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as - * dst_agent == src_agent is generally used for shader copies. */ hsa_status_t HSA_API hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,