rocr: Workaround for peak SDMA b/w on gfx94x (#626)

* Ideally SDMA0/1/2 are the engines to use for H2D/D2H copies due to their
  physical PCIe proximity
* Allow using the same src/dst agent for the SDMA query APIs
This commit is contained in:
SaleelK
2025-09-03 06:33:29 -07:00
committed by GitHub
parent a57fd50865
commit 230a22b395
3 files changed with 23 additions and 16 deletions
@@ -1244,11 +1244,30 @@ hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_ag
/// @brief Report the SDMA engine mask recommended for a copy between two agents.
///
/// On ISA major version 9 with minor version 4 or 5 (the "gfx94x" workaround),
/// copies between a CPU agent and a GPU agent get a fixed recommendation:
///   - host to device (CPU source, GPU destination): SDMA engine 0
///   - device to host (GPU source, CPU destination): SDMA engine 1, plus
///     engine 2 when this agent reports more than two SDMA engines in total.
/// Every other combination falls back to the per-destination recommendation
/// stored in rec_sdma_eng_id_peers_info_.
///
/// @param dst_agent            Destination agent of the copy.
/// @param src_agent            Source agent of the copy.
/// @param recommended_ids_mask Out-parameter receiving the recommended engine
///                             mask; it is written unconditionally and must
///                             not be null.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                          uint32_t* recommended_ids_mask) {
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  // From the collected data, gfx94x performance is better only for the first
  // 3 SDMA engines.
  const bool is_gfx94x =
      (isa_->GetMajorVersion() == 9 &&
       (isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5));

  const bool host_to_device = (src_agent.device_type() == core::Agent::kAmdCpuDevice &&
                               dst_agent.device_type() == core::Agent::kAmdGpuDevice);
  const bool device_to_host = (src_agent.device_type() == core::Agent::kAmdGpuDevice &&
                               dst_agent.device_type() == core::Agent::kAmdCpuDevice);

  if (is_gfx94x && host_to_device) {
    // Host to Device: use SDMA engine 0.
    *recommended_ids_mask = HSA_AMD_SDMA_ENGINE_0;
  } else if (is_gfx94x && device_to_host) {
    // Device to Host: use SDMA engine 1, and engine 2 when it exists.
    *recommended_ids_mask = HSA_AMD_SDMA_ENGINE_1;
    if (properties_.NumSdmaEngines + properties_.NumSdmaXgmiEngines > 2) {
      *recommended_ids_mask |= HSA_AMD_SDMA_ENGINE_2;
    }
  } else {
    // No workaround applies: use the recommendation recorded for the
    // destination agent. The lookup is done only here, so the workaround
    // paths no longer perform a store that is immediately overwritten.
    *recommended_ids_mask = rec_sdma_eng_id_peers_info_[dst_agent.public_handle().handle];
  }

  return HSA_STATUS_SUCCESS;
}
@@ -605,10 +605,6 @@ hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_
const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice);
core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent;
if (dst_agent == src_agent) {
return HSA_STATUS_ERROR_INVALID_AGENT;
}
return copy_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask);
}
@@ -617,10 +613,6 @@ hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* sr
const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice);
core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent;
if (dst_agent == src_agent) {
return HSA_STATUS_ERROR_INVALID_AGENT;
}
return copy_agent->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask);
}
@@ -1810,8 +1810,6 @@ hsa_status_t HSA_API
*
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Agent does not have available SDMA engines.
*
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as
* dst_agent == src_agent is generally used for shader copies.
*/
hsa_status_t HSA_API
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
@@ -1828,8 +1826,6 @@ hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
*
* @retval ::HSA_STATUS_SUCCESS For mask returned
*
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as
* dst_agent == src_agent is generally used for shader copies.
*/
hsa_status_t HSA_API
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,