rocr: Workaround for peak SDMA b/w on gfx94x (#626)
* Ideally, SDMA0/1/2 are the engines to use for H2D/D2H copies due to their physical PCIe proximity.
* Allow using the same src/dst agent for the SDMA query APIs.
This commit is contained in:
@@ -1244,11 +1244,30 @@ hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_ag
|
||||
|
||||
// Reports which SDMA engines are recommended for a copy from src_agent to dst_agent.
//
// On gfx94x (ISA major version 9, minor version 4 or 5) a copy between a CPU agent and a
// GPU agent gets a fixed recommendation:
//   - CPU -> GPU: HSA_AMD_SDMA_ENGINE_0
//   - GPU -> CPU: HSA_AMD_SDMA_ENGINE_1, plus HSA_AMD_SDMA_ENGINE_2 when the agent reports
//     more than two SDMA engines in total (regular + XGMI).
// Every other combination returns the mask recorded in rec_sdma_eng_id_peers_info_ for
// dst_agent.
//
// dst_agent             destination agent of the copy.
// src_agent             source agent of the copy.
// recommended_ids_mask  out-parameter receiving the engine id bitmask; it is dereferenced
//                       without a null check, so the caller must pass a valid pointer.
//
// Always returns HSA_STATUS_SUCCESS.
hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                          uint32_t *recommended_ids_mask) {
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  // From the collected data, gfx94x performance is better only for first 3 SDMA engines
  const bool isGfx94x = (isa_->GetMajorVersion() == 9 &&
                         (isa_->GetMinorVersion() == 4 || isa_->GetMinorVersion() == 5));

  const bool host_to_device = (src_agent.device_type() == core::Agent::kAmdCpuDevice &&
                               dst_agent.device_type() == core::Agent::kAmdGpuDevice);
  const bool device_to_host = (src_agent.device_type() == core::Agent::kAmdGpuDevice &&
                               dst_agent.device_type() == core::Agent::kAmdCpuDevice);

  if (isGfx94x && host_to_device) {
    // Host to Device: recommend SDMA engine 0.
    // NOTE(review): no availability check is performed for engine 0 here.
    *recommended_ids_mask = HSA_AMD_SDMA_ENGINE_0;
  } else if (isGfx94x && device_to_host) {
    // Device to Host: recommend SDMA engine 1, and engine 2 as well when the agent
    // exposes more than two engines overall.
    *recommended_ids_mask = HSA_AMD_SDMA_ENGINE_1;

    if (properties_.NumSdmaEngines + properties_.NumSdmaXgmiEngines > 2) {
      *recommended_ids_mask |= HSA_AMD_SDMA_ENGINE_2;
    }
  } else {
    // The peer table is consulted only on this path; the gfx94x host<->device paths
    // above never need it, so no lookup is done for them.
    *recommended_ids_mask = rec_sdma_eng_id_peers_info_[dst_agent.public_handle().handle];
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
@@ -605,10 +605,6 @@ hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_
|
||||
const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice);
|
||||
core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent;
|
||||
|
||||
if (dst_agent == src_agent) {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
return copy_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask);
|
||||
}
|
||||
|
||||
@@ -617,10 +613,6 @@ hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* sr
|
||||
const bool src_gpu = (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice);
|
||||
core::Agent* copy_agent = (src_gpu) ? src_agent : dst_agent;
|
||||
|
||||
if (dst_agent == src_agent) {
|
||||
return HSA_STATUS_ERROR_INVALID_AGENT;
|
||||
}
|
||||
|
||||
return copy_agent->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask);
|
||||
}
|
||||
|
||||
|
||||
@@ -1810,8 +1810,6 @@ hsa_status_t HSA_API
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Agent does not have available SDMA engines.
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as
|
||||
* dst_agent == src_agent is generally used for shader copies.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
@@ -1828,8 +1826,6 @@ hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS For mask returned
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as
|
||||
* dst_agent == src_agent is generally used for shader copies.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
|
||||
Reference in New Issue
Block a user