rocr: return preferred SDMA engine mask

- Add a new AMD extension API to return preferred SDMA engine mask.
This can be used in conjunction with copy_on_engine API to get
optimal bandwidth.
Этот коммит содержится в:
Saleel Kudchadker
2025-04-10 14:47:00 +00:00
коммит произвёл Kudchadker, Saleel
родитель bdb6e43b54
Коммит 57c0c643ce
13 изменённых файлов: 119 добавлений и 19 удалений
+2 -2
Просмотреть файл
@@ -3,7 +3,7 @@
## The University of Illinois/NCSA
## Open Source License (NCSA)
##
## Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
## Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
##
## Developed by:
##
@@ -87,7 +87,7 @@ include(utils)
## Get version strings
get_version("1.15.0")
get_version("1.16.0")
if (${ROCM_PATCH_VERSION})
set(VERSION_PATCH ${ROCM_PATCH_VERSION})
endif()
+11 -2
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -1007,7 +1007,16 @@ hsa_status_t HSA_API
hsa_status_t HSA_API
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
uint32_t *engine_ids_mask) {
return amdExtTable->hsa_amd_memory_copy_engine_status_fn(dst_agent, src_agent, engine_ids_mask);
return amdExtTable->hsa_amd_memory_copy_engine_status_fn(dst_agent, src_agent,
engine_ids_mask);
}
// Mirrors Amd Extension Apis
// Public entry point: hands the query to the implementation registered in the
// AMD extension dispatch table and returns its status unchanged.
hsa_status_t HSA_API hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent,
                                                              hsa_agent_t src_agent,
                                                              uint32_t* recommended_ids_mask) {
  const hsa_status_t status = amdExtTable->hsa_amd_memory_get_preferred_copy_engine_fn(
      dst_agent, src_agent, recommended_ids_mask);
  return status;
}
// Mirrors Amd Extension Apis
+13 -1
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -201,6 +201,18 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
return HSA_STATUS_ERROR;
}
// @brief Report the engines recommended for a copy between two agents.
//
// This default implementation provides no recommendation: it does not write
// to @p recommended_ids_mask and returns HSA_STATUS_ERROR.
//
// @param [in] dst_agent Destination agent.
// @param [in] src_agent Source agent.
// @param [out] recommended_ids_mask Mask of recommended engine ids.
//
// @retval HSA_STATUS_SUCCESS For mask returned (by an overriding agent).
// @retval HSA_STATUS_ERROR Returned by this default implementation.
virtual hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
uint32_t* recommended_ids_mask) {
return HSA_STATUS_ERROR;
}
// @brief Submit DMA command to set the content of a pointer and wait
// until it is finished.
//
+5 -1
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -311,6 +311,10 @@ class GpuAgent : public GpuAgentInt {
hsa_status_t DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_agent,
uint32_t *engine_ids_mask) override;
// @brief Override from core::Agent. Writes a recommended engine id mask to
// @p recommended_ids_mask.
hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
uint32_t* recommended_ids_mask) override;
// @brief Override from core::Agent.
hsa_status_t DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
+6 -1
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -166,6 +166,11 @@ hsa_status_t
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
uint32_t *engine_ids_mask);
// Mirrors Amd Extension Apis
// Reports, through recommended_ids_mask, the preferred copy engine mask for
// the given destination/source agent pair.
hsa_status_t
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
uint32_t* recommended_ids_mask);
// Mirrors Amd Extension Apis
hsa_status_t hsa_amd_memory_async_copy_rect(
const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
+10
Просмотреть файл
@@ -283,6 +283,16 @@ class Runtime {
hsa_status_t CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_agent,
uint32_t *engine_ids_mask);
/// @brief Get preferred SDMA engine for the copy direction
///
/// The query is forwarded to the source agent when it is a GPU agent and to
/// the destination agent otherwise; that agent's status is returned.
///
/// @param [in] dst_agent Destination agent.
/// @param [in] src_agent Source agent.
/// @param [out] recommended_ids_mask Mask of recommended_ids.
///
/// @retval HSA_STATUS_SUCCESS For mask returned
/// @retval HSA_STATUS_ERROR_INVALID_AGENT @p dst_agent and @p src_agent are
/// the same agent.
hsa_status_t GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
uint32_t* recommended_ids_mask);
/// @brief Fill the first @p count of uint32_t in ptr with value.
///
/// @param [in] ptr Memory address to be filled.
+11
Просмотреть файл
@@ -1246,6 +1246,17 @@ hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_ag
return !!(*engine_ids_mask) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
// @brief Report the SDMA engines recommended for copies targeting @p dst_agent.
//
// @param [in] dst_agent Destination agent; its public handle keys the lookup.
// @param [in] src_agent Source agent; only inspected by the sanity assert.
// @param [out] recommended_ids_mask Receives the recommended engine id mask,
// or 0 when nothing is recorded for @p dst_agent.
//
// @retval HSA_STATUS_SUCCESS Always; a zero mask means no recommendation.
hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                          uint32_t* recommended_ids_mask) {
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  // Use find() instead of operator[]: on an associative container operator[]
  // inserts a zero-valued entry for an unknown key, which would mutate agent
  // state from what is meant to be a read-only query. The value reported for
  // an unknown peer stays 0 either way.
  // NOTE(review): assumes rec_sdma_eng_id_peers_info_ is a std::map-like
  // container keyed by agent handle - confirm against its declaration.
  uint32_t mask = 0;
  const auto peer = rec_sdma_eng_id_peers_info_.find(dst_agent.public_handle().handle);
  if (peer != rec_sdma_eng_id_peers_info_.end()) {
    mask = peer->second;
  }

  *recommended_ids_mask = mask;
  return HSA_STATUS_SUCCESS;
}
hsa_status_t GpuAgent::DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
const hsa_dim3_t* range, hsa_amd_copy_direction_t dir,
+3 -2
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -87,7 +87,7 @@ void HsaApiTable::Init() {
// they can add preprocessor macros on the new functions
constexpr size_t expected_core_api_table_size = 1016;
constexpr size_t expected_amd_ext_table_size = 592;
constexpr size_t expected_amd_ext_table_size = 600;
constexpr size_t expected_image_ext_table_size = 128;
constexpr size_t expected_finalizer_ext_table_size = 64;
constexpr size_t expected_tools_table_size = 64;
@@ -475,6 +475,7 @@ void HsaApiTable::UpdateAmdExts() {
amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info;
amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging;
amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all;
amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine;
}
void HsaApiTable::UpdateTools() {
+21 -5
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -296,7 +296,8 @@ hsa_status_t hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle,
hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agent_handle,
const void* src, hsa_agent_t src_agent_handle, size_t size,
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
hsa_signal_t completion_signal, hsa_amd_sdma_engine_id_t engine_id,
hsa_signal_t completion_signal,
hsa_amd_sdma_engine_id_t engine_id,
bool force_copy_on_sdma) {
TRY;
IS_BAD_PTR(dst);
@@ -337,7 +338,8 @@ hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agen
CATCH;
}
hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, hsa_agent_t src_agent_handle,
hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle,
hsa_agent_t src_agent_handle,
uint32_t *engine_ids_mask) {
core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
IS_VALID(dst_agent);
@@ -345,7 +347,21 @@ hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, hsa
core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
IS_VALID(src_agent);
return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent, engine_ids_mask);
return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent,
engine_ids_mask);
}
// Validates the arguments, then asks the runtime singleton for the preferred
// engine mask of the dst/src agent pair.
//
// dst_agent_handle / src_agent_handle: public agent handles, converted to
// core::Agent and checked with IS_VALID.
// recommended_ids_mask: output location for the mask; must not be null.
//
// Returns the status produced by Runtime::GetPreferredEngine unless one of
// the argument checks returns early.
hsa_status_t hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent_handle,
                                                      hsa_agent_t src_agent_handle,
                                                      uint32_t* recommended_ids_mask) {
  // Reject a null output pointer here, using the same guard the other copy
  // APIs in this file apply to their pointer arguments; nothing further down
  // the call chain checks it before writing through it.
  IS_BAD_PTR(recommended_ids_mask);

  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  return core::Runtime::runtime_singleton_->GetPreferredEngine(dst_agent, src_agent,
                                                               recommended_ids_mask);
}
hsa_status_t hsa_amd_memory_async_copy_rect(
@@ -648,7 +664,7 @@ uint32_t hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signal
uint32_t satisfying_signal_idx =
core::Signal::WaitMultiple(valid_signals.size(), valid_signals.data(), conds, values, timeout_hint, wait_hint,
satisfying_value_vec, false);
// Map back the index
satisfying_signal_idx = valid_signal_ids[satisfying_signal_idx];
+12
Просмотреть файл
@@ -614,6 +614,18 @@ hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_
return copy_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask);
}
// Routes a preferred-engine query to the agent that answers for the copy:
// the source agent when it is a GPU, the destination agent otherwise.
// A pair made of one and the same agent is rejected with
// HSA_STATUS_ERROR_INVALID_AGENT.
hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
                                         uint32_t* recommended_ids_mask) {
  if (src_agent == dst_agent) {
    return HSA_STATUS_ERROR_INVALID_AGENT;
  }

  core::Agent* engine_owner = dst_agent;
  if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) {
    engine_owner = src_agent;
  }

  return engine_owner->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask);
}
hsa_status_t Runtime::FillMemory(void* ptr, uint32_t value, size_t count) {
// Choose blit agent from pointer info
hsa_amd_pointer_info_t info = {};
+2 -1
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -269,6 +269,7 @@ struct AmdExtTable {
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
};
// Table to export HSA Core Runtime Apis
+2 -2
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -58,7 +58,7 @@
// Step Ids of the Api tables exported by Hsa Core Runtime
#define HSA_API_TABLE_STEP_VERSION 0x01
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x05
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x06
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
+21 -2
Просмотреть файл
@@ -58,9 +58,10 @@
* - 1.5 - hsa_amd_agent_info: HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES
* - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
* - 1.7 - hsa_amd_signal_wait_all
* - 1.8 - hsa_amd_memory_get_preferred_copy_engine
*/
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
#define HSA_AMD_INTERFACE_VERSION_MINOR 7
#define HSA_AMD_INTERFACE_VERSION_MINOR 8
#ifdef __cplusplus
extern "C" {
@@ -1775,8 +1776,26 @@ hsa_status_t HSA_API
* dst_agent == src_agent is generally used for shader copies.
*/
hsa_status_t HSA_API
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
uint32_t *engine_ids_mask);
/**
 * @brief Returns the preferred SDMA engine mask for a copy direction.
 *
 * @param[in] dst_agent Destination agent of the copy direction.
 *
 * @param[in] src_agent Source agent of the copy direction.
 *
 * @param[out] recommended_ids_mask Mask of the SDMA engine IDs recommended for
 * maximum bandwidth; it can be masked with hsa_amd_sdma_engine_id_t values.
 * Can be 0 if there is no preference.
 *
 * @retval ::HSA_STATUS_SUCCESS For mask returned
 *
 * @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same, as
 * dst_agent == src_agent is generally used for shader copies.
 */
hsa_status_t HSA_API
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
uint32_t* recommended_ids_mask);
/*
[Provisional API]