rocr: return preferred SDMA engine mask
- Add a new AMD extension API to return the preferred SDMA engine mask. This can be used in conjunction with the copy_on_engine API to get optimal bandwidth.
Этот коммит содержится в:
коммит произвёл
Kudchadker, Saleel
родитель
bdb6e43b54
Коммит
57c0c643ce
@@ -3,7 +3,7 @@
|
||||
## The University of Illinois/NCSA
|
||||
## Open Source License (NCSA)
|
||||
##
|
||||
## Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
## Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
##
|
||||
## Developed by:
|
||||
##
|
||||
@@ -87,7 +87,7 @@ include(utils)
|
||||
|
||||
|
||||
## Get version strings
|
||||
get_version("1.15.0")
|
||||
get_version("1.16.0")
|
||||
if (${ROCM_PATCH_VERSION})
|
||||
set(VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
endif()
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -1007,7 +1007,16 @@ hsa_status_t HSA_API
|
||||
hsa_status_t HSA_API
|
||||
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
uint32_t *engine_ids_mask) {
|
||||
return amdExtTable->hsa_amd_memory_copy_engine_status_fn(dst_agent, src_agent, engine_ids_mask);
|
||||
return amdExtTable->hsa_amd_memory_copy_engine_status_fn(dst_agent, src_agent,
|
||||
engine_ids_mask);
|
||||
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
// Public entry point for the preferred-copy-engine query. It performs no work
// of its own: the call is forwarded, with its arguments untouched, to the
// hsa_amd_memory_get_preferred_copy_engine_fn pointer held in amdExtTable and
// that function's status is handed back to the caller.
hsa_status_t HSA_API
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
                                         uint32_t* recommended_ids_mask) {
  const hsa_status_t status = amdExtTable->hsa_amd_memory_get_preferred_copy_engine_fn(
      dst_agent, src_agent, recommended_ids_mask);
  return status;
}
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -201,6 +201,18 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// @brief Return the mask of preferred (recommended) DMA engines for the
// copy direction.
|
||||
//
|
||||
// @param [in] dst_agent Destination agent.
|
||||
// @param [in] src_agent Source agent.
|
||||
// @param [out] recommended_ids_mask Mask of recommended engine ids.
|
||||
//
|
||||
// @retval HSA_STATUS_SUCCESS For mask returned
// @retval HSA_STATUS_ERROR Always returned by this default implementation,
// which leaves recommended_ids_mask untouched; subclasses may override.
|
||||
virtual hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
|
||||
uint32_t* recommended_ids_mask) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
// @brief Submit DMA command to set the content of a pointer and wait
|
||||
// until it is finished.
|
||||
//
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -311,6 +311,10 @@ class GpuAgent : public GpuAgentInt {
|
||||
hsa_status_t DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_agent,
|
||||
uint32_t *engine_ids_mask) override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
|
||||
uint32_t* recommended_ids_mask) override;
|
||||
|
||||
// @brief Override from core::Agent.
|
||||
hsa_status_t DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
|
||||
const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -166,6 +166,11 @@ hsa_status_t
|
||||
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
uint32_t *engine_ids_mask);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t
|
||||
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
uint32_t* recommended_ids_mask);
|
||||
|
||||
// Mirrors Amd Extension Apis
|
||||
hsa_status_t hsa_amd_memory_async_copy_rect(
|
||||
const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
|
||||
|
||||
@@ -283,6 +283,16 @@ class Runtime {
|
||||
hsa_status_t CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_agent,
|
||||
uint32_t *engine_ids_mask);
|
||||
|
||||
/// @brief Get preferred SDMA engine for the copy direction
|
||||
///
|
||||
/// @param [in] dst_agent Destination agent.
|
||||
/// @param [in] src_agent Source agent.
|
||||
/// @param [out] recommended_ids_mask Mask of recommended_ids.
|
||||
///
|
||||
/// @retval HSA_STATUS_SUCCESS For mask returned
|
||||
hsa_status_t GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
|
||||
uint32_t* recommended_ids_mask);
|
||||
|
||||
/// @brief Fill the first @p count of uint32_t in ptr with value.
|
||||
///
|
||||
/// @param [in] ptr Memory address to be filled.
|
||||
|
||||
@@ -1246,6 +1246,17 @@ hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_ag
|
||||
return !!(*engine_ids_mask) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
// Reports the recommended SDMA engine mask for a copy towards dst_agent.
//
// The mask is read from rec_sdma_eng_id_peers_info_, keyed by the public
// handle of dst_agent, and stored through recommended_ids_mask. The function
// always returns HSA_STATUS_SUCCESS. recommended_ids_mask is dereferenced
// without a null check, so callers must pass a valid pointer.
hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent,
                                          uint32_t *recommended_ids_mask) {
  // Debug-only sanity check: it must not be the case that neither endpoint is
  // a GPU agent.
  assert(!((src_agent.device_type() != core::Agent::kAmdGpuDevice) &&
           (dst_agent.device_type() != core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  // NOTE(review): if rec_sdma_eng_id_peers_info_ is a std::map-like container,
  // operator[] inserts a zero-valued entry for a handle that is not present —
  // confirm that this is the intended "no preference" behaviour.
  const auto peer_key = dst_agent.public_handle().handle;
  const uint32_t preferred_mask = rec_sdma_eng_id_peers_info_[peer_key];

  *recommended_ids_mask = preferred_mask;
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t GpuAgent::DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset,
|
||||
const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
|
||||
const hsa_dim3_t* range, hsa_amd_copy_direction_t dir,
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -87,7 +87,7 @@ void HsaApiTable::Init() {
|
||||
// they can add preprocessor macros on the new functions
|
||||
|
||||
constexpr size_t expected_core_api_table_size = 1016;
|
||||
constexpr size_t expected_amd_ext_table_size = 592;
|
||||
constexpr size_t expected_amd_ext_table_size = 600;
|
||||
constexpr size_t expected_image_ext_table_size = 128;
|
||||
constexpr size_t expected_finalizer_ext_table_size = 64;
|
||||
constexpr size_t expected_tools_table_size = 64;
|
||||
@@ -475,6 +475,7 @@ void HsaApiTable::UpdateAmdExts() {
|
||||
amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info;
|
||||
amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging;
|
||||
amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all;
|
||||
amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine;
|
||||
}
|
||||
|
||||
void HsaApiTable::UpdateTools() {
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -296,7 +296,8 @@ hsa_status_t hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle,
|
||||
hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agent_handle,
|
||||
const void* src, hsa_agent_t src_agent_handle, size_t size,
|
||||
uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal, hsa_amd_sdma_engine_id_t engine_id,
|
||||
hsa_signal_t completion_signal,
|
||||
hsa_amd_sdma_engine_id_t engine_id,
|
||||
bool force_copy_on_sdma) {
|
||||
TRY;
|
||||
IS_BAD_PTR(dst);
|
||||
@@ -337,7 +338,8 @@ hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agen
|
||||
CATCH;
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, hsa_agent_t src_agent_handle,
|
||||
hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle,
|
||||
hsa_agent_t src_agent_handle,
|
||||
uint32_t *engine_ids_mask) {
|
||||
core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
|
||||
IS_VALID(dst_agent);
|
||||
@@ -345,7 +347,21 @@ hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, hsa
|
||||
core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
|
||||
IS_VALID(src_agent);
|
||||
|
||||
return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent, engine_ids_mask);
|
||||
return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent,
|
||||
engine_ids_mask);
|
||||
}
|
||||
|
||||
// Returns, through recommended_ids_mask, the preferred SDMA engine mask for a
// copy from src_agent_handle to dst_agent_handle.
//
// The output pointer is checked with IS_BAD_PTR before anything else because
// the value is eventually written through it without any further validation.
// Both agent handles are converted to core::Agent objects and checked with
// IS_VALID; the query itself is delegated to Runtime::GetPreferredEngine,
// whose status is returned unchanged.
//
// The body is bracketed with TRY / CATCH in the same way as
// hsa_amd_memory_async_copy_on_engine in this file.
hsa_status_t hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent_handle,
                                                      hsa_agent_t src_agent_handle,
                                                      uint32_t* recommended_ids_mask) {
  TRY;
  IS_BAD_PTR(recommended_ids_mask);

  core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle);
  IS_VALID(dst_agent);

  core::Agent* src_agent = core::Agent::Convert(src_agent_handle);
  IS_VALID(src_agent);

  return core::Runtime::runtime_singleton_->GetPreferredEngine(dst_agent, src_agent,
                                                               recommended_ids_mask);
  CATCH;
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_async_copy_rect(
|
||||
@@ -648,7 +664,7 @@ uint32_t hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signal
|
||||
uint32_t satisfying_signal_idx =
|
||||
core::Signal::WaitMultiple(valid_signals.size(), valid_signals.data(), conds, values, timeout_hint, wait_hint,
|
||||
satisfying_value_vec, false);
|
||||
|
||||
|
||||
// Map back the index
|
||||
satisfying_signal_idx = valid_signal_ids[satisfying_signal_idx];
|
||||
|
||||
|
||||
@@ -614,6 +614,18 @@ hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_
|
||||
return copy_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask);
|
||||
}
|
||||
|
||||
// Resolves which agent answers the preferred-engine query and forwards to it.
//
// The source agent is asked when it is an AMD GPU device; in every other case
// the destination agent is asked. Identical source and destination agents are
// rejected with HSA_STATUS_ERROR_INVALID_AGENT. Otherwise the status returned
// by the chosen agent's DmaPreferredEngine is passed through unchanged.
hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent,
                                         uint32_t* recommended_ids_mask) {
  // Default to the destination; switch to the source only when it is a GPU.
  core::Agent* engine_owner = dst_agent;
  if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) {
    engine_owner = src_agent;
  }

  // Same agent on both ends: nothing to recommend, report an invalid agent.
  if (src_agent == dst_agent) return HSA_STATUS_ERROR_INVALID_AGENT;

  return engine_owner->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask);
}
|
||||
|
||||
hsa_status_t Runtime::FillMemory(void* ptr, uint32_t value, size_t count) {
|
||||
// Choose blit agent from pointer info
|
||||
hsa_amd_pointer_info_t info = {};
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -269,6 +269,7 @@ struct AmdExtTable {
|
||||
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
|
||||
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
|
||||
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
|
||||
decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn;
|
||||
};
|
||||
|
||||
// Table to export HSA Core Runtime Apis
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
@@ -58,7 +58,7 @@
|
||||
// Step Ids of the Api tables exported by Hsa Core Runtime
|
||||
#define HSA_API_TABLE_STEP_VERSION 0x01
|
||||
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x05
|
||||
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x06
|
||||
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
|
||||
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
|
||||
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
|
||||
|
||||
@@ -58,9 +58,10 @@
|
||||
* - 1.5 - hsa_amd_agent_info: HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES
|
||||
* - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
|
||||
* - 1.7 - hsa_amd_signal_wait_all
|
||||
* - 1.8 - hsa_amd_memory_get_preferred_copy_engine
|
||||
*/
|
||||
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 7
|
||||
#define HSA_AMD_INTERFACE_VERSION_MINOR 8
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@@ -1775,8 +1776,26 @@ hsa_status_t HSA_API
|
||||
* dst_agent == src_agent is generally used for shader copies.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
uint32_t *engine_ids_mask);
|
||||
/**
|
||||
* @brief Returns the preferred SDMA engine mask.
|
||||
*
|
||||
* @param[in] dst_agent Destination agent of the copy direction.
|
||||
*
|
||||
* @param[in] src_agent Source agent of the copy direction.
|
||||
*
|
||||
* @param[out] recommended_ids_mask returns available SDMA engine IDs for max bandwidth
|
||||
* that can be masked with hsa_amd_sdma_engine_id_t. Can be 0 if there is no preference
|
||||
*
|
||||
* @retval ::HSA_STATUS_SUCCESS For mask returned
|
||||
*
|
||||
* @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same as
|
||||
* dst_agent == src_agent is generally used for shader copies.
|
||||
*/
|
||||
hsa_status_t HSA_API
|
||||
hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent,
|
||||
uint32_t* recommended_ids_mask);
|
||||
|
||||
/*
|
||||
[Provisional API]
|
||||
|
||||
Ссылка в новой задаче
Block a user