diff --git a/CMakeLists.txt b/CMakeLists.txt index 25478b129b..4fe99908d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ ## The University of Illinois/NCSA ## Open Source License (NCSA) ## -## Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +## Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. ## ## Developed by: ## @@ -87,7 +87,7 @@ include(utils) ## Get version strings -get_version("1.15.0") +get_version("1.16.0") if (${ROCM_PATCH_VERSION}) set(VERSION_PATCH ${ROCM_PATCH_VERSION}) endif() diff --git a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp index a1d48c68c2..47427de931 100644 --- a/runtime/hsa-runtime/core/common/hsa_table_interface.cpp +++ b/runtime/hsa-runtime/core/common/hsa_table_interface.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -1007,7 +1007,16 @@ hsa_status_t HSA_API hsa_status_t HSA_API hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent, uint32_t *engine_ids_mask) { - return amdExtTable->hsa_amd_memory_copy_engine_status_fn(dst_agent, src_agent, engine_ids_mask); + return amdExtTable->hsa_amd_memory_copy_engine_status_fn(dst_agent, src_agent, + engine_ids_mask); +} + +// Mirrors Amd Extension Apis +hsa_status_t HSA_API + hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent, + uint32_t* recommended_ids_mask) { + return amdExtTable->hsa_amd_memory_get_preferred_copy_engine_fn(dst_agent, src_agent, + recommended_ids_mask); } // Mirrors Amd Extension Apis diff --git a/runtime/hsa-runtime/core/inc/agent.h b/runtime/hsa-runtime/core/inc/agent.h index 906ce869c5..491b4a6d95 100644 --- a/runtime/hsa-runtime/core/inc/agent.h +++ b/runtime/hsa-runtime/core/inc/agent.h @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // @@ -201,6 +201,18 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { return HSA_STATUS_ERROR; } + // @brief Return the recommended DMA engine ids for the copy direction. + // + // @param [in] dst_agent Destination agent. + // @param [in] src_agent Source agent. + // @param [out] recommended_ids_mask Mask of recommended engine ids. + // + // @retval HSA_STATUS_SUCCESS For mask returned + virtual hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent, + uint32_t* recommended_ids_mask) { + return HSA_STATUS_ERROR; + } + // @brief Submit DMA command to set the content of a pointer and wait // until it is finished. 
// diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index 8efc271c90..01fa1cbfde 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // @@ -311,6 +311,10 @@ class GpuAgent : public GpuAgentInt { hsa_status_t DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_agent, uint32_t *engine_ids_mask) override; + // @brief Override from core::Agent. + hsa_status_t DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent, + uint32_t* recommended_ids_mask) override; + // @brief Override from core::Agent. hsa_status_t DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset, diff --git a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h index 68c98737ab..b2d03de55f 100644 --- a/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h +++ b/runtime/hsa-runtime/core/inc/hsa_ext_amd_impl.h @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -166,6 +166,11 @@ hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent, uint32_t *engine_ids_mask); +// Mirrors Amd Extension Apis +hsa_status_t + hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent, + uint32_t* recommended_ids_mask); + // Mirrors Amd Extension Apis hsa_status_t hsa_amd_memory_async_copy_rect( const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, diff --git a/runtime/hsa-runtime/core/inc/runtime.h b/runtime/hsa-runtime/core/inc/runtime.h index 1f88106a48..ad5958f34a 100644 --- a/runtime/hsa-runtime/core/inc/runtime.h +++ b/runtime/hsa-runtime/core/inc/runtime.h @@ -283,6 +283,16 @@ class Runtime { hsa_status_t CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_agent, uint32_t *engine_ids_mask); + /// @brief Get preferred SDMA engine for the copy direction + /// + /// @param [in] dst_agent Destination agent. + /// @param [in] src_agent Source agent. + /// @param [out] recommended_ids_mask Mask of recommended_ids. + /// + /// @retval HSA_STATUS_SUCCESS For mask returned + hsa_status_t GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent, + uint32_t* recommended_ids_mask); + /// @brief Fill the first @p count of uint32_t in ptr with value. /// /// @param [in] ptr Memory address to be filled. diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 9fd6e035fa..1b62fe707d 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -1246,6 +1246,17 @@ hsa_status_t GpuAgent::DmaCopyStatus(core::Agent& dst_agent, core::Agent& src_ag return !!(*engine_ids_mask) ? 
HSA_STATUS_SUCCESS : HSA_STATUS_ERROR_OUT_OF_RESOURCES; } +hsa_status_t GpuAgent::DmaPreferredEngine(core::Agent& dst_agent, core::Agent& src_agent, + uint32_t *recommended_ids_mask) { + assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) || + (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) && + ("Both devices are CPU agents which is not expected")); + // Mask stored under dst_agent's handle. NOTE(review): confirm this key when dst is this agent. + *recommended_ids_mask = rec_sdma_eng_id_peers_info_[dst_agent.public_handle().handle]; + + return HSA_STATUS_SUCCESS; +} + hsa_status_t GpuAgent::DmaCopyRect(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_amd_copy_direction_t dir, diff --git a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp index ea2a831a13..e6640b4c8e 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_api_trace.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -87,7 +87,7 @@ void HsaApiTable::Init() { // they can add preprocessor macros on the new functions constexpr size_t expected_core_api_table_size = 1016; - constexpr size_t expected_amd_ext_table_size = 592; + constexpr size_t expected_amd_ext_table_size = 600; constexpr size_t expected_image_ext_table_size = 128; constexpr size_t expected_finalizer_ext_table_size = 64; constexpr size_t expected_tools_table_size = 64; @@ -475,6 +475,7 @@ void HsaApiTable::UpdateAmdExts() { amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info; amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging; amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all; + amd_ext_api.hsa_amd_memory_get_preferred_copy_engine_fn = AMD::hsa_amd_memory_get_preferred_copy_engine; } void HsaApiTable::UpdateTools() { diff --git a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index 4998605fd7..cc27fc1a51 100644 --- a/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -296,7 +296,8 @@ hsa_status_t hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle, hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agent_handle, const void* src, hsa_agent_t src_agent_handle, size_t size, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, - hsa_signal_t completion_signal, hsa_amd_sdma_engine_id_t engine_id, + hsa_signal_t completion_signal, + hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) { TRY; IS_BAD_PTR(dst); @@ -337,7 +338,8 @@ hsa_status_t hsa_amd_memory_async_copy_on_engine(void* dst, hsa_agent_t dst_agen CATCH; } -hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, hsa_agent_t src_agent_handle, +hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, + hsa_agent_t src_agent_handle, uint32_t *engine_ids_mask) { core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle); IS_VALID(dst_agent); @@ -345,7 +347,26 @@ hsa_status_t hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent_handle, hsa core::Agent* src_agent = core::Agent::Convert(src_agent_handle); IS_VALID(src_agent); - return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent, engine_ids_mask); + return core::Runtime::runtime_singleton_->CopyMemoryStatus(dst_agent, src_agent, + engine_ids_mask); +} + +// Reports the preferred SDMA engine mask for the src -> dst copy direction. +// Returns HSA_STATUS_ERROR_INVALID_ARGUMENT when recommended_ids_mask is null. +hsa_status_t hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent_handle, + hsa_agent_t src_agent_handle, + uint32_t* recommended_ids_mask) { + // A null output pointer would be dereferenced by the agent implementation; reject it here. + if (recommended_ids_mask == nullptr) return HSA_STATUS_ERROR_INVALID_ARGUMENT; + + core::Agent* dst_agent = core::Agent::Convert(dst_agent_handle); + IS_VALID(dst_agent); + + core::Agent* src_agent = core::Agent::Convert(src_agent_handle); + IS_VALID(src_agent); + + return core::Runtime::runtime_singleton_->GetPreferredEngine(dst_agent, src_agent, + recommended_ids_mask); } hsa_status_t hsa_amd_memory_async_copy_rect( @@ -648,7 +669,7 @@ uint32_t hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signal uint32_t satisfying_signal_idx = 
core::Signal::WaitMultiple(valid_signals.size(), valid_signals.data(), conds, values, timeout_hint, wait_hint, satisfying_value_vec, false); - + // Map back the index satisfying_signal_idx = valid_signal_ids[satisfying_signal_idx]; diff --git a/runtime/hsa-runtime/core/runtime/runtime.cpp b/runtime/hsa-runtime/core/runtime/runtime.cpp index 53ea8963a3..23b5c0ca97 100644 --- a/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -614,6 +614,20 @@ hsa_status_t Runtime::CopyMemoryStatus(core::Agent* dst_agent, core::Agent* src_ return copy_agent->DmaCopyStatus(*dst_agent, *src_agent, engine_ids_mask); } +hsa_status_t Runtime::GetPreferredEngine(core::Agent* dst_agent, core::Agent* src_agent, + uint32_t* recommended_ids_mask) { + // Identical agents are rejected with HSA_STATUS_ERROR_INVALID_AGENT. + if (src_agent == dst_agent) return HSA_STATUS_ERROR_INVALID_AGENT; + + // The source agent answers the query when it is a GPU; otherwise the destination does. + core::Agent* query_agent = dst_agent; + if (src_agent->device_type() == core::Agent::DeviceType::kAmdGpuDevice) { + query_agent = src_agent; + } + + return query_agent->DmaPreferredEngine(*dst_agent, *src_agent, recommended_ids_mask); +} + hsa_status_t Runtime::FillMemory(void* ptr, uint32_t value, size_t count) { // Choose blit agent from pointer info hsa_amd_pointer_info_t info = {}; diff --git a/runtime/hsa-runtime/inc/hsa_api_trace.h b/runtime/hsa-runtime/inc/hsa_api_trace.h index d14d272d5a..b1bfc2e848 100644 --- a/runtime/hsa-runtime/inc/hsa_api_trace.h +++ b/runtime/hsa-runtime/inc/hsa_api_trace.h @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -269,6 +269,7 @@ struct AmdExtTable { decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn; decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn; decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn; + decltype(hsa_amd_memory_get_preferred_copy_engine)* hsa_amd_memory_get_preferred_copy_engine_fn; }; // Table to export HSA Core Runtime Apis diff --git a/runtime/hsa-runtime/inc/hsa_api_trace_version.h b/runtime/hsa-runtime/inc/hsa_api_trace_version.h index 54190803ff..27ebd4c4ca 100644 --- a/runtime/hsa-runtime/inc/hsa_api_trace_version.h +++ b/runtime/hsa-runtime/inc/hsa_api_trace_version.h @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2025, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // @@ -58,7 +58,7 @@ // Step Ids of the Api tables exported by Hsa Core Runtime #define HSA_API_TABLE_STEP_VERSION 0x01 #define HSA_CORE_API_TABLE_STEP_VERSION 0x00 -#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x05 +#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x06 #define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00 #define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01 // Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION diff --git a/runtime/hsa-runtime/inc/hsa_ext_amd.h b/runtime/hsa-runtime/inc/hsa_ext_amd.h index 68670ef1c8..9eb5a945f3 100644 --- a/runtime/hsa-runtime/inc/hsa_ext_amd.h +++ b/runtime/hsa-runtime/inc/hsa_ext_amd.h @@ -58,9 +58,10 @@ * - 1.5 - hsa_amd_agent_info: HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES * - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align * - 1.7 - hsa_amd_signal_wait_all + * - 1.8 - hsa_amd_memory_get_preferred_copy_engine */ #define HSA_AMD_INTERFACE_VERSION_MAJOR 1 -#define HSA_AMD_INTERFACE_VERSION_MINOR 7 +#define HSA_AMD_INTERFACE_VERSION_MINOR 8 #ifdef __cplusplus extern "C" { @@ -1775,8 +1776,26 @@ 
hsa_status_t HSA_API * dst_agent == src_agent is generally used for shader copies. */ hsa_status_t HSA_API - hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent, +hsa_amd_memory_copy_engine_status(hsa_agent_t dst_agent, hsa_agent_t src_agent, uint32_t *engine_ids_mask); + /** + * @brief Returns the preferred SDMA engine mask for a copy direction. + * + * @param[in] dst_agent Destination agent of the copy direction. + * + * @param[in] src_agent Source agent of the copy direction. + * + * @param[out] recommended_ids_mask Returns the SDMA engine IDs recommended for max bandwidth + * that can be masked with hsa_amd_sdma_engine_id_t. Can be 0 if there is no preference. + * + * @retval ::HSA_STATUS_SUCCESS The mask was returned. + * + * @retval ::HSA_STATUS_ERROR_INVALID_AGENT dst_agent and src_agent are the same; + * dst_agent == src_agent is generally used for shader copies. + */ +hsa_status_t HSA_API +hsa_amd_memory_get_preferred_copy_engine(hsa_agent_t dst_agent, hsa_agent_t src_agent, + uint32_t* recommended_ids_mask); /* [Provisional API]