From 6c87f5b5cea3350080ff0ade573a643dbd43059c Mon Sep 17 00:00:00 2001 From: Honglei Huang Date: Fri, 11 Jul 2025 13:54:39 +0800 Subject: [PATCH] rocr/driver: add memory residency management interface in driver This commit introduces MakeMemoryResident and MakeMemoryUnresident functions to KfdDriver and XdnaDriver classes. - Added implementations in amd_kfd_driver.cpp - Added stubs in amd_xdna_driver.cpp returning HSA_STATUS_ERROR - Updated header files amd_kfd_driver.h and amd_xdna_driver.h - Removed MakeKfdMemoryResident/Unresident from amd_memory_region.cpp Signed-off-by: Honglei Huang --- .../core/driver/kfd/amd_kfd_driver.cpp | 26 +++++++++++++ .../core/driver/xdna/amd_xdna_driver.cpp | 8 ++++ runtime/hsa-runtime/core/inc/amd_kfd_driver.h | 4 ++ .../hsa-runtime/core/inc/amd_memory_region.h | 7 ---- .../hsa-runtime/core/inc/amd_xdna_driver.h | 4 ++ runtime/hsa-runtime/core/inc/driver.h | 18 +++++++++ .../core/runtime/amd_gpu_agent.cpp | 14 +++---- .../core/runtime/amd_memory_region.cpp | 37 ++++++------------- 8 files changed, 78 insertions(+), 40 deletions(-) diff --git a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index 4d1bfc6602..2c3d518430 100644 --- a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -638,6 +638,32 @@ hsa_status_t KfdDriver::DeregisterMemory(void* ptr) const { return HSA_STATUS_SUCCESS; } +hsa_status_t KfdDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va, + const HsaMemMapFlags* mem_flags, uint32_t num_nodes, + const uint32_t* nodes) const { + if (mem_flags == nullptr && nodes == nullptr) { + if (HSAKMT_CALL(hsaKmtMapMemoryToGPU(const_cast<void*>(mem), size, alternate_va)) != + HSAKMT_STATUS_SUCCESS) { + return HSA_STATUS_ERROR; + } + } else if (mem_flags != nullptr && nodes != nullptr) { + if (!MakeKfdMemoryResident(num_nodes, nodes, mem, size, alternate_va, *mem_flags)) { + 
return HSA_STATUS_ERROR; + } + } else { + debug_print("Invalid memory flags ptr:%p nodes ptr:%p\n", mem_flags, nodes); + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::MakeMemoryUnresident(const void* mem) const { + // Report a failed unmap to the caller instead of unconditionally returning success. + const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(const_cast<void*>(mem))); + return (status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; +} + hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const { // AIE does not support streaming performance monitor. HSAKMT_STATUS status = HSAKMT_STATUS_ERROR; diff --git a/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp index 9ae1043498..58d7ec9d17 100644 --- a/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp +++ b/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp @@ -908,5 +908,13 @@ hsa_status_t XdnaDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags me hsa_status_t XdnaDriver::DeregisterMemory(void* ptr) const { return HSA_STATUS_ERROR; } +hsa_status_t XdnaDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va, + const HsaMemMapFlags* mem_flags, uint32_t num_nodes, + const uint32_t* nodes) const { + return HSA_STATUS_ERROR; +} + +hsa_status_t XdnaDriver::MakeMemoryUnresident(const void* mem) const { return HSA_STATUS_ERROR; } + } // namespace AMD } // namespace rocr diff --git a/runtime/hsa-runtime/core/inc/amd_kfd_driver.h b/runtime/hsa-runtime/core/inc/amd_kfd_driver.h index 87d9a77092..ca8ee8a593 100644 --- a/runtime/hsa-runtime/core/inc/amd_kfd_driver.h +++ b/runtime/hsa-runtime/core/inc/amd_kfd_driver.h @@ -132,6 +132,10 @@ public: hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override; hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override; hsa_status_t DeregisterMemory(void* ptr) const override; + hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va, + const HsaMemMapFlags* 
mem_flags, uint32_t num_nodes, + const uint32_t* nodes) const override; + hsa_status_t MakeMemoryUnresident(const void* mem) const override; hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override; diff --git a/runtime/hsa-runtime/core/inc/amd_memory_region.h b/runtime/hsa-runtime/core/inc/amd_memory_region.h index b6ebaceb4e..13e8313c45 100644 --- a/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ b/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -77,13 +77,6 @@ class MemoryRegion : public core::MemoryRegion { return reinterpret_cast(region.handle); } - /// @brief Pin memory. - static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr, - size_t size, uint64_t* alternate_va, HsaMemMapFlags map_flag); - - /// @brief Unpin memory. - static bool MakeKfdMemoryUnresident(const void* ptr); - MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain, bool user_visible, core::Agent* owner, const HsaMemoryProperties& mem_props); diff --git a/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/runtime/hsa-runtime/core/inc/amd_xdna_driver.h index b23f1b2a2e..34d96d3c62 100644 --- a/runtime/hsa-runtime/core/inc/amd_xdna_driver.h +++ b/runtime/hsa-runtime/core/inc/amd_xdna_driver.h @@ -246,6 +246,10 @@ public: hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override; hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override; hsa_status_t DeregisterMemory(void* ptr) const override; + hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va, + const HsaMemMapFlags* mem_flags, uint32_t num_nodes, + const uint32_t* nodes) const override; + hsa_status_t MakeMemoryUnresident(const void* mem) const override; hsa_status_t IsModelEnabled(bool* enable) const override; diff --git a/runtime/hsa-runtime/core/inc/driver.h b/runtime/hsa-runtime/core/inc/driver.h index 766b77db55..84d07f4041 100644 --- 
a/runtime/hsa-runtime/core/inc/driver.h +++ b/runtime/hsa-runtime/core/inc/driver.h @@ -326,6 +326,24 @@ public: /// @return HSA_STATUS_SUCCESS if deregister memory successfully. virtual hsa_status_t DeregisterMemory(void* ptr) const = 0; + /// @brief Make the memory resident so that it can be accessed by the GPU + /// @param[in] mem address of the memory to be made resident + /// @param[in] size size of the memory + /// @param[out] alternate_va alternate virtual address + /// @param[in] mem_flags memory flags, can be null + /// @param[in] num_nodes number of nodes to be used, can be 0 if not used + /// @param[in] nodes nodes to be used, can be null + /// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory resident + virtual hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va, + const HsaMemMapFlags* mem_flags = nullptr, + uint32_t num_nodes = 0, + const uint32_t* nodes = nullptr) const = 0; + + /// @brief Release the residency of the memory + /// @param[in] mem address of the memory to be made unresident + /// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory unresident + virtual hsa_status_t MakeMemoryUnresident(const void* mem) const = 0; + /// Unique identifier for supported kernel-mode drivers. 
const DriverType kernel_driver_type_; diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 3be9215299..f994fb7fb0 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -556,7 +556,7 @@ void GpuAgent::ReserveScratch() void* reserved_base = scratch_pool_.alloc(reserved_sz); assert(reserved_base && "Could not allocate reserved memory"); - if (HSAKMT_CALL(hsaKmtMapMemoryToGPU(reserved_base, reserved_sz, &alt_va)) == HSAKMT_STATUS_SUCCESS) + if (driver().MakeMemoryResident(reserved_base, reserved_sz, &alt_va) == HSA_STATUS_SUCCESS) scratch_cache_.reserve(reserved_sz, reserved_base); else throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Reserve scratch memory failed."); @@ -1887,8 +1887,8 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) { if (scratch.main_queue_base != nullptr) { HSAuint64 alternate_va; if ((profile_ == HSA_PROFILE_FULL) || - (HSAKMT_CALL(hsaKmtMapMemoryToGPU(scratch.main_queue_base, scratch.main_size, &alternate_va)) == - HSAKMT_STATUS_SUCCESS)) { + (driver().MakeMemoryResident(scratch.main_queue_base, scratch.main_size, + &alternate_va) == HSA_STATUS_SUCCESS)) { if (scratch.large) scratch_used_large_ += scratch.main_size; scratch_cache_.insertMain(scratch); return; @@ -1940,7 +1940,7 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) { HSAuint64 alternate_va; if ((base != nullptr) && ((profile_ == HSA_PROFILE_FULL) || - (HSAKMT_CALL(hsaKmtMapMemoryToGPU(base, size, &alternate_va)) == HSAKMT_STATUS_SUCCESS))) { + (driver().MakeMemoryResident(base, size, &alternate_va) == HSA_STATUS_SUCCESS))) { // Scratch allocated and either full profile or map succeeded. 
scratch.main_queue_base = base; scratch.main_size = size; @@ -2020,8 +2020,8 @@ void GpuAgent::AcquireQueueAltScratch(ScratchInfo& scratch) { if (scratch.alt_queue_base != nullptr) { HSAuint64 alternate_va; if ((profile_ == HSA_PROFILE_FULL) || - (HSAKMT_CALL(hsaKmtMapMemoryToGPU(scratch.alt_queue_base, scratch.alt_size, &alternate_va)) == - HSAKMT_STATUS_SUCCESS)) { + (driver().MakeMemoryResident(scratch.alt_queue_base, scratch.alt_size, &alternate_va) == + HSA_STATUS_SUCCESS)) { scratch_cache_.insertAlt(scratch); return; } @@ -2061,7 +2061,7 @@ void GpuAgent::ReleaseQueueAltScratch(ScratchInfo& scratch) { void GpuAgent::ReleaseScratch(void* base, size_t size, bool large) { if (profile_ == HSA_PROFILE_BASE) { - if (HSAKMT_STATUS_SUCCESS != HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(base))) { + if (HSA_STATUS_SUCCESS != driver().MakeMemoryUnresident(base)) { assert(false && "Unmap scratch subrange failed!"); } } diff --git a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index 269004e64e..7bcef72091 100644 --- a/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -58,24 +58,6 @@ namespace AMD { size_t MemoryRegion::max_sysmem_alloc_size_ = 0; const size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE); -bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr, - size_t size, uint64_t* alternate_va, - HsaMemMapFlags map_flag) { - assert(num_node > 0); - assert(nodes != NULL); - - *alternate_va = 0; - const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes( - const_cast(ptr), size, alternate_va, map_flag, num_node, const_cast(nodes))); - - return (status == HSAKMT_STATUS_SUCCESS); -} - -bool MemoryRegion::MakeKfdMemoryUnresident(const void* ptr) { - const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(const_cast(ptr))); - return (status == HSAKMT_STATUS_SUCCESS); -} - 
MemoryRegion::MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain, bool user_visible, core::Agent* owner, const HsaMemoryProperties& mem_props) @@ -508,7 +490,7 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents, assert(cpu_in_list); // This is a system region and only CPU agents in the whitelist. // Remove old mappings. - AMD::MemoryRegion::MakeKfdMemoryUnresident(ptr); + owner()->driver().MakeMemoryUnresident(ptr); return HSA_STATUS_SUCCESS; } @@ -528,10 +510,10 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents, ScopedAcquire lock( core::Runtime::runtime_singleton_->memory_lock_.shared()); uint64_t alternate_va = 0; - if (!AMD::MemoryRegion::MakeKfdMemoryResident( - whitelist_nodes.size(), &whitelist_nodes[0], ptr, - size, &alternate_va, map_flag)) { - return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + if (owner()->driver().MakeMemoryResident(ptr, size, &alternate_va, &map_flag, + whitelist_nodes.size(), + whitelist_nodes.data()) != HSA_STATUS_SUCCESS) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } } @@ -599,8 +581,9 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents, if (owner()->driver().RegisterMemory(host_ptr, size, const_cast(mem_flag_)) == HSA_STATUS_SUCCESS) { uint64_t alternate_va = 0; - if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0], - host_ptr, size, &alternate_va, map_flag_)) { + if (owner()->driver().MakeMemoryResident(host_ptr, size, &alternate_va, &map_flag_, + whitelist_nodes.size(), + whitelist_nodes.data()) == HSA_STATUS_SUCCESS) { if (alternate_va != 0) { *agent_ptr = reinterpret_cast(alternate_va); } else { @@ -625,7 +608,9 @@ hsa_status_t MemoryRegion::Unlock(void* host_ptr) const { return HSA_STATUS_SUCCESS; } - MakeKfdMemoryUnresident(host_ptr); + if (owner()->driver().MakeMemoryUnresident(host_ptr) != HSA_STATUS_SUCCESS) { + assert(false && "Failed to unmap host pointer"); + } if (owner()->driver().DeregisterMemory(host_ptr) != 
HSA_STATUS_SUCCESS) { assert(false && "Failed to deregister host pointer"); }