diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index baf30aaac9..4d1bfc6602 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -624,6 +624,20 @@ hsa_status_t KfdDriver::AvailableMemory(uint32_t node_id, uint64_t* available_si return HSA_STATUS_SUCCESS; } +hsa_status_t KfdDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const { + assert(ptr); + assert(size > 0); + + if (HSAKMT_CALL(hsaKmtRegisterMemoryWithFlags(ptr, size, mem_flags)) != HSAKMT_STATUS_SUCCESS) + return HSA_STATUS_ERROR; + return HSA_STATUS_SUCCESS; +} + +hsa_status_t KfdDriver::DeregisterMemory(void* ptr) const { + if (HSAKMT_CALL(hsaKmtDeregisterMemory(ptr)) != HSAKMT_STATUS_SUCCESS) return HSA_STATUS_ERROR; + return HSA_STATUS_SUCCESS; +} + hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const { // AIE does not support streaming performance monitor. 
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp index f4555e4005..9ae1043498 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp @@ -902,5 +902,11 @@ hsa_status_t XdnaDriver::AvailableMemory(uint32_t node_id, uint64_t* available_s return HSA_STATUS_ERROR; } +hsa_status_t XdnaDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const { + return HSA_STATUS_ERROR; +} + +hsa_status_t XdnaDriver::DeregisterMemory(void* ptr) const { return HSA_STATUS_ERROR; } + } // namespace AMD } // namespace rocr diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h index e9feb3a39c..87d9a77092 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_kfd_driver.h @@ -130,6 +130,8 @@ public: hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const override; hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const override; hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override; + hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override; + hsa_status_t DeregisterMemory(void* ptr) const override; hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h index 68a161641d..b6ebaceb4e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h +++ 
b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_memory_region.h @@ -77,10 +77,6 @@ class MemoryRegion : public core::MemoryRegion { return reinterpret_cast(region.handle); } - static bool RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags); - - static void DeregisterMemory(void* ptr); - /// @brief Pin memory. static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr, size_t size, uint64_t* alternate_va, HsaMemMapFlags map_flag); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h index 7ff5cf8e6a..b23f1b2a2e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_xdna_driver.h @@ -244,6 +244,8 @@ public: hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const override; hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const override; hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override; + hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override; + hsa_status_t DeregisterMemory(void* ptr) const override; hsa_status_t IsModelEnabled(bool* enable) const override; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h index f03ec94a0d..766b77db55 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/driver.h @@ -314,6 +314,18 @@ public: /// @return HSA_STATUS_SUCCESS if the driver successfully returns the available memory size. 
virtual hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const = 0; + /// @brief Register memory with the GPU + /// @param[in] ptr Address of the memory to be registered + /// @param[in] size Size of the memory + /// @param[in] mem_flags Flags used when registering the memory + /// @return HSA_STATUS_SUCCESS if the memory was registered successfully. + virtual hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const = 0; + + /// @brief Deregister memory + /// @param[in] ptr Address of the memory to be deregistered + /// @return HSA_STATUS_SUCCESS if the memory was deregistered successfully. + virtual hsa_status_t DeregisterMemory(void* ptr) const = 0; + /// Unique identifier for supported kernel-mode drivers. const DriverType kernel_driver_type_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp index 93055b0c49..269004e64e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_memory_region.cpp @@ -58,16 +58,6 @@ namespace AMD { size_t MemoryRegion::max_sysmem_alloc_size_ = 0; const size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE); -bool MemoryRegion::RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags) { - assert(ptr != NULL); - assert(size != 0); - - const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtRegisterMemoryWithFlags(ptr, size, MemFlags)); - return (status == HSAKMT_STATUS_SUCCESS); -} - -void MemoryRegion::DeregisterMemory(void* ptr) { HSAKMT_CALL(hsaKmtDeregisterMemory(ptr)); } - bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr, size_t size, uint64_t* alternate_va, HsaMemMapFlags map_flag) { @@ -606,7 +596,8 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents, } // Call kernel driver to register and pin the memory. 
- if (RegisterMemory(host_ptr, size, mem_flag_)) { + if (owner()->driver().RegisterMemory(host_ptr, size, const_cast(mem_flag_)) == + HSA_STATUS_SUCCESS) { uint64_t alternate_va = 0; if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0], host_ptr, size, &alternate_va, map_flag_)) { @@ -618,7 +609,7 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents, return HSA_STATUS_SUCCESS; } - AMD::MemoryRegion::DeregisterMemory(host_ptr); + owner()->driver().DeregisterMemory(host_ptr); return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } @@ -635,7 +626,9 @@ hsa_status_t MemoryRegion::Unlock(void* host_ptr) const { } MakeKfdMemoryUnresident(host_ptr); - DeregisterMemory(host_ptr); + if (owner()->driver().DeregisterMemory(host_ptr) != HSA_STATUS_SUCCESS) { + assert(false && "Failed to deregister host pointer"); + } return HSA_STATUS_SUCCESS; }