rocr/driver: add memory registration and deregistration into driver

This commit completes the memory register/deregister interface change.

Removed the static RegisterMemory and DeregisterMemory methods from the MemoryRegion class.

- Added pure virtual methods to base Driver interface in driver class
- Added implementation in KFD driver
- Modified MemoryRegion Lock and Unlock to use driver interface

Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>


[ROCm/ROCR-Runtime commit: ab6bda7e96]
Cette révision appartient à :
Honglei Huang
2025-07-11 13:49:04 +08:00
révisé par Huang, Honglei1
Parent 724c9b9803
révision b61df004ff
7 fichiers modifiés avec 42 ajouts et 17 suppressions
+14
Voir le fichier
@@ -624,6 +624,20 @@ hsa_status_t KfdDriver::AvailableMemory(uint32_t node_id, uint64_t* available_si
return HSA_STATUS_SUCCESS;
}
/// @brief Register a memory range through hsaKmtRegisterMemoryWithFlags.
/// @param[in] ptr Address of the memory to register; asserted to be non-null.
/// @param[in] size Size of the memory to register; asserted to be greater than zero.
/// @param[in] mem_flags Flags forwarded unchanged to hsaKmtRegisterMemoryWithFlags.
/// @return HSA_STATUS_SUCCESS when the call reports HSAKMT_STATUS_SUCCESS,
///         HSA_STATUS_ERROR for any other result.
hsa_status_t KfdDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const {
  assert(ptr);
  assert(size > 0);

  const HSAKMT_STATUS kmt_status =
      HSAKMT_CALL(hsaKmtRegisterMemoryWithFlags(ptr, size, mem_flags));
  return (kmt_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
/// @brief Deregister memory through hsaKmtDeregisterMemory.
/// @param[in] ptr Pointer handed to hsaKmtDeregisterMemory.
/// @return HSA_STATUS_SUCCESS when the call reports HSAKMT_STATUS_SUCCESS,
///         HSA_STATUS_ERROR for any other result.
hsa_status_t KfdDriver::DeregisterMemory(void* ptr) const {
  const HSAKMT_STATUS kmt_status = HSAKMT_CALL(hsaKmtDeregisterMemory(ptr));
  if (kmt_status == HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_SUCCESS;
  }
  return HSA_STATUS_ERROR;
}
hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
// AIE does not support streaming performance monitor.
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
@@ -902,5 +902,11 @@ hsa_status_t XdnaDriver::AvailableMemory(uint32_t node_id, uint64_t* available_s
return HSA_STATUS_ERROR;
}
/// @brief Memory registration entry point of the XDNA driver.
///
/// None of the arguments are used; the call unconditionally reports failure.
/// NOTE(review): presumably registration is unsupported on this driver - confirm.
/// @return Always HSA_STATUS_ERROR.
hsa_status_t XdnaDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const {
  // Nothing is registered here; every call fails.
  return HSA_STATUS_ERROR;
}
/// @brief Memory deregistration entry point of the XDNA driver.
///
/// The pointer is not used; the call unconditionally reports failure.
/// @return Always HSA_STATUS_ERROR.
hsa_status_t XdnaDriver::DeregisterMemory(void* ptr) const {
  // Nothing is deregistered here; every call fails.
  return HSA_STATUS_ERROR;
}
} // namespace AMD
} // namespace rocr
+2
Voir le fichier
@@ -130,6 +130,8 @@ public:
hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const override;
hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const override;
hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
/// Override of the driver interface's memory registration method.
hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
/// Override of the driver interface's memory deregistration method.
hsa_status_t DeregisterMemory(void* ptr) const override;
hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
-4
Voir le fichier
@@ -77,10 +77,6 @@ class MemoryRegion : public core::MemoryRegion {
return reinterpret_cast<MemoryRegion*>(region.handle);
}
/// @brief Register memory by calling hsaKmtRegisterMemoryWithFlags.
/// @param[in] ptr Address of the memory to register; asserted to be non-null.
/// @param[in] size Size of the memory to register; asserted to be non-zero.
/// @param[in] MemFlags Flags forwarded to hsaKmtRegisterMemoryWithFlags.
/// @return true if the call reported HSAKMT_STATUS_SUCCESS, false otherwise.
static bool RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags);
/// @brief Deregister memory by calling hsaKmtDeregisterMemory.
/// The status of that call is not reported to the caller.
/// @param[in] ptr Pointer handed to hsaKmtDeregisterMemory.
static void DeregisterMemory(void* ptr);
/// @brief Pin memory.
static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr,
size_t size, uint64_t* alternate_va, HsaMemMapFlags map_flag);
+2
Voir le fichier
@@ -244,6 +244,8 @@ public:
hsa_status_t GetWallclockFrequency(uint32_t node_id, uint64_t* frequency) const override;
hsa_status_t AllocateScratchMemory(uint32_t node_id, uint64_t size, void** mem) const override;
hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
/// Override of the driver interface's memory registration method.
hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
/// Override of the driver interface's memory deregistration method.
hsa_status_t DeregisterMemory(void* ptr) const override;
hsa_status_t IsModelEnabled(bool* enable) const override;
+12
Voir le fichier
@@ -314,6 +314,18 @@ public:
/// @return HSA_STATUS_SUCCESS if the driver successfully returns the available memory size.
virtual hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const = 0;
/// @brief Register a memory range with the GPU through the driver.
/// @param[in] ptr Address of the memory to be registered.
/// @param[in] size Size of the memory to be registered.
/// @param[in] mem_flags Flags that control how the memory is registered.
/// @return HSA_STATUS_SUCCESS if the memory was registered successfully.
virtual hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const = 0;
/// @brief Deregister memory through the driver.
/// @param[in] ptr Pointer to the memory to be deregistered.
/// @return HSA_STATUS_SUCCESS if the memory was deregistered successfully.
virtual hsa_status_t DeregisterMemory(void* ptr) const = 0;
/// Unique identifier for supported kernel-mode drivers.
const DriverType kernel_driver_type_;
+6 -13
Voir le fichier
@@ -58,16 +58,6 @@ namespace AMD {
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
const size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE);
bool MemoryRegion::RegisterMemory(void* ptr, size_t size, const HsaMemFlags& MemFlags) {
assert(ptr != NULL);
assert(size != 0);
const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtRegisterMemoryWithFlags(ptr, size, MemFlags));
return (status == HSAKMT_STATUS_SUCCESS);
}
/// @brief Deregister memory by calling hsaKmtDeregisterMemory.
/// @param[in] ptr Pointer handed to hsaKmtDeregisterMemory.
void MemoryRegion::DeregisterMemory(void* ptr) {
  // The status returned by the call is intentionally not inspected here.
  HSAKMT_CALL(hsaKmtDeregisterMemory(ptr));
}
bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr,
size_t size, uint64_t* alternate_va,
HsaMemMapFlags map_flag) {
@@ -606,7 +596,8 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
}
// Call kernel driver to register and pin the memory.
if (RegisterMemory(host_ptr, size, mem_flag_)) {
if (owner()->driver().RegisterMemory(host_ptr, size, const_cast<HsaMemFlags&>(mem_flag_)) ==
HSA_STATUS_SUCCESS) {
uint64_t alternate_va = 0;
if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0],
host_ptr, size, &alternate_va, map_flag_)) {
@@ -618,7 +609,7 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
return HSA_STATUS_SUCCESS;
}
AMD::MemoryRegion::DeregisterMemory(host_ptr);
owner()->driver().DeregisterMemory(host_ptr);
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
@@ -635,7 +626,9 @@ hsa_status_t MemoryRegion::Unlock(void* host_ptr) const {
}
MakeKfdMemoryUnresident(host_ptr);
DeregisterMemory(host_ptr);
if (owner()->driver().DeregisterMemory(host_ptr) != HSA_STATUS_SUCCESS) {
assert(false && "Failed to deregister host pointer");
}
return HSA_STATUS_SUCCESS;
}