rocr/driver: add memory residency management interface in driver

This commit introduces MakeMemoryResident and MakeMemoryUnresident
functions to KfdDriver and XdnaDriver classes.

- Added implementations in amd_kfd_driver.cpp
- Added stubs in amd_xdna_driver.cpp returning HSA_STATUS_ERROR
- Updated header files amd_kfd_driver.h and amd_xdna_driver.h
- Removed MakeKfdMemoryResident/Unresident from amd_memory_region.cpp

Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>
Этот коммит содержится в:
Honglei Huang
2025-07-11 13:54:39 +08:00
коммит произвёл Huang, Honglei1
родитель ab6bda7e96
Коммит 6c87f5b5ce
8 изменённых файлов: 77 добавлений и 40 удалений
+25
Просмотреть файл
@@ -638,6 +638,31 @@ hsa_status_t KfdDriver::DeregisterMemory(void* ptr) const {
return HSA_STATUS_SUCCESS;
}
/// @brief Make a memory range resident through the KFD thunk.
///
/// Two calling conventions are accepted:
///  - @p mem_flags and @p nodes both null: the range is mapped with
///    hsaKmtMapMemoryToGPU.
///  - @p mem_flags and @p nodes both non-null: the request is forwarded to
///    MakeKfdMemoryResident together with @p num_nodes.
/// Supplying only one of the two pointers is rejected.
///
/// @param[in] mem Address of the memory to be made resident.
/// @param[in] size Size of the memory.
/// @param[out] alternate_va Forwarded to the underlying mapping call.
/// @param[in] mem_flags Optional mapping flags; must be paired with @p nodes.
/// @param[in] num_nodes Node count forwarded with @p nodes.
/// @param[in] nodes Optional node list; must be paired with @p mem_flags.
/// @return HSA_STATUS_SUCCESS when the mapping call succeeds,
///         HSA_STATUS_ERROR_INVALID_ARGUMENT when exactly one of
///         @p mem_flags / @p nodes is null, HSA_STATUS_ERROR otherwise.
hsa_status_t KfdDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                           const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                           const uint32_t* nodes) const {
  const bool has_flags = (mem_flags != nullptr);
  const bool has_nodes = (nodes != nullptr);

  // Flags and node list only make sense as a pair.
  if (has_flags != has_nodes) {
    debug_print("Invalid memory flags ptr:%p nodes ptr:%p\n", mem_flags, nodes);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  if (has_flags) {
    // Explicit node list: use the node-aware mapping helper.
    return MakeKfdMemoryResident(num_nodes, nodes, mem, size, alternate_va, *mem_flags)
        ? HSA_STATUS_SUCCESS
        : HSA_STATUS_ERROR;
  }

  // No flags and no nodes: plain mapping.
  return (HSAKMT_CALL(hsaKmtMapMemoryToGPU(const_cast<void*>(mem), size, alternate_va)) ==
          HSAKMT_STATUS_SUCCESS)
      ? HSA_STATUS_SUCCESS
      : HSA_STATUS_ERROR;
}
/// @brief Release the residency of a memory range through the KFD thunk.
///
/// The status of hsaKmtUnmapMemoryToGPU is propagated to the caller instead of
/// being discarded: callers such as GpuAgent::ReleaseScratch and
/// MemoryRegion::Unlock compare the result against HSA_STATUS_SUCCESS, and
/// those checks can never trigger if this function unconditionally reports
/// success.
///
/// @param[in] mem Address of the memory to be made unresident.
/// @return HSA_STATUS_SUCCESS if the unmap call succeeds, HSA_STATUS_ERROR
///         otherwise.
hsa_status_t KfdDriver::MakeMemoryUnresident(const void* mem) const {
  const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(const_cast<void*>(mem)));
  return (status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
// AIE does not support streaming performance monitor.
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
+8
Просмотреть файл
@@ -908,5 +908,13 @@ hsa_status_t XdnaDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags me
/// @brief Stub: the XDNA driver unconditionally reports HSA_STATUS_ERROR for
/// memory deregistration; @p ptr is not used.
hsa_status_t XdnaDriver::DeregisterMemory(void* ptr) const {
  return HSA_STATUS_ERROR;
}
/// @brief Stub: the XDNA driver unconditionally reports HSA_STATUS_ERROR for
/// residency requests; none of the arguments are inspected.
hsa_status_t XdnaDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                            const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                            const uint32_t* nodes) const {
  return HSA_STATUS_ERROR;
}
/// @brief Stub: the XDNA driver unconditionally reports HSA_STATUS_ERROR when
/// asked to release residency; @p mem is not used.
hsa_status_t XdnaDriver::MakeMemoryUnresident(const void* mem) const {
  return HSA_STATUS_ERROR;
}
} // namespace AMD
} // namespace rocr
+4
Просмотреть файл
@@ -132,6 +132,10 @@ public:
hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
hsa_status_t DeregisterMemory(void* ptr) const override;
/// Residency management: override of the driver interface's
/// MakeMemoryResident. The trailing three parameters carry no default
/// arguments in this declaration.
hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
const uint32_t* nodes) const override;
/// Residency management: override of the driver interface's
/// MakeMemoryUnresident.
hsa_status_t MakeMemoryUnresident(const void* mem) const override;
hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
-7
Просмотреть файл
@@ -77,13 +77,6 @@ class MemoryRegion : public core::MemoryRegion {
return reinterpret_cast<MemoryRegion*>(region.handle);
}
/// @brief Pin memory.
/// @param[in] num_node Number of entries in @p nodes; the definition asserts it is > 0.
/// @param[in] nodes Node list; the definition asserts it is non-null.
/// @param[in] ptr Address of the memory to pin.
/// @param[in] size Size of the memory.
/// @param[out] alternate_va Reset to 0 by the definition before the mapping call.
/// @param[in] map_flag Mapping flags passed through to the mapping call.
/// @return true if the mapping call reports HSAKMT_STATUS_SUCCESS.
static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr,
size_t size, uint64_t* alternate_va, HsaMemMapFlags map_flag);
/// @brief Unpin memory.
/// @param[in] ptr Address of the memory to unpin.
/// @return true if the unmap call reports HSAKMT_STATUS_SUCCESS.
static bool MakeKfdMemoryUnresident(const void* ptr);
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain,
bool user_visible, core::Agent* owner, const HsaMemoryProperties& mem_props);
+4
Просмотреть файл
@@ -246,6 +246,10 @@ public:
hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
hsa_status_t DeregisterMemory(void* ptr) const override;
/// Residency management: override of the driver interface's
/// MakeMemoryResident. The trailing three parameters carry no default
/// arguments in this declaration.
hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
const uint32_t* nodes) const override;
/// Residency management: override of the driver interface's
/// MakeMemoryUnresident.
hsa_status_t MakeMemoryUnresident(const void* mem) const override;
hsa_status_t IsModelEnabled(bool* enable) const override;
+18
Просмотреть файл
@@ -326,6 +326,24 @@ public:
/// @return HSA_STATUS_SUCCESS if deregister memory successfully.
virtual hsa_status_t DeregisterMemory(void* ptr) const = 0;
/// @brief Make the memory resident so that it can be accessed by the GPU.
/// @param[in] mem Address of the memory to be made resident.
/// @param[in] size Size of the memory.
/// @param[out] alternate_va Alternate virtual address.
/// @param[in] mem_flags Memory flags; may be null.
/// @param[in] num_nodes Number of nodes to be used; may be 0 if not used.
/// @param[in] nodes Nodes to be used; may be null.
/// @note The KfdDriver implementation requires @p mem_flags and @p nodes to
/// be either both null or both non-null and returns
/// HSA_STATUS_ERROR_INVALID_ARGUMENT otherwise.
/// @note The default arguments belong to this base declaration; the
/// overriding declarations do not repeat them.
/// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory
/// resident.
virtual hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
const HsaMemMapFlags* mem_flags = nullptr,
uint32_t num_nodes = 0,
const uint32_t* nodes = nullptr) const = 0;
/// @brief Release the residency of the memory.
/// @param[in] mem Address of the memory to be made unresident.
/// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory
/// unresident.
virtual hsa_status_t MakeMemoryUnresident(const void* mem) const = 0;
/// Unique identifier for supported kernel-mode drivers.
const DriverType kernel_driver_type_;
+7 -7
Просмотреть файл
@@ -556,7 +556,7 @@ void GpuAgent::ReserveScratch()
void* reserved_base = scratch_pool_.alloc(reserved_sz);
assert(reserved_base && "Could not allocate reserved memory");
if (HSAKMT_CALL(hsaKmtMapMemoryToGPU(reserved_base, reserved_sz, &alt_va)) == HSAKMT_STATUS_SUCCESS)
if (driver().MakeMemoryResident(reserved_base, reserved_sz, &alt_va) == HSA_STATUS_SUCCESS)
scratch_cache_.reserve(reserved_sz, reserved_base);
else
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Reserve scratch memory failed.");
@@ -1887,8 +1887,8 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) {
if (scratch.main_queue_base != nullptr) {
HSAuint64 alternate_va;
if ((profile_ == HSA_PROFILE_FULL) ||
(HSAKMT_CALL(hsaKmtMapMemoryToGPU(scratch.main_queue_base, scratch.main_size, &alternate_va)) ==
HSAKMT_STATUS_SUCCESS)) {
(driver().MakeMemoryResident(scratch.main_queue_base, scratch.main_size,
&alternate_va) == HSA_STATUS_SUCCESS)) {
if (scratch.large) scratch_used_large_ += scratch.main_size;
scratch_cache_.insertMain(scratch);
return;
@@ -1940,7 +1940,7 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) {
HSAuint64 alternate_va;
if ((base != nullptr) &&
((profile_ == HSA_PROFILE_FULL) ||
(HSAKMT_CALL(hsaKmtMapMemoryToGPU(base, size, &alternate_va)) == HSAKMT_STATUS_SUCCESS))) {
(driver().MakeMemoryResident(base, size, &alternate_va) == HSA_STATUS_SUCCESS))) {
// Scratch allocated and either full profile or map succeeded.
scratch.main_queue_base = base;
scratch.main_size = size;
@@ -2020,8 +2020,8 @@ void GpuAgent::AcquireQueueAltScratch(ScratchInfo& scratch) {
if (scratch.alt_queue_base != nullptr) {
HSAuint64 alternate_va;
if ((profile_ == HSA_PROFILE_FULL) ||
(HSAKMT_CALL(hsaKmtMapMemoryToGPU(scratch.alt_queue_base, scratch.alt_size, &alternate_va)) ==
HSAKMT_STATUS_SUCCESS)) {
(driver().MakeMemoryResident(scratch.alt_queue_base, scratch.alt_size, &alternate_va) ==
HSA_STATUS_SUCCESS)) {
scratch_cache_.insertAlt(scratch);
return;
}
@@ -2061,7 +2061,7 @@ void GpuAgent::ReleaseQueueAltScratch(ScratchInfo& scratch) {
void GpuAgent::ReleaseScratch(void* base, size_t size, bool large) {
if (profile_ == HSA_PROFILE_BASE) {
if (HSAKMT_STATUS_SUCCESS != HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(base))) {
if (HSA_STATUS_SUCCESS != driver().MakeMemoryUnresident(base)) {
assert(false && "Unmap scratch subrange failed!");
}
}
+11 -26
Просмотреть файл
@@ -58,24 +58,6 @@ namespace AMD {
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
const size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE);
bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr,
size_t size, uint64_t* alternate_va,
HsaMemMapFlags map_flag) {
assert(num_node > 0);
assert(nodes != NULL);
*alternate_va = 0;
const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(
const_cast<void*>(ptr), size, alternate_va, map_flag, num_node, const_cast<uint32_t*>(nodes)));
return (status == HSAKMT_STATUS_SUCCESS);
}
bool MemoryRegion::MakeKfdMemoryUnresident(const void* ptr) {
const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(const_cast<void*>(ptr)));
return (status == HSAKMT_STATUS_SUCCESS);
}
MemoryRegion::MemoryRegion(bool fine_grain, bool kernarg, bool full_profile,
bool extended_scope_fine_grain, bool user_visible, core::Agent* owner,
const HsaMemoryProperties& mem_props)
@@ -508,7 +490,7 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
assert(cpu_in_list);
// This is a system region and only CPU agents in the whitelist.
// Remove old mappings.
AMD::MemoryRegion::MakeKfdMemoryUnresident(ptr);
owner()->driver().MakeMemoryUnresident(ptr);
return HSA_STATUS_SUCCESS;
}
@@ -528,10 +510,10 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
ScopedAcquire<KernelSharedMutex::Shared> lock(
core::Runtime::runtime_singleton_->memory_lock_.shared());
uint64_t alternate_va = 0;
if (!AMD::MemoryRegion::MakeKfdMemoryResident(
whitelist_nodes.size(), &whitelist_nodes[0], ptr,
size, &alternate_va, map_flag)) {
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
if (owner()->driver().MakeMemoryResident(ptr, size, &alternate_va, &map_flag,
whitelist_nodes.size(),
whitelist_nodes.data()) != HSA_STATUS_SUCCESS) {
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
}
@@ -599,8 +581,9 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
if (owner()->driver().RegisterMemory(host_ptr, size, const_cast<HsaMemFlags&>(mem_flag_)) ==
HSA_STATUS_SUCCESS) {
uint64_t alternate_va = 0;
if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0],
host_ptr, size, &alternate_va, map_flag_)) {
if (owner()->driver().MakeMemoryResident(host_ptr, size, &alternate_va, &map_flag_,
whitelist_nodes.size(),
whitelist_nodes.data()) == HSA_STATUS_SUCCESS) {
if (alternate_va != 0) {
*agent_ptr = reinterpret_cast<void*>(alternate_va);
} else {
@@ -625,7 +608,9 @@ hsa_status_t MemoryRegion::Unlock(void* host_ptr) const {
return HSA_STATUS_SUCCESS;
}
MakeKfdMemoryUnresident(host_ptr);
if (owner()->driver().MakeMemoryUnresident(host_ptr) != HSA_STATUS_SUCCESS) {
assert(false && "Failed to unmap host pointer");
}
if (owner()->driver().DeregisterMemory(host_ptr) != HSA_STATUS_SUCCESS) {
assert(false && "Failed to deregister host pointer");
}