rocr/driver: add memory residency management interface in driver
This commit introduces MakeMemoryResident and MakeMemoryUnresident functions to KfdDriver and XdnaDriver classes. - Added implementations in amd_kfd_driver.cpp - Added stubs in amd_xdna_driver.cpp returning HSA_STATUS_ERROR - Updated header files amd_kfd_driver.h and amd_xdna_driver.h - Removed MakeKfdMemoryResident/Unresident from amd_memory_region.cpp Signed-off-by: Honglei Huang <Honglei1.Huang@amd.com>
Этот коммит содержится в:
коммит произвёл
Huang, Honglei1
родитель
ab6bda7e96
Коммит
6c87f5b5ce
@@ -638,6 +638,31 @@ hsa_status_t KfdDriver::DeregisterMemory(void* ptr) const {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/// @brief Make a memory range resident through the KFD thunk.
///
/// Two call shapes are accepted:
///  - @p mem_flags and @p nodes both null: the range is mapped with
///    hsaKmtMapMemoryToGPU.
///  - @p mem_flags and @p nodes both non-null: the request is forwarded to
///    MakeKfdMemoryResident together with @p num_nodes and the dereferenced
///    flags.
/// Supplying exactly one of the two pointers is rejected.
///
/// @param[in] mem address of memory to be made resident
/// @param[in] size size of memory
/// @param[out] alternate_va passed through to the underlying mapping call
/// @param[in] mem_flags mapping flags, or null
/// @param[in] num_nodes number of entries in @p nodes
/// @param[in] nodes node list, or null
/// @return HSA_STATUS_SUCCESS if the mapping call succeeds,
/// HSA_STATUS_ERROR_INVALID_ARGUMENT if only one of @p mem_flags / @p nodes is
/// null, HSA_STATUS_ERROR if the mapping call fails.
hsa_status_t KfdDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                           const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                           const uint32_t* nodes) const {
  const bool has_flags = (mem_flags != nullptr);
  const bool has_nodes = (nodes != nullptr);

  // Flags and node list must be supplied together or not at all.
  if (has_flags != has_nodes) {
    debug_print("Invalid memory flags ptr:%p nodes ptr:%p\n", mem_flags, nodes);
    return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  }

  // No flags / no node list: plain mapping call.
  if (!has_flags) {
    const HSAKMT_STATUS map_status =
        HSAKMT_CALL(hsaKmtMapMemoryToGPU(const_cast<void*>(mem), size, alternate_va));
    return (map_status == HSAKMT_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
  }

  // Explicit node list with caller-provided flags.
  const bool mapped =
      MakeKfdMemoryResident(num_nodes, nodes, mem, size, alternate_va, *mem_flags);
  return mapped ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
|
||||
|
||||
/// @brief Release the residency of a memory range by calling
/// hsaKmtUnmapMemoryToGPU on it.
/// @param[in] mem address of memory to be made unresident
/// @return HSA_STATUS_SUCCESS if the unmap call succeeds, HSA_STATUS_ERROR
/// otherwise.
hsa_status_t KfdDriver::MakeMemoryUnresident(const void* mem) const {
  // Report the thunk result instead of discarding it: callers compare the
  // return value against HSA_STATUS_SUCCESS, which is meaningless if this
  // function always reports success.
  if (HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(const_cast<void*>(mem))) != HSAKMT_STATUS_SUCCESS) {
    return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
hsa_status_t KfdDriver::IsModelEnabled(bool* enable) const {
|
||||
// AIE does not support streaming performance monitor.
|
||||
HSAKMT_STATUS status = HSAKMT_STATUS_ERROR;
|
||||
|
||||
@@ -908,5 +908,13 @@ hsa_status_t XdnaDriver::RegisterMemory(void* ptr, uint64_t size, HsaMemFlags me
|
||||
|
||||
/// @brief Stub: the argument is ignored.
/// @param[in] ptr unused
/// @return Always HSA_STATUS_ERROR.
hsa_status_t XdnaDriver::DeregisterMemory(void* ptr) const {
  // Unconditionally report failure.
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
/// @brief Stub: every argument is ignored.
/// @param[in] mem unused
/// @param[in] size unused
/// @param[out] alternate_va unused; never written
/// @param[in] mem_flags unused
/// @param[in] num_nodes unused
/// @param[in] nodes unused
/// @return Always HSA_STATUS_ERROR.
hsa_status_t XdnaDriver::MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
                                            const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
                                            const uint32_t* nodes) const {
  // Unconditionally report failure.
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
/// @brief Stub: the argument is ignored.
/// @param[in] mem unused
/// @return Always HSA_STATUS_ERROR.
hsa_status_t XdnaDriver::MakeMemoryUnresident(const void* mem) const {
  // Unconditionally report failure.
  return HSA_STATUS_ERROR;
}
|
||||
|
||||
} // namespace AMD
|
||||
} // namespace rocr
|
||||
|
||||
@@ -132,6 +132,10 @@ public:
|
||||
hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
|
||||
hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
|
||||
hsa_status_t DeregisterMemory(void* ptr) const override;
|
||||
hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
|
||||
const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
|
||||
const uint32_t* nodes) const override;
|
||||
hsa_status_t MakeMemoryUnresident(const void* mem) const override;
|
||||
|
||||
hsa_status_t OpenSMI(uint32_t node_id, int* fd) const override;
|
||||
|
||||
|
||||
@@ -77,13 +77,6 @@ class MemoryRegion : public core::MemoryRegion {
|
||||
return reinterpret_cast<MemoryRegion*>(region.handle);
|
||||
}
|
||||
|
||||
/// @brief Pin memory.
|
||||
static bool MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr,
|
||||
size_t size, uint64_t* alternate_va, HsaMemMapFlags map_flag);
|
||||
|
||||
/// @brief Unpin memory.
|
||||
static bool MakeKfdMemoryUnresident(const void* ptr);
|
||||
|
||||
MemoryRegion(bool fine_grain, bool kernarg, bool full_profile, bool extended_scope_fine_grain,
|
||||
bool user_visible, core::Agent* owner, const HsaMemoryProperties& mem_props);
|
||||
|
||||
|
||||
@@ -246,6 +246,10 @@ public:
|
||||
hsa_status_t AvailableMemory(uint32_t node_id, uint64_t* available_size) const override;
|
||||
hsa_status_t RegisterMemory(void* ptr, uint64_t size, HsaMemFlags mem_flags) const override;
|
||||
hsa_status_t DeregisterMemory(void* ptr) const override;
|
||||
hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
|
||||
const HsaMemMapFlags* mem_flags, uint32_t num_nodes,
|
||||
const uint32_t* nodes) const override;
|
||||
hsa_status_t MakeMemoryUnresident(const void* mem) const override;
|
||||
|
||||
hsa_status_t IsModelEnabled(bool* enable) const override;
|
||||
|
||||
|
||||
@@ -326,6 +326,24 @@ public:
|
||||
/// @return HSA_STATUS_SUCCESS if deregister memory successfully.
|
||||
virtual hsa_status_t DeregisterMemory(void* ptr) const = 0;
|
||||
|
||||
/// @brief Make the memory resident so that it can be accessed by the GPU
|
||||
/// @param[in] mem address of memory to be made resident
|
||||
/// @param[in] size size of memory
|
||||
/// @param[out] alternate_va alternate virtual address
|
||||
/// @param[in] mem_flags memory flags; can be null
|
||||
/// @param[in] num_nodes number of nodes to be used; can be 0 if not used
|
||||
/// @param[in] nodes nodes to be used; can be null
|
||||
/// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory resident
|
||||
virtual hsa_status_t MakeMemoryResident(const void* mem, size_t size, uint64_t* alternate_va,
|
||||
const HsaMemMapFlags* mem_flags = nullptr,
|
||||
uint32_t num_nodes = 0,
|
||||
const uint32_t* nodes = nullptr) const = 0;
|
||||
|
||||
/// @brief Releases the residency of the memory
|
||||
/// @param[in] mem address of memory to be made unresident
|
||||
/// @return HSA_STATUS_SUCCESS if the driver successfully makes the memory unresident
|
||||
virtual hsa_status_t MakeMemoryUnresident(const void* mem) const = 0;
|
||||
|
||||
/// Unique identifier for supported kernel-mode drivers.
|
||||
const DriverType kernel_driver_type_;
|
||||
|
||||
|
||||
@@ -556,7 +556,7 @@ void GpuAgent::ReserveScratch()
|
||||
void* reserved_base = scratch_pool_.alloc(reserved_sz);
|
||||
assert(reserved_base && "Could not allocate reserved memory");
|
||||
|
||||
if (HSAKMT_CALL(hsaKmtMapMemoryToGPU(reserved_base, reserved_sz, &alt_va)) == HSAKMT_STATUS_SUCCESS)
|
||||
if (driver().MakeMemoryResident(reserved_base, reserved_sz, &alt_va) == HSA_STATUS_SUCCESS)
|
||||
scratch_cache_.reserve(reserved_sz, reserved_base);
|
||||
else
|
||||
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Reserve scratch memory failed.");
|
||||
@@ -1887,8 +1887,8 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) {
|
||||
if (scratch.main_queue_base != nullptr) {
|
||||
HSAuint64 alternate_va;
|
||||
if ((profile_ == HSA_PROFILE_FULL) ||
|
||||
(HSAKMT_CALL(hsaKmtMapMemoryToGPU(scratch.main_queue_base, scratch.main_size, &alternate_va)) ==
|
||||
HSAKMT_STATUS_SUCCESS)) {
|
||||
(driver().MakeMemoryResident(scratch.main_queue_base, scratch.main_size,
|
||||
&alternate_va) == HSA_STATUS_SUCCESS)) {
|
||||
if (scratch.large) scratch_used_large_ += scratch.main_size;
|
||||
scratch_cache_.insertMain(scratch);
|
||||
return;
|
||||
@@ -1940,7 +1940,7 @@ void GpuAgent::AcquireQueueMainScratch(ScratchInfo& scratch) {
|
||||
HSAuint64 alternate_va;
|
||||
if ((base != nullptr) &&
|
||||
((profile_ == HSA_PROFILE_FULL) ||
|
||||
(HSAKMT_CALL(hsaKmtMapMemoryToGPU(base, size, &alternate_va)) == HSAKMT_STATUS_SUCCESS))) {
|
||||
(driver().MakeMemoryResident(base, size, &alternate_va) == HSA_STATUS_SUCCESS))) {
|
||||
// Scratch allocated and either full profile or map succeeded.
|
||||
scratch.main_queue_base = base;
|
||||
scratch.main_size = size;
|
||||
@@ -2020,8 +2020,8 @@ void GpuAgent::AcquireQueueAltScratch(ScratchInfo& scratch) {
|
||||
if (scratch.alt_queue_base != nullptr) {
|
||||
HSAuint64 alternate_va;
|
||||
if ((profile_ == HSA_PROFILE_FULL) ||
|
||||
(HSAKMT_CALL(hsaKmtMapMemoryToGPU(scratch.alt_queue_base, scratch.alt_size, &alternate_va)) ==
|
||||
HSAKMT_STATUS_SUCCESS)) {
|
||||
(driver().MakeMemoryResident(scratch.alt_queue_base, scratch.alt_size, &alternate_va) ==
|
||||
HSA_STATUS_SUCCESS)) {
|
||||
scratch_cache_.insertAlt(scratch);
|
||||
return;
|
||||
}
|
||||
@@ -2061,7 +2061,7 @@ void GpuAgent::ReleaseQueueAltScratch(ScratchInfo& scratch) {
|
||||
|
||||
void GpuAgent::ReleaseScratch(void* base, size_t size, bool large) {
|
||||
if (profile_ == HSA_PROFILE_BASE) {
|
||||
if (HSAKMT_STATUS_SUCCESS != HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(base))) {
|
||||
if (HSA_STATUS_SUCCESS != driver().MakeMemoryUnresident(base)) {
|
||||
assert(false && "Unmap scratch subrange failed!");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,24 +58,6 @@ namespace AMD {
|
||||
size_t MemoryRegion::max_sysmem_alloc_size_ = 0;
|
||||
const size_t MemoryRegion::kPageSize_ = sysconf(_SC_PAGESIZE);
|
||||
|
||||
bool MemoryRegion::MakeKfdMemoryResident(size_t num_node, const uint32_t* nodes, const void* ptr,
|
||||
size_t size, uint64_t* alternate_va,
|
||||
HsaMemMapFlags map_flag) {
|
||||
assert(num_node > 0);
|
||||
assert(nodes != NULL);
|
||||
|
||||
*alternate_va = 0;
|
||||
const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtMapMemoryToGPUNodes(
|
||||
const_cast<void*>(ptr), size, alternate_va, map_flag, num_node, const_cast<uint32_t*>(nodes)));
|
||||
|
||||
return (status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
bool MemoryRegion::MakeKfdMemoryUnresident(const void* ptr) {
|
||||
const HSAKMT_STATUS status = HSAKMT_CALL(hsaKmtUnmapMemoryToGPU(const_cast<void*>(ptr)));
|
||||
return (status == HSAKMT_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
MemoryRegion::MemoryRegion(bool fine_grain, bool kernarg, bool full_profile,
|
||||
bool extended_scope_fine_grain, bool user_visible, core::Agent* owner,
|
||||
const HsaMemoryProperties& mem_props)
|
||||
@@ -508,7 +490,7 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
|
||||
assert(cpu_in_list);
|
||||
// This is a system region and only CPU agents in the whitelist.
|
||||
// Remove old mappings.
|
||||
AMD::MemoryRegion::MakeKfdMemoryUnresident(ptr);
|
||||
owner()->driver().MakeMemoryUnresident(ptr);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -528,10 +510,10 @@ hsa_status_t MemoryRegion::AllowAccess(uint32_t num_agents,
|
||||
ScopedAcquire<KernelSharedMutex::Shared> lock(
|
||||
core::Runtime::runtime_singleton_->memory_lock_.shared());
|
||||
uint64_t alternate_va = 0;
|
||||
if (!AMD::MemoryRegion::MakeKfdMemoryResident(
|
||||
whitelist_nodes.size(), &whitelist_nodes[0], ptr,
|
||||
size, &alternate_va, map_flag)) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
if (owner()->driver().MakeMemoryResident(ptr, size, &alternate_va, &map_flag,
|
||||
whitelist_nodes.size(),
|
||||
whitelist_nodes.data()) != HSA_STATUS_SUCCESS) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -599,8 +581,9 @@ hsa_status_t MemoryRegion::Lock(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
if (owner()->driver().RegisterMemory(host_ptr, size, const_cast<HsaMemFlags&>(mem_flag_)) ==
|
||||
HSA_STATUS_SUCCESS) {
|
||||
uint64_t alternate_va = 0;
|
||||
if (MakeKfdMemoryResident(whitelist_nodes.size(), &whitelist_nodes[0],
|
||||
host_ptr, size, &alternate_va, map_flag_)) {
|
||||
if (owner()->driver().MakeMemoryResident(host_ptr, size, &alternate_va, &map_flag_,
|
||||
whitelist_nodes.size(),
|
||||
whitelist_nodes.data()) == HSA_STATUS_SUCCESS) {
|
||||
if (alternate_va != 0) {
|
||||
*agent_ptr = reinterpret_cast<void*>(alternate_va);
|
||||
} else {
|
||||
@@ -625,7 +608,9 @@ hsa_status_t MemoryRegion::Unlock(void* host_ptr) const {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
MakeKfdMemoryUnresident(host_ptr);
|
||||
if (owner()->driver().MakeMemoryUnresident(host_ptr) != HSA_STATUS_SUCCESS) {
|
||||
assert(false && "Failed to unmap host pointer");
|
||||
}
|
||||
if (owner()->driver().DeregisterMemory(host_ptr) != HSA_STATUS_SUCCESS) {
|
||||
assert(false && "Failed to deregister host pointer");
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user