rocr/aie: Support VMEM handle creation
Add support for AllocateMemoryOnly in the XDNA driver.
Move the IsLocalMemory() check out of Runtime::VMemoryHandleCreate and into
the KFD driver, since the XDNA driver can, and needs to, create handles
on system-memory buffer objects.
Rename the handle variable from thunk_handle to user_mode_driver_handle,
which is a more accurate name when non-GPU drivers are supported.
Change-Id: I95db9d575afd1ab0ff2de74cea5175d9a12a721b
[ROCm/ROCR-Runtime commit: 4bf102dc6b]
Esse commit está contido em:
@@ -103,6 +103,11 @@ KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
kmt_alloc_flags.ui32.NonPaged = 1;
|
||||
}
|
||||
|
||||
if (!m_region.IsLocalMemory() &&
|
||||
(alloc_flags & core::MemoryRegion::AllocateMemoryOnly)) {
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Allocating a memory handle for virtual memory
|
||||
kmt_alloc_flags.ui32.NoAddress =
|
||||
!!(alloc_flags & core::MemoryRegion::AllocateMemoryOnly);
|
||||
|
||||
@@ -136,6 +136,7 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
|
||||
amdxdna_drm_get_bo_info get_bo_info_args{0};
|
||||
drm_gem_close close_bo_args{0};
|
||||
void *mapped_mem(nullptr);
|
||||
|
||||
if (!m_region.IsSystem()) {
|
||||
return HSA_STATUS_ERROR_INVALID_REGION;
|
||||
@@ -162,18 +163,29 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
/// TODO: For now we always map the memory and keep a mapping from handles
|
||||
/// to VA memory addresses. Once we can support the separate VMEM call to
|
||||
/// map handles we can fix this.
|
||||
if (m_region.kernarg()) {
|
||||
*mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
|
||||
get_bo_info_args.map_offset);
|
||||
if (*mem == MAP_FAILED) {
|
||||
mapped_mem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_,
|
||||
get_bo_info_args.map_offset);
|
||||
if (mapped_mem == MAP_FAILED) {
|
||||
// Close the BO in the case when a mapping fails and we got a BO handle.
|
||||
ioctl(fd_, DRM_IOCTL_GEM_CLOSE, &close_bo_args);
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
} else {
|
||||
*mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
|
||||
mapped_mem = reinterpret_cast<void *>(get_bo_info_args.vaddr);
|
||||
}
|
||||
|
||||
if (alloc_flags & core::MemoryRegion::AllocateMemoryOnly) {
|
||||
*mem = reinterpret_cast<void *>(create_bo_args.handle);
|
||||
} else {
|
||||
*mem = mapped_mem;
|
||||
}
|
||||
|
||||
vmem_handle_mappings.emplace(create_bo_args.handle, mapped_mem);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#define HSA_RUNTIME_CORE_INC_AMD_XDNA_DRIVER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "core/inc/driver.h"
|
||||
#include "core/inc/memory_region.h"
|
||||
@@ -89,6 +90,12 @@ private:
|
||||
hsa_status_t InitDeviceHeap();
|
||||
hsa_status_t FreeDeviceHeap();
|
||||
|
||||
/// TODO: Remove this in the future and rely on the core Runtime
|
||||
/// object to track handle allocations. Using the VMEM API for mapping XDNA
|
||||
/// driver handles requires a bit more refactoring. So rely on the XDNA driver
|
||||
/// to manage some of this for now.
|
||||
std::unordered_map<uint32_t, void *> vmem_handle_mappings;
|
||||
|
||||
/// @brief Virtual address range allocated for the device heap.
|
||||
///
|
||||
/// Allocate a large enough space so we can carve out the device heap in
|
||||
|
||||
@@ -3111,20 +3111,22 @@ hsa_status_t Runtime::VMemoryHandleCreate(const MemoryRegion* region, size_t siz
|
||||
uint64_t flags_unused,
|
||||
hsa_amd_vmem_alloc_handle_t* memoryOnlyHandle) {
|
||||
const AMD::MemoryRegion* memRegion = static_cast<const AMD::MemoryRegion*>(region);
|
||||
if (!memRegion->IsLocalMemory()) return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
if (!IsMultipleOf(size, memRegion->GetPageSize()))
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
ScopedAcquire<KernelSharedMutex> lock(&memory_lock_);
|
||||
void* thunk_handle;
|
||||
hsa_status_t status = region->Allocate(size, alloc_flags, &thunk_handle, 0);
|
||||
void *user_mode_driver_handle;
|
||||
hsa_status_t status =
|
||||
region->Allocate(size, alloc_flags, &user_mode_driver_handle, 0);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
memory_handle_map_.emplace(std::piecewise_construct,
|
||||
std::forward_as_tuple(thunk_handle),
|
||||
std::forward_as_tuple(region, size, flags_unused, thunk_handle, alloc_flags));
|
||||
std::forward_as_tuple(user_mode_driver_handle),
|
||||
std::forward_as_tuple(region, size, flags_unused,
|
||||
user_mode_driver_handle,
|
||||
alloc_flags));
|
||||
|
||||
*memoryOnlyHandle = MemoryHandle::Convert(thunk_handle);
|
||||
*memoryOnlyHandle = MemoryHandle::Convert(user_mode_driver_handle);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário