wsl/hsakmt: add support for contiguous memory allocation
Reviewed-by: Longlong Yao <Longlong.Yao@amd.com> Signed-off-by: tiancyin <tianci.yin@amd.com>
This commit is contained in:
@@ -57,11 +57,12 @@ class WDDMDevice;
|
||||
|
||||
union GpuMemoryCreateFlags {
|
||||
struct {
|
||||
uint64_t virtual_alloc : 1;
|
||||
uint64_t physical_only : 1;
|
||||
uint64_t interprocess : 1;
|
||||
uint64_t locked : 1;
|
||||
uint64_t unused : 60;
|
||||
uint64_t virtual_alloc : 1; // only allocate virtual address, without physical buffer
|
||||
uint64_t physical_only : 1; // only allocate physical buffer, without virtual address
|
||||
uint64_t interprocess : 1; // physical buffer needs to share info between exporter and importer
|
||||
uint64_t locked : 1; // lock virtual address space into RAM, preventing that memory from being paged to the swap area
|
||||
uint64_t physical_contiguous : 1; // contiguous physical pages
|
||||
uint64_t unused : 59;
|
||||
};
|
||||
uint64_t reserved;
|
||||
};
|
||||
@@ -120,8 +121,8 @@ struct GpuMemoryDesc {
|
||||
uint32_t is_physical_only : 1;
|
||||
uint32_t is_locked : 1;
|
||||
uint32_t is_queue_referenced : 1;
|
||||
|
||||
uint32_t unused : 27;
|
||||
uint32_t is_physical_contiguous : 1;
|
||||
uint32_t unused : 25;
|
||||
};
|
||||
|
||||
uint32_t reserved;
|
||||
@@ -160,6 +161,7 @@ public:
|
||||
inline bool IsSystem() const { return desc_.domain == thunk_proxy::kSystem; }
|
||||
inline bool IsUserQueue() const { return desc_.domain == thunk_proxy::kUserQueue; }
|
||||
inline bool IsPhysicalOnly() const { return desc_.flags.is_physical_only; }
|
||||
inline bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous; }
|
||||
inline bool IsVirtual() const { return desc_.flags.is_virtual; }
|
||||
inline bool IsShared() const { return desc_.flags.is_shared; }
|
||||
inline bool IsExternal() const { return desc_.flags.is_external; }
|
||||
|
||||
+3
-3
@@ -99,11 +99,11 @@ HSAKMTAPI int amdgpu_bo_import(amdgpu_device_handle dev,
|
||||
enum amdgpu_bo_handle_type type,
|
||||
uint32_t shared_handle,
|
||||
struct amdgpu_bo_import_result *output) {
|
||||
void *MemoryAddress = nullptr;
|
||||
HSAKMT_STATUS ret = hsaKmtImportDMABufHandle(shared_handle, &MemoryAddress);
|
||||
HsaGraphicsResourceInfo GraphicsResourceInfo;
|
||||
HSAKMT_STATUS ret = hsaKmtImportDMABufHandle(shared_handle, &GraphicsResourceInfo);
|
||||
if (ret == HSAKMT_STATUS_SUCCESS) {
|
||||
//use GpuMemory object's address as drm buf handle
|
||||
output->buf_handle = reinterpret_cast<amdgpu_bo_handle>(MemoryAddress);
|
||||
output->buf_handle = reinterpret_cast<amdgpu_bo_handle>(GraphicsResourceInfo.MemoryAddress);
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
|
||||
+1
-1
@@ -199,6 +199,6 @@ bool queue_release_buffer(void *MemoryAddress);
|
||||
uint32_t get_vgpr_size_per_cu(HSA_ENGINE_ID id);
|
||||
#define SGPR_SIZE_PER_CU 0x4000
|
||||
|
||||
HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd, void **MemoryAddress);
|
||||
HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd, HsaGraphicsResourceInfo *GraphicsResourceInfo);
|
||||
|
||||
#endif
|
||||
|
||||
+14
-9
@@ -203,17 +203,20 @@ HSAKMT_STATUS hsaKmtAllocMemoryAlignInternal(HSAuint32 PreferredNode,
|
||||
|
||||
create_info.flags.physical_only = MemFlags.ui32.NoAddress;
|
||||
create_info.flags.interprocess = MemFlags.ui32.NoAddress;
|
||||
create_info.flags.interprocess |= MemFlags.ui32.Contiguous;
|
||||
create_info.flags.physical_contiguous = MemFlags.ui32.Contiguous;
|
||||
create_info.flags.locked = MemFlags.ui32.NoSubstitute;//AllocatePinned
|
||||
create_info.flags.virtual_alloc = MemFlags.ui32.OnlyAddress;
|
||||
/*when only alloc virtual or only physical, it's vmm allocation, force to local*/
|
||||
if (create_info.flags.virtual_alloc || create_info.flags.physical_only)
|
||||
if (create_info.flags.virtual_alloc || create_info.flags.physical_only
|
||||
|| create_info.flags.physical_contiguous) {
|
||||
create_info.domain = thunk_proxy::AllocDomain::kLocal;
|
||||
SkipSubAlloc = true;
|
||||
}
|
||||
|
||||
/* Only allow using the suballocator for ordinary VRAM.*/
|
||||
bool trim_safe = false;
|
||||
if (!SkipSubAlloc &&
|
||||
create_info.domain == thunk_proxy::AllocDomain::kLocal &&
|
||||
!(create_info.flags.virtual_alloc || create_info.flags.physical_only)) {
|
||||
if (!SkipSubAlloc && create_info.domain == thunk_proxy::AllocDomain::kLocal) {
|
||||
std::lock_guard<std::mutex> gard(*fragment_allocator_lock_);
|
||||
|
||||
/* just quickly skip SA if size is bigger than SA block size.*/
|
||||
@@ -434,7 +437,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 Graphic
|
||||
pr_debug("number of nodes %lu\n", NumberOfNodes);
|
||||
|
||||
GraphicsResourceInfo->NodeId = 1;
|
||||
return hsaKmtImportDMABufHandle(GraphicsResourceHandle, &GraphicsResourceInfo->MemoryAddress);
|
||||
return hsaKmtImportDMABufHandle(GraphicsResourceHandle, GraphicsResourceInfo);
|
||||
}
|
||||
|
||||
|
||||
@@ -459,7 +462,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress,
|
||||
|
||||
|
||||
HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd,
|
||||
void **MemoryAddress) {
|
||||
HsaGraphicsResourceInfo *GraphicsResourceInfo) {
|
||||
|
||||
CHECK_DXG_OPEN();
|
||||
|
||||
@@ -470,17 +473,19 @@ HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd,
|
||||
|
||||
auto code = dev->CreateGpuMemory(create_info, &gpu_mem);
|
||||
if (code == ErrorCode::Success) {
|
||||
*MemoryAddress = reinterpret_cast<void *>(gpu_mem);
|
||||
void *MemoryAddress = reinterpret_cast<void *>(gpu_mem->HandleApeAddress());
|
||||
std::lock_guard<std::mutex> gard(*allocation_map_lock_);
|
||||
/*
|
||||
* the gpu_mem->Flags() needs to be converted back from GpuMemoryCreateFlags to
|
||||
* HsaMemFlags, reference hsaKmtAllocMemoryAlign
|
||||
* */
|
||||
allocation_map_[*MemoryAddress] = Allocation(
|
||||
gpu_mem->GetGpuMemoryHandle(), *MemoryAddress, (uint64_t)*MemoryAddress,
|
||||
allocation_map_[MemoryAddress] = Allocation(
|
||||
gpu_mem->GetGpuMemoryHandle(), MemoryAddress, (uint64_t)MemoryAddress,
|
||||
gpu_mem->Size(), false, nullptr, gpu_mem->ClientSize(),
|
||||
1, gpu_mem->Flags());
|
||||
|
||||
GraphicsResourceInfo->MemoryAddress = MemoryAddress;
|
||||
GraphicsResourceInfo->SizeInBytes = gpu_mem->ClientSize();
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
@@ -59,6 +59,7 @@ ErrorCode GpuMemory::Init(const GpuMemoryCreateInfo &create_info) {
|
||||
desc_.engine_flag = create_info.engine_flag;
|
||||
desc_.flags.is_virtual = create_info.flags.virtual_alloc;
|
||||
desc_.flags.is_physical_only = create_info.flags.physical_only;
|
||||
desc_.flags.is_physical_contiguous = create_info.flags.physical_contiguous;
|
||||
desc_.flags.is_shared = create_info.flags.interprocess;
|
||||
desc_.flags.is_locked = create_info.flags.locked;
|
||||
|
||||
@@ -303,6 +304,10 @@ ErrorCode GpuMemory::CreatePhysicalMemory() {
|
||||
args.NumAllocations = num_allocations;
|
||||
args.pAllocationInfo2 = alloc_info;
|
||||
|
||||
/* The PhysicallyContiguous flag causes allocation failure
|
||||
* args.Flags.PhysicallyContiguous = IsPhysicalContiguous();
|
||||
*/
|
||||
|
||||
SharedHandleInfo shared_info;
|
||||
if (IsShared()) {
|
||||
shared_info.size = desc_.size;
|
||||
|
||||
Viittaa uudesa ongelmassa
Block a user