From 0fae2bb2623af13d1f826879d01d12e32e06fb27 Mon Sep 17 00:00:00 2001 From: tiancyin Date: Wed, 13 Nov 2024 07:45:39 +0800 Subject: [PATCH] wsl/hsakmt: add supporting contiguous memory allocation Reviewed-by: Longlong Yao Signed-off-by: tiancyin --- inc/wddm/gpu_memory.h | 16 +++++++++------- libdrm.cpp | 6 +++--- libhsakmt.h | 2 +- memory.cpp | 23 ++++++++++++++--------- wddm/gpu_memory.cpp | 5 +++++ 5 files changed, 32 insertions(+), 20 deletions(-) diff --git a/inc/wddm/gpu_memory.h b/inc/wddm/gpu_memory.h index 073bea0d77..da0433c7bc 100644 --- a/inc/wddm/gpu_memory.h +++ b/inc/wddm/gpu_memory.h @@ -57,11 +57,12 @@ class WDDMDevice; union GpuMemoryCreateFlags { struct { - uint64_t virtual_alloc : 1; - uint64_t physical_only : 1; - uint64_t interprocess : 1; - uint64_t locked : 1; - uint64_t unused : 60; + uint64_t virtual_alloc : 1; // only allocate virtual address, without physical buffer + uint64_t physical_only : 1; // only allocate physical buffer, without virtual address + uint64_t interprocess : 1; // physical buffer needs to share info between exporter and importer + uint64_t locked : 1; // lock virtual address space into RAM, preventing that memory from being paged to the swap area + uint64_t physical_contiguous : 1; // contiguous physical pages + uint64_t unused : 59; }; uint64_t reserved; }; @@ -120,8 +121,8 @@ struct GpuMemoryDesc { uint32_t is_physical_only : 1; uint32_t is_locked : 1; uint32_t is_queue_referenced : 1; - - uint32_t unused : 27; + uint32_t is_physical_contiguous : 1; + uint32_t unused : 25; }; uint32_t reserved; @@ -160,6 +161,7 @@ public: inline bool IsSystem() const { return desc_.domain == thunk_proxy::kSystem; } inline bool IsUserQueue() const { return desc_.domain == thunk_proxy::kUserQueue; } inline bool IsPhysicalOnly() const { return desc_.flags.is_physical_only; } + inline bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous; } inline bool IsVirtual() const { return desc_.flags.is_virtual; } inline bool 
IsShared() const { return desc_.flags.is_shared; } inline bool IsExternal() const { return desc_.flags.is_external; } diff --git a/libdrm.cpp b/libdrm.cpp index 878318ae80..c9ed48d60e 100644 --- a/libdrm.cpp +++ b/libdrm.cpp @@ -99,11 +99,11 @@ HSAKMTAPI int amdgpu_bo_import(amdgpu_device_handle dev, enum amdgpu_bo_handle_type type, uint32_t shared_handle, struct amdgpu_bo_import_result *output) { - void *MemoryAddress = nullptr; - HSAKMT_STATUS ret = hsaKmtImportDMABufHandle(shared_handle, &MemoryAddress); + HsaGraphicsResourceInfo GraphicsResourceInfo; + HSAKMT_STATUS ret = hsaKmtImportDMABufHandle(shared_handle, &GraphicsResourceInfo); if (ret == HSAKMT_STATUS_SUCCESS) { //use GpuMemory object's address as drm buf handle - output->buf_handle = reinterpret_cast(MemoryAddress); + output->buf_handle = reinterpret_cast(GraphicsResourceInfo.MemoryAddress); return 0; } else { return -1; diff --git a/libhsakmt.h b/libhsakmt.h index 7f14a1ca5d..10a7bc7139 100644 --- a/libhsakmt.h +++ b/libhsakmt.h @@ -199,6 +199,6 @@ bool queue_release_buffer(void *MemoryAddress); uint32_t get_vgpr_size_per_cu(HSA_ENGINE_ID id); #define SGPR_SIZE_PER_CU 0x4000 -HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd, void **MemoryAddress); +HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd, HsaGraphicsResourceInfo *GraphicsResourceInfo); #endif diff --git a/memory.cpp b/memory.cpp index e0a8f7548c..b080f72082 100644 --- a/memory.cpp +++ b/memory.cpp @@ -203,17 +203,20 @@ HSAKMT_STATUS hsaKmtAllocMemoryAlignInternal(HSAuint32 PreferredNode, create_info.flags.physical_only = MemFlags.ui32.NoAddress; create_info.flags.interprocess = MemFlags.ui32.NoAddress; + create_info.flags.interprocess |= MemFlags.ui32.Contiguous; + create_info.flags.physical_contiguous = MemFlags.ui32.Contiguous; create_info.flags.locked = MemFlags.ui32.NoSubstitute;//AllocatePinned create_info.flags.virtual_alloc = MemFlags.ui32.OnlyAddress; /*when only alloc virtual or only physical, it's vmm allocation, force to 
local*/ - if (create_info.flags.virtual_alloc || create_info.flags.physical_only) + if (create_info.flags.virtual_alloc || create_info.flags.physical_only + || create_info.flags.physical_contiguous) { create_info.domain = thunk_proxy::AllocDomain::kLocal; + SkipSubAlloc = true; + } /* Only allow using the suballocator for ordinary VRAM.*/ bool trim_safe = false; - if (!SkipSubAlloc && - create_info.domain == thunk_proxy::AllocDomain::kLocal && - !(create_info.flags.virtual_alloc || create_info.flags.physical_only)) { + if (!SkipSubAlloc && create_info.domain == thunk_proxy::AllocDomain::kLocal) { std::lock_guard gard(*fragment_allocator_lock_); /* just quickly skip SA if size is bigger than SA block size.*/ @@ -434,7 +437,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 Graphic pr_debug("number of nodes %lu\n", NumberOfNodes); GraphicsResourceInfo->NodeId = 1; - return hsaKmtImportDMABufHandle(GraphicsResourceHandle, &GraphicsResourceInfo->MemoryAddress); + return hsaKmtImportDMABufHandle(GraphicsResourceHandle, GraphicsResourceInfo); } @@ -459,7 +462,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress, HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd, - void **MemoryAddress) { + HsaGraphicsResourceInfo *GraphicsResourceInfo) { CHECK_DXG_OPEN(); @@ -470,17 +473,19 @@ HSAKMT_STATUS hsaKmtImportDMABufHandle(int DMABufFd, auto code = dev->CreateGpuMemory(create_info, &gpu_mem); if (code == ErrorCode::Success) { - *MemoryAddress = reinterpret_cast(gpu_mem); + void *MemoryAddress = reinterpret_cast(gpu_mem->HandleApeAddress()); std::lock_guard gard(*allocation_map_lock_); /* * the gpu_mem->Flags() need convert back from GpuMemoryCreateFlags to * HsaMemFlags, reference hsaKmtAllocMemoryAlign * */ - allocation_map_[*MemoryAddress] = Allocation( - gpu_mem->GetGpuMemoryHandle(), *MemoryAddress, (uint64_t)*MemoryAddress, + allocation_map_[MemoryAddress] = Allocation( + gpu_mem->GetGpuMemoryHandle(), MemoryAddress, 
(uint64_t)MemoryAddress, gpu_mem->Size(), false, nullptr, gpu_mem->ClientSize(), 1, gpu_mem->Flags()); + GraphicsResourceInfo->MemoryAddress = MemoryAddress; + GraphicsResourceInfo->SizeInBytes = gpu_mem->ClientSize(); return HSAKMT_STATUS_SUCCESS; } diff --git a/wddm/gpu_memory.cpp b/wddm/gpu_memory.cpp index 24e04482a8..3a1a080e78 100644 --- a/wddm/gpu_memory.cpp +++ b/wddm/gpu_memory.cpp @@ -59,6 +59,7 @@ ErrorCode GpuMemory::Init(const GpuMemoryCreateInfo &create_info) { desc_.engine_flag = create_info.engine_flag; desc_.flags.is_virtual = create_info.flags.virtual_alloc; desc_.flags.is_physical_only = create_info.flags.physical_only; + desc_.flags.is_physical_contiguous = create_info.flags.physical_contiguous; desc_.flags.is_shared = create_info.flags.interprocess; desc_.flags.is_locked = create_info.flags.locked; @@ -303,6 +304,10 @@ ErrorCode GpuMemory::CreatePhysicalMemory() { args.NumAllocations = num_allocations; args.pAllocationInfo2 = alloc_info; + /* The PhysicallyContiguous flag causes allocation failure + * args.Flags.PhysicallyContiguous = IsPhysicalContiguous(); + */ + SharedHandleInfo shared_info; if (IsShared()) { shared_info.size = desc_.size;