From f8d1663b396b16e4c0dd71ff5f1b893c20e41dfc Mon Sep 17 00:00:00 2001 From: tiancyin Date: Wed, 2 Jul 2025 13:41:57 +0800 Subject: [PATCH] wsl/libhsakmt: move handle aperture from device to thunk runtime In multi-GPU setups, the handle aperture is shared between all GPUs and does not belong to any one specific GPU, so move it from the wddm device (which represents a specific GPU) to the thunk runtime, which has a global view and can manage the handle aperture for all GPUs. Reviewed-by: Flora Cui Signed-off-by: tiancyin --- libhsakmt.h | 10 +++++++ openclose.cpp | 68 +++++++++++++++++++++++++++++++++++++++++++++ wddm/device.cpp | 57 ------------------------------------- wddm/gpu_memory.cpp | 6 ++-- 4 files changed, 81 insertions(+), 60 deletions(-) diff --git a/libhsakmt.h b/libhsakmt.h index 6f5451d5af..641e853913 100644 --- a/libhsakmt.h +++ b/libhsakmt.h @@ -65,6 +65,8 @@ struct hsakmtRuntime { local_heap_space_size_(0), system_heap_space_start_(0), system_heap_space_size_(0), + handle_aperture_start_(0), + handle_aperture_size_(0), default_node(1) {} void HeapInit(); @@ -78,6 +80,10 @@ struct hsakmtRuntime { uint64_t SystemHeapSize() { return system_heap_space_size_; } bool FreeSystemHeapSpace(); void InitSystemHeapMgr(); + bool InitHandleApertureSpace(); + void InitHandleApertureMgr(); + ErrorCode HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr); + void HandleApertureFree(gpusize gpu_addr); pthread_mutex_t hsakmt_mutex; const char *dxg_device_name = "/dev/dxg"; @@ -110,6 +116,10 @@ struct hsakmtRuntime { /* manage the reserved system heap space which shared by CPU and GPUs */ std::unique_ptr system_heap_mgr_; + + uint64_t handle_aperture_start_; + uint64_t handle_aperture_size_; + std::unique_ptr handle_aperture_mgr_; }; extern hsakmtRuntime *dxg_runtime; diff --git a/openclose.cpp b/openclose.cpp index e0da7c1131..eda64b176f 100644 --- a/openclose.cpp +++ b/openclose.cpp @@ -42,8 +42,10 @@ hsakmtRuntime *dxg_runtime = new hsakmtRuntime(); void hsakmtRuntime::HeapInit() { 
ReserveLocalHeapSpace(); ReserveSystemHeapSpace(); + InitHandleApertureSpace(); InitLocalHeapMgr(); InitSystemHeapMgr(); + InitHandleApertureMgr(); } void hsakmtRuntime::HeapFini() { @@ -204,6 +206,72 @@ void hsakmtRuntime::InitSystemHeapMgr() { DEFAULT_GPU_PAGE_SIZE); } + +bool hsakmtRuntime::InitHandleApertureSpace() { + wsl::thunk::WDDMDevice* device; + size_t num_adapters = get_num_wddmdev(); + handle_aperture_start_ = START_NON_CANONICAL_ADDR; + handle_aperture_size_ = 1ULL << 47; + + while (handle_aperture_start_ < END_NON_CANONICAL_ADDR - 1) { + for (uint32_t j = 0; j < num_adapters;) { + device = get_wddmdev(j+1); + if (device == nullptr) + return false; /* bool return: -1 would convert to true and report success */ + + if (device->PrivateApertureBase() && + IS_OVERLAPPING(device->PrivateApertureBase(), + device->PrivateApertureSize(), + handle_aperture_start_, + handle_aperture_size_)) { + handle_aperture_start_ += (1ULL << 47); + continue; + } + + if (device->SharedApertureBase() && + IS_OVERLAPPING(device->SharedApertureBase(), + device->SharedApertureSize(), + handle_aperture_start_, + handle_aperture_size_)) { + handle_aperture_start_ += (1ULL << 47); + continue; + } + + j++; + } + pr_debug("handle aperture start %lx, size %lx\n", handle_aperture_start_, handle_aperture_size_); + return true; + } + + handle_aperture_start_ = 0; + pr_err("fail\n"); + + return false; +} + +void hsakmtRuntime::InitHandleApertureMgr() { + handle_aperture_mgr_ = std::make_unique(handle_aperture_start_, + handle_aperture_size_, + DEFAULT_GPU_PAGE_SIZE); +} + +ErrorCode hsakmtRuntime::HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr) { + uint64_t align = DEFAULT_GPU_PAGE_SIZE; + + if (size >= GPU_HUGE_PAGE_SIZE) + align = GPU_HUGE_PAGE_SIZE; + + *out_gpu_virt_addr = handle_aperture_mgr_->Alloc(size, align); + if (*out_gpu_virt_addr == 0) + return ErrorCode::OutOfHandleApeMemory; + + return ErrorCode::Success; +} + +void hsakmtRuntime::HandleApertureFree(gpusize gpu_addr) { + handle_aperture_mgr_->Free(gpu_addr); +} + /* 
is_forked_child detects when the process has forked since the last * time this function was called. We cannot rely on pthread_atfork * because the process can fork without calling the fork function in diff --git a/wddm/device.cpp b/wddm/device.cpp index acdca8e19f..05aa6c31c8 100644 --- a/wddm/device.cpp +++ b/wddm/device.cpp @@ -67,8 +67,6 @@ WDDMDevice::WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid, uint32_t node_i CreateDevice(); SetPowerOptimization(false); CreatePagingQueue(); - InitHandleApertureSpace(); - InitHandleApertureMgr(); InitCmdbufInfo(); } @@ -273,45 +271,6 @@ bool WDDMDevice::DecommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd return true; } -void WDDMDevice::InitHandleApertureMgr() { - handle_aperture_mgr_ = std::make_unique(handle_aperture_start_, - handle_aperture_size_, - DEFAULT_GPU_PAGE_SIZE); -} - -bool WDDMDevice::InitHandleApertureSpace(void) { - handle_aperture_start_ = START_NON_CANONICAL_ADDR; - handle_aperture_size_ = 1ULL << 47; - - while (handle_aperture_start_ < END_NON_CANONICAL_ADDR - 1) { - if (device_info_.private_aperture_base && - IS_OVERLAPPING(device_info_.private_aperture_base, - device_info_.private_aperture_size, - handle_aperture_start_, - handle_aperture_size_)) { - handle_aperture_start_ += (1ULL << 47); - continue; - } - - if (device_info_.shared_aperture_base && - IS_OVERLAPPING(device_info_.shared_aperture_base, - device_info_.shared_aperture_size, - handle_aperture_start_, - handle_aperture_size_)) { - handle_aperture_start_ += (1ULL << 47); - continue; - } - - pr_debug("handle aperture start %lx, size %lx\n", handle_aperture_start_, handle_aperture_size_); - return true; - } - - handle_aperture_start_ = 0; - pr_err("fail\n"); - - return false; -} - void WDDMDevice::SetPowerOptimization(bool restore) { void *priv_data; int priv_size; @@ -414,22 +373,6 @@ ErrorCode WDDMDevice::FreeIPCSysMem(gpusize gpu_addr, gpusize size, int &memfd) return code; } -ErrorCode 
WDDMDevice::HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr) { - uint64_t align = DEFAULT_GPU_PAGE_SIZE; - - if (size >= GPU_HUGE_PAGE_SIZE) - align = GPU_HUGE_PAGE_SIZE; - - *out_gpu_virt_addr = handle_aperture_mgr_->Alloc(size, align); - if (*out_gpu_virt_addr == 0) - return ErrorCode::OutOfHandleApeMemory; - - return ErrorCode::Success; -} - -void WDDMDevice::HandleApertureFree(gpusize gpu_addr) { - handle_aperture_mgr_->Free(gpu_addr); -} void WDDMDevice::UpdatePageFence(uint64_t fence_value) { uint64_t current = page_fence_value_.load(); diff --git a/wddm/gpu_memory.cpp b/wddm/gpu_memory.cpp index 582310bb84..90a3088359 100644 --- a/wddm/gpu_memory.cpp +++ b/wddm/gpu_memory.cpp @@ -49,7 +49,7 @@ GpuMemory::~GpuMemory() { FreeGpuVirtualAddress(GpuAddress(), Size()); FreePhysicalMemory(); if (desc_.handle_ape_addr > 0) - device_->HandleApertureFree(desc_.handle_ape_addr); + dxg_runtime->HandleApertureFree(desc_.handle_ape_addr); } ErrorCode GpuMemory::Init(const GpuMemoryCreateInfo &create_info) { @@ -92,7 +92,7 @@ ErrorCode GpuMemory::Init(const GpuMemoryCreateInfo &create_info) { if (IsPhysicalOnly()) { code = CreatePhysicalMemory(); if (code == ErrorCode::Success) - code = device_->HandleApertureAlloc(desc_.size, &desc_.handle_ape_addr); + code = dxg_runtime->HandleApertureAlloc(desc_.size, &desc_.handle_ape_addr); return code; } @@ -576,7 +576,7 @@ ErrorCode GpuMemory::ImportPhysicalHandle(const GpuMemoryCreateInfo &create_info return ret; } else { desc_.flags.is_imported_vram_vmem = 1; - return device_->HandleApertureAlloc(desc_.size, &desc_.handle_ape_addr); + return dxg_runtime->HandleApertureAlloc(desc_.size, &desc_.handle_ape_addr); } } }