From 8e07aca2ae681127fa9546f0e75fb41ea8d8264a Mon Sep 17 00:00:00 2001 From: tiancyin Date: Fri, 27 Jun 2025 16:47:51 +0800 Subject: [PATCH] wsl/libhsakmt: move system heap from device to thunk runtime In a multi-GPU system, the system heap space is shared between all GPUs and does not belong to any one specific GPU, so move it from the wddm device (which represents a specific GPU) to the thunk runtime, which has a global view and can manage the system heap for all GPUs. Introduce a new va_Mgr instance to manage the system heap. Local heap and system heap both comply with SVM (Shared Virtual Memory); without this new mgr, every allocation has to call KMD at least once (each GPU needs a call) to allocate GPU VA. The new mgr manages the space itself and no longer calls KMD. Reviewed-by: Flora Cui Signed-off-by: tiancyin --- libhsakmt.h | 15 +++++++ memory.cpp | 2 +- openclose.cpp | 108 ++++++++++++++++++++++++++++++++++-------------- wddm/device.cpp | 62 ++++----------------------- 4 files changed, 102 insertions(+), 85 deletions(-) diff --git a/libhsakmt.h b/libhsakmt.h index 9f5094eedf..6f5451d5af 100644 --- a/libhsakmt.h +++ b/libhsakmt.h @@ -63,13 +63,21 @@ struct hsakmtRuntime { enable_thunk_sub_allocator(0), local_heap_space_start_(0), local_heap_space_size_(0), + system_heap_space_start_(0), + system_heap_space_size_(0), default_node(1) {} void HeapInit(); void HeapFini(); + bool ReserveSvmSpace(uint64_t &base, uint64_t &size, uint64_t align); + bool FreeSvmSpace(uint64_t &base, uint64_t &size); bool ReserveLocalHeapSpace(); bool FreeLocalHeapSpace(); void InitLocalHeapMgr(); + bool ReserveSystemHeapSpace(); + uint64_t SystemHeapSize() { return system_heap_space_size_; } + bool FreeSystemHeapSpace(); + void InitSystemHeapMgr(); pthread_mutex_t hsakmt_mutex; const char *dxg_device_name = "/dev/dxg"; @@ -95,6 +103,13 @@ struct hsakmtRuntime { /* manage the reserved local heap space which shared by CPU and GPUs */ std::unique_ptr local_heap_mgr_; + + /* system heap means bo's backend is system ram */ + 
uint64_t system_heap_space_start_; + uint64_t system_heap_space_size_; + + /* manage the reserved system heap space which shared by CPU and GPUs */ + std::unique_ptr system_heap_mgr_; }; extern hsakmtRuntime *dxg_runtime; diff --git a/memory.cpp b/memory.cpp index 7085c034e2..52843ff6d3 100644 --- a/memory.cpp +++ b/memory.cpp @@ -547,7 +547,7 @@ HSAKMT_STATUS import_dmabuf_fd(int DMABufFd, struct stat st; fstat(DMABufFd, &st); uint64_t sz = st.st_size; - if (4096 <= sz && sz < dev->SystemHeapSize() && (sz & 0xfff) == 0) { + if (4096 <= sz && sz < dxg_runtime->SystemHeapSize() && (sz & 0xfff) == 0) { pr_debug("DMABufFd %d is sys mem fd(IPC signal), get size:%ld from it\n", DMABufFd, st.st_size); create_info.flags.sysmem_ipc_sig_importer = 1; // set to 1 when backend is system memory create_info.size = st.st_size; diff --git a/openclose.cpp b/openclose.cpp index 85ad89387c..e0da7c1131 100644 --- a/openclose.cpp +++ b/openclose.cpp @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,41 +41,28 @@ hsakmtRuntime *dxg_runtime = new hsakmtRuntime(); void hsakmtRuntime::HeapInit() { ReserveLocalHeapSpace(); + ReserveSystemHeapSpace(); InitLocalHeapMgr(); + InitSystemHeapMgr(); } void hsakmtRuntime::HeapFini() { + FreeSystemHeapSpace(); FreeLocalHeapSpace(); } -/* - * To find the avaliable same range for cpu - * virtual space and gpu virtual space. - * sys_va_size of cpu va range is larger 1G - * than gpu va range, otherwise ReserveGPUVirtualAddress - * will return error. 
- */ -bool hsakmtRuntime::ReserveLocalHeapSpace() { +bool hsakmtRuntime::ReserveSvmSpace(uint64_t &base, uint64_t &size, uint64_t align) { uint64_t sys_va[16] = {0}; uint64_t local_va; uint64_t sys_va_size; int match_index = -1; - uint64_t align = 0x40000000; /* 1G */ void* ptr = NULL; wsl::thunk::WDDMDevice* device; - uint64_t total_local_size = 0; size_t num_adapters = get_num_wddmdev(); - for (uint32_t j = 0; j < num_adapters; j++) { - device = get_wddmdev(j+1); - if (device == nullptr) - return -1; - total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4; - } - local_heap_space_start_ = 0; - local_heap_space_size_ = total_local_size; - sys_va_size = local_heap_space_size_ + align; + base = 0; + sys_va_size = size + align; /* it will retry 16 times to find the avaliable range. */ for (int i = 0; i < 16; i++) { @@ -89,16 +78,16 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { int match_cnt = 0; for (uint32_t j = 0; j < num_adapters; j++) { device = get_wddmdev(j+1); - uint64_t start = (local_heap_space_start_ == 0) ? (uint64_t)ptr : local_heap_space_start_; - uint64_t end = start + ((local_heap_space_start_ == 0) ? sys_va_size : local_heap_space_size_) + 1; + uint64_t start = (base == 0) ? (uint64_t)ptr : base; + uint64_t end = start + ((base == 0) ? 
sys_va_size : size) + 1; if (wsl::thunk::d3dthunk::ReserveGpuVirtualAddress( - device->GetAdapter(), local_heap_space_size_, + device->GetAdapter(), size, start, end, &local_va) == ErrorCode::Success) { match_cnt++; - local_heap_space_start_ = local_va; + base = local_va; pr_debug("success to reserve gpu va %lx and va cpu %p in %d time\n", local_va, ptr, i); } else { @@ -119,12 +108,12 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { uint64_t right_size = align - left_size; if ((left_size > 0) && munmap((void*)sys_va[match_index], left_size)) pr_err("fail to unmap left %lx with size %lx\n", sys_va[match_index], left_size); - if ((right_size > 0) && munmap((void*)(local_va + local_heap_space_size_), right_size)) - pr_err("fail to unmap right %lx with size %lx\n", (local_va + local_heap_space_size_), right_size); + if ((right_size > 0) && munmap((void*)(local_va + size), right_size)) + pr_err("fail to unmap right %lx with size %lx\n", (local_va + size), right_size); } else { pr_err("fail to reserve Local Heap Space!\n"); - local_heap_space_start_ = 0; - local_heap_space_size_ = 0; + base = 0; + size = 0; } /* free match fail address for cpu va */ @@ -138,18 +127,51 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { return match_index >= 0; } -bool hsakmtRuntime::FreeLocalHeapSpace() { +/* + * To find the avaliable same range for cpu + * virtual space and gpu virtual space. + * sys_va_size of cpu va range is larger 1G + * than gpu va range, otherwise ReserveGPUVirtualAddress + * will return error. 
+ */ +bool hsakmtRuntime::ReserveLocalHeapSpace() { + wsl::thunk::WDDMDevice* device; + uint64_t total_local_size = 0; + uint64_t align = 0x40000000; /* 1G */ + size_t num_adapters = get_num_wddmdev(); + + for (uint32_t j = 0; j < num_adapters; j++) { + device = get_wddmdev(j+1); + if (device == nullptr) + return -1; + total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4; + } + + local_heap_space_start_ = 0; + local_heap_space_size_ = total_local_size; + + return ReserveSvmSpace(local_heap_space_start_, local_heap_space_size_, align); +} + +bool hsakmtRuntime::FreeSvmSpace(uint64_t &base, uint64_t &size) { wsl::thunk::WDDMDevice* device; size_t num_adapters = get_num_wddmdev(); for (uint32_t j = 0; j < num_adapters; j++) { device = get_wddmdev(j+1); if (device == nullptr) return -1; - wsl::thunk::d3dthunk::FreeGpuVirtualAddress(device->GetAdapter(), local_heap_space_start_, local_heap_space_size_); + wsl::thunk::d3dthunk::FreeGpuVirtualAddress(device->GetAdapter(), base, size); } - void *cpu = (void *)local_heap_space_start_; - return munmap(cpu, local_heap_space_size_) == 0; + void *cpu = (void *)base; + auto r = (munmap(cpu, size) == 0); + base = 0; + size = 0; + return r; +} + +bool hsakmtRuntime::FreeLocalHeapSpace() { + return FreeSvmSpace(local_heap_space_start_, local_heap_space_size_); } void hsakmtRuntime::InitLocalHeapMgr() { @@ -158,6 +180,30 @@ void hsakmtRuntime::InitLocalHeapMgr() { DEFAULT_GPU_PAGE_SIZE); } +bool hsakmtRuntime::ReserveSystemHeapSpace() { + struct sysinfo info; + int ret = sysinfo(&info); + uint64_t max_ram = 0x10000000000; + uint64_t alignment = 0x100000000; + assert(!ret); + + int32_t protFlags = PROT_NONE; + // minimum of reserve size is 8G, maximum of reserve size is 1T. 
+ system_heap_space_size_ = std::min(wsl::AlignUp(info.totalram, alignment) * 2, max_ram); + + return ReserveSvmSpace(system_heap_space_start_, system_heap_space_size_, alignment); +} + +bool hsakmtRuntime::FreeSystemHeapSpace(void) { + return FreeSvmSpace(system_heap_space_start_, system_heap_space_size_); +} + +void hsakmtRuntime::InitSystemHeapMgr() { + system_heap_mgr_ = std::make_unique(system_heap_space_start_, + system_heap_space_size_, + DEFAULT_GPU_PAGE_SIZE); +} + /* is_forked_child detects when the process has forked since the last * time this function was called. We cannot rely on pthread_atfork * because the process can fork without calling the fork function in diff --git a/wddm/device.cpp b/wddm/device.cpp index 2ab4a2afea..acdca8e19f 100644 --- a/wddm/device.cpp +++ b/wddm/device.cpp @@ -67,14 +67,12 @@ WDDMDevice::WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid, uint32_t node_i CreateDevice(); SetPowerOptimization(false); CreatePagingQueue(); - ReserveSystemHeapSpace(); InitHandleApertureSpace(); InitHandleApertureMgr(); InitCmdbufInfo(); } WDDMDevice::~WDDMDevice() { - FreeSystemHeapSpace(); DestroyPagingQueue(); SetPowerOptimization(true); DestroyDevice(); @@ -275,36 +273,6 @@ bool WDDMDevice::DecommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd return true; } -bool WDDMDevice::ReserveSystemHeapSpace() { - struct sysinfo info; - int ret = sysinfo(&info); - uint64_t max_ram = 0x10000000000; - uint64_t alignment = 0x100000000; - assert(!ret); - - int32_t protFlags = PROT_NONE; - // minimum of reserve size is 8G, maximum of reserve size is 1T. 
- system_heap_space_size_ = std::min(AlignUp(info.totalram, alignment) * 2, max_ram); - void* cpu = mmap(NULL, system_heap_space_size_, protFlags, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (cpu == MAP_FAILED) { - pr_err("fail to reserve system_heap_space_size_ = %lx \n", system_heap_space_size_); - return false; - } - - system_heap_space_start_ = (uint64_t)cpu; - return true; -} - -bool WDDMDevice::FreeSystemHeapSpace(void) { - void *cpu = (void *)system_heap_space_start_; - if (munmap(cpu, system_heap_space_size_) != 0) { - pr_err("fail to unmap = %p \n", cpu); - return false; - } - return true; -} - void WDDMDevice::InitHandleApertureMgr() { handle_aperture_mgr_ = std::make_unique(handle_aperture_start_, handle_aperture_size_, @@ -372,28 +340,23 @@ ErrorCode WDDMDevice::ReserveGpuVirtualAddress(const thunk_proxy::AllocDomain do gpusize gpu_addr = 0; ErrorCode code = ErrorCode::Success; - if (domain == thunk_proxy::kSystem) { + uint64_t align = alignment == 0 ? (64 * 1024) : alignment; // default 64K alignment + if (size >= GPU_HUGE_PAGE_SIZE) + align = GPU_HUGE_PAGE_SIZE; - code = d3dthunk::ReserveGpuVirtualAddress(adapter_, size, - system_heap_space_start_, - system_heap_space_start_ + system_heap_space_size_, - &gpu_addr); - if (code != ErrorCode::Success) - return code; + if (domain == thunk_proxy::kSystem) { + gpu_addr = dxg_runtime->system_heap_mgr_->Alloc(size, align, hit_base_addr); + if (gpu_addr == 0) + code = ErrorCode::OutOfMemory; if (!CommitSystemHeapSpace((void*)gpu_addr, size, lock)) { - d3dthunk::FreeGpuVirtualAddress(adapter_, gpu_addr, size); + dxg_runtime->system_heap_mgr_->Free(gpu_addr); code = ErrorCode::SyscallFail; } } else { - uint64_t align = alignment == 0 ? 
(64 * 1024) : alignment; // default 64K alignment - if (domain == thunk_proxy::kLocal && size >= GPU_HUGE_PAGE_SIZE) - align = GPU_HUGE_PAGE_SIZE; - gpu_addr = dxg_runtime->local_heap_mgr_->Alloc(size, align, hit_base_addr); if (gpu_addr == 0) code = ErrorCode::OutOfGpuMemory; - } *out_gpu_virt_addr = (code == ErrorCode::Success) ? gpu_addr : 0; @@ -405,15 +368,8 @@ ErrorCode WDDMDevice::FreeGpuVirtualAddress(const thunk_proxy::AllocDomain domai auto code = ErrorCode::Success; if (domain == thunk_proxy::kSystem) { - DecommitSystemHeapSpace((void *)gpu_addr, size); - - d3dthunk::FreeGpuVirtualAddressArgs free_args{}; - free_args.hAdapter = adapter_; - free_args.BaseAddress = gpu_addr; - free_args.Size = size; - - code = d3dthunk::FreeGpuVirtualAddress(&free_args); + dxg_runtime->system_heap_mgr_->Free(gpu_addr); } else { dxg_runtime->local_heap_mgr_->Free(gpu_addr); }