diff --git a/libhsakmt.h b/libhsakmt.h index 9f5094eedf..6f5451d5af 100644 --- a/libhsakmt.h +++ b/libhsakmt.h @@ -63,13 +63,21 @@ struct hsakmtRuntime { enable_thunk_sub_allocator(0), local_heap_space_start_(0), local_heap_space_size_(0), + system_heap_space_start_(0), + system_heap_space_size_(0), default_node(1) {} void HeapInit(); void HeapFini(); + bool ReserveSvmSpace(uint64_t &base, uint64_t &size, uint64_t align); + bool FreeSvmSpace(uint64_t &base, uint64_t &size); bool ReserveLocalHeapSpace(); bool FreeLocalHeapSpace(); void InitLocalHeapMgr(); + bool ReserveSystemHeapSpace(); + uint64_t SystemHeapSize() { return system_heap_space_size_; } + bool FreeSystemHeapSpace(); + void InitSystemHeapMgr(); pthread_mutex_t hsakmt_mutex; const char *dxg_device_name = "/dev/dxg"; @@ -95,6 +103,13 @@ struct hsakmtRuntime { /* manage the reserved local heap space which shared by CPU and GPUs */ std::unique_ptr local_heap_mgr_; + + /* system heap means bo's backend is system ram */ + uint64_t system_heap_space_start_; + uint64_t system_heap_space_size_; + + /* manage the reserved system heap space which shared by CPU and GPUs */ + std::unique_ptr system_heap_mgr_; }; extern hsakmtRuntime *dxg_runtime; diff --git a/memory.cpp b/memory.cpp index 7085c034e2..52843ff6d3 100644 --- a/memory.cpp +++ b/memory.cpp @@ -547,7 +547,7 @@ HSAKMT_STATUS import_dmabuf_fd(int DMABufFd, struct stat st; fstat(DMABufFd, &st); uint64_t sz = st.st_size; - if (4096 <= sz && sz < dev->SystemHeapSize() && (sz & 0xfff) == 0) { + if (4096 <= sz && sz < dxg_runtime->SystemHeapSize() && (sz & 0xfff) == 0) { pr_debug("DMABufFd %d is sys mem fd(IPC signal), get size:%ld from it\n", DMABufFd, st.st_size); create_info.flags.sysmem_ipc_sig_importer = 1; // set to 1 when backend is system memory create_info.size = st.st_size; diff --git a/openclose.cpp b/openclose.cpp index 85ad89387c..e0da7c1131 100644 --- a/openclose.cpp +++ b/openclose.cpp @@ -27,6 +27,8 @@ #include #include #include +#include 
+#include #include #include #include @@ -39,41 +41,28 @@ hsakmtRuntime *dxg_runtime = new hsakmtRuntime(); void hsakmtRuntime::HeapInit() { ReserveLocalHeapSpace(); + ReserveSystemHeapSpace(); InitLocalHeapMgr(); + InitSystemHeapMgr(); } void hsakmtRuntime::HeapFini() { + FreeSystemHeapSpace(); FreeLocalHeapSpace(); } -/* - * To find the avaliable same range for cpu - * virtual space and gpu virtual space. - * sys_va_size of cpu va range is larger 1G - * than gpu va range, otherwise ReserveGPUVirtualAddress - * will return error. - */ -bool hsakmtRuntime::ReserveLocalHeapSpace() { +bool hsakmtRuntime::ReserveSvmSpace(uint64_t &base, uint64_t &size, uint64_t align) { uint64_t sys_va[16] = {0}; uint64_t local_va; uint64_t sys_va_size; int match_index = -1; - uint64_t align = 0x40000000; /* 1G */ void* ptr = NULL; wsl::thunk::WDDMDevice* device; - uint64_t total_local_size = 0; size_t num_adapters = get_num_wddmdev(); - for (uint32_t j = 0; j < num_adapters; j++) { - device = get_wddmdev(j+1); - if (device == nullptr) - return -1; - total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4; - } - local_heap_space_start_ = 0; - local_heap_space_size_ = total_local_size; - sys_va_size = local_heap_space_size_ + align; + base = 0; + sys_va_size = size + align; /* it will retry 16 times to find the avaliable range. */ for (int i = 0; i < 16; i++) { @@ -89,16 +78,16 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { int match_cnt = 0; for (uint32_t j = 0; j < num_adapters; j++) { device = get_wddmdev(j+1); - uint64_t start = (local_heap_space_start_ == 0) ? (uint64_t)ptr : local_heap_space_start_; - uint64_t end = start + ((local_heap_space_start_ == 0) ? sys_va_size : local_heap_space_size_) + 1; + uint64_t start = (base == 0) ? (uint64_t)ptr : base; + uint64_t end = start + ((base == 0) ? 
sys_va_size : size) + 1; if (wsl::thunk::d3dthunk::ReserveGpuVirtualAddress( - device->GetAdapter(), local_heap_space_size_, + device->GetAdapter(), size, start, end, &local_va) == ErrorCode::Success) { match_cnt++; - local_heap_space_start_ = local_va; + base = local_va; pr_debug("success to reserve gpu va %lx and va cpu %p in %d time\n", local_va, ptr, i); } else { @@ -119,12 +108,12 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { uint64_t right_size = align - left_size; if ((left_size > 0) && munmap((void*)sys_va[match_index], left_size)) pr_err("fail to unmap left %lx with size %lx\n", sys_va[match_index], left_size); - if ((right_size > 0) && munmap((void*)(local_va + local_heap_space_size_), right_size)) - pr_err("fail to unmap right %lx with size %lx\n", (local_va + local_heap_space_size_), right_size); + if ((right_size > 0) && munmap((void*)(local_va + size), right_size)) + pr_err("fail to unmap right %lx with size %lx\n", (local_va + size), right_size); } else { - pr_err("fail to reserve Local Heap Space!\n"); - local_heap_space_start_ = 0; - local_heap_space_size_ = 0; + pr_err("fail to reserve SVM space!\n"); + base = 0; + size = 0; } /* free match fail address for cpu va */ @@ -138,18 +127,51 @@ bool hsakmtRuntime::ReserveLocalHeapSpace() { return match_index >= 0; } -bool hsakmtRuntime::FreeLocalHeapSpace() { +/* + * To find the available same range for cpu + * virtual space and gpu virtual space. + * sys_va_size of cpu va range is 1G larger + * than gpu va range, otherwise ReserveGPUVirtualAddress + * will return error. 
+ */ +bool hsakmtRuntime::ReserveLocalHeapSpace() { + wsl::thunk::WDDMDevice* device; + uint64_t total_local_size = 0; + uint64_t align = 0x40000000; /* 1G */ + size_t num_adapters = get_num_wddmdev(); + + for (uint32_t j = 0; j < num_adapters; j++) { + device = get_wddmdev(j+1); + if (device == nullptr) + return false; /* report failure: -1 would convert to true */ + total_local_size += wsl::AlignUp(device->LocalHeapSize(), align) * 4; + } + + local_heap_space_start_ = 0; + local_heap_space_size_ = total_local_size; + + return ReserveSvmSpace(local_heap_space_start_, local_heap_space_size_, align); +} + +bool hsakmtRuntime::FreeSvmSpace(uint64_t &base, uint64_t &size) { wsl::thunk::WDDMDevice* device; size_t num_adapters = get_num_wddmdev(); for (uint32_t j = 0; j < num_adapters; j++) { device = get_wddmdev(j+1); if (device == nullptr) - return -1; - wsl::thunk::d3dthunk::FreeGpuVirtualAddress(device->GetAdapter(), local_heap_space_start_, local_heap_space_size_); + return false; /* report failure: -1 would convert to true */ + wsl::thunk::d3dthunk::FreeGpuVirtualAddress(device->GetAdapter(), base, size); } - void *cpu = (void *)local_heap_space_start_; - return munmap(cpu, local_heap_space_size_) == 0; + void *cpu = (void *)base; + auto r = (munmap(cpu, size) == 0); + base = 0; + size = 0; + return r; +} + +bool hsakmtRuntime::FreeLocalHeapSpace() { + return FreeSvmSpace(local_heap_space_start_, local_heap_space_size_); } void hsakmtRuntime::InitLocalHeapMgr() { @@ -158,6 +180,30 @@ void hsakmtRuntime::InitLocalHeapMgr() { DEFAULT_GPU_PAGE_SIZE); } +bool hsakmtRuntime::ReserveSystemHeapSpace() { + struct sysinfo info; + int ret = sysinfo(&info); + uint64_t max_ram = 0x10000000000; + uint64_t alignment = 0x100000000; + if (ret != 0) /* sysinfo() failed: info holds no valid data, also in NDEBUG builds */ + return false; + + // minimum of reserve size is 8G, maximum of reserve size is 1T. 
+ system_heap_space_size_ = std::min(wsl::AlignUp((uint64_t)info.totalram * info.mem_unit, alignment) * 2, max_ram); /* totalram is counted in mem_unit bytes */ + + return ReserveSvmSpace(system_heap_space_start_, system_heap_space_size_, alignment); +} + +bool hsakmtRuntime::FreeSystemHeapSpace(void) { + return FreeSvmSpace(system_heap_space_start_, system_heap_space_size_); +} + +void hsakmtRuntime::InitSystemHeapMgr() { + system_heap_mgr_ = std::make_unique(system_heap_space_start_, + system_heap_space_size_, + DEFAULT_GPU_PAGE_SIZE); +} + /* is_forked_child detects when the process has forked since the last * time this function was called. We cannot rely on pthread_atfork * because the process can fork without calling the fork function in diff --git a/wddm/device.cpp b/wddm/device.cpp index 2ab4a2afea..acdca8e19f 100644 --- a/wddm/device.cpp +++ b/wddm/device.cpp @@ -67,14 +67,12 @@ WDDMDevice::WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid, uint32_t node_i CreateDevice(); SetPowerOptimization(false); CreatePagingQueue(); - ReserveSystemHeapSpace(); InitHandleApertureSpace(); InitHandleApertureMgr(); InitCmdbufInfo(); } WDDMDevice::~WDDMDevice() { - FreeSystemHeapSpace(); DestroyPagingQueue(); SetPowerOptimization(true); DestroyDevice(); @@ -275,36 +273,6 @@ bool WDDMDevice::DecommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd return true; } -bool WDDMDevice::ReserveSystemHeapSpace() { - struct sysinfo info; - int ret = sysinfo(&info); - uint64_t max_ram = 0x10000000000; - uint64_t alignment = 0x100000000; - assert(!ret); - - int32_t protFlags = PROT_NONE; - // minimum of reserve size is 8G, maximum of reserve size is 1T. 
- system_heap_space_size_ = std::min(AlignUp(info.totalram, alignment) * 2, max_ram); - void* cpu = mmap(NULL, system_heap_space_size_, protFlags, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (cpu == MAP_FAILED) { - pr_err("fail to reserve system_heap_space_size_ = %lx \n", system_heap_space_size_); - return false; - } - - system_heap_space_start_ = (uint64_t)cpu; - return true; -} - -bool WDDMDevice::FreeSystemHeapSpace(void) { - void *cpu = (void *)system_heap_space_start_; - if (munmap(cpu, system_heap_space_size_) != 0) { - pr_err("fail to unmap = %p \n", cpu); - return false; - } - return true; -} - void WDDMDevice::InitHandleApertureMgr() { handle_aperture_mgr_ = std::make_unique(handle_aperture_start_, handle_aperture_size_, @@ -372,28 +340,23 @@ ErrorCode WDDMDevice::ReserveGpuVirtualAddress(const thunk_proxy::AllocDomain do gpusize gpu_addr = 0; ErrorCode code = ErrorCode::Success; - if (domain == thunk_proxy::kSystem) { + uint64_t align = alignment == 0 ? (64 * 1024) : alignment; // default 64K alignment + if (size >= GPU_HUGE_PAGE_SIZE) + align = GPU_HUGE_PAGE_SIZE; - code = d3dthunk::ReserveGpuVirtualAddress(adapter_, size, - system_heap_space_start_, - system_heap_space_start_ + system_heap_space_size_, - &gpu_addr); - if (code != ErrorCode::Success) - return code; + if (domain == thunk_proxy::kSystem) { + gpu_addr = dxg_runtime->system_heap_mgr_->Alloc(size, align, hit_base_addr); + if (gpu_addr == 0) + code = ErrorCode::OutOfMemory; - if (!CommitSystemHeapSpace((void*)gpu_addr, size, lock)) { - d3dthunk::FreeGpuVirtualAddress(adapter_, gpu_addr, size); + else if (!CommitSystemHeapSpace((void*)gpu_addr, size, lock)) { /* commit only when Alloc succeeded */ + dxg_runtime->system_heap_mgr_->Free(gpu_addr); code = ErrorCode::SyscallFail; } } else { - uint64_t align = alignment == 0 ? 
(64 * 1024) : alignment; // default 64K alignment - if (domain == thunk_proxy::kLocal && size >= GPU_HUGE_PAGE_SIZE) - align = GPU_HUGE_PAGE_SIZE; - gpu_addr = dxg_runtime->local_heap_mgr_->Alloc(size, align, hit_base_addr); if (gpu_addr == 0) code = ErrorCode::OutOfGpuMemory; - } *out_gpu_virt_addr = (code == ErrorCode::Success) ? gpu_addr : 0; @@ -405,15 +368,8 @@ ErrorCode WDDMDevice::FreeGpuVirtualAddress(const thunk_proxy::AllocDomain domai auto code = ErrorCode::Success; if (domain == thunk_proxy::kSystem) { - DecommitSystemHeapSpace((void *)gpu_addr, size); - - d3dthunk::FreeGpuVirtualAddressArgs free_args{}; - free_args.hAdapter = adapter_; - free_args.BaseAddress = gpu_addr; - free_args.Size = size; - - code = d3dthunk::FreeGpuVirtualAddress(&free_args); + dxg_runtime->system_heap_mgr_->Free(gpu_addr); } else { dxg_runtime->local_heap_mgr_->Free(gpu_addr); }