/* * Copyright © 2014 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including * the next paragraph) shall be included in all copies or substantial * portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include "impl/wddm/gpu_memory.h" #include "util/simple_heap.h" struct Allocation { Allocation() : handle(0), cpu_addr(0), gpu_addr(0), size(0), userptr(false), user_data(nullptr), size_requested(0), node_id(0), mem_flags_value(0), dmabuf_fd(-1), rocr_userdata(nullptr) {} Allocation(wsl::thunk::GpuMemoryHandle handle_arg, void *cpu_addr_arg, uint64_t gpu_addr_arg, size_t size_arg, bool userptr_arg = false, void *user_data_arg = nullptr, size_t user_size_arg = 0, HSAuint32 node_id_arg = 0, HSAuint32 mem_flags_value_arg = 0) : handle(handle_arg), cpu_addr(cpu_addr_arg), gpu_addr(gpu_addr_arg), size(size_arg), userptr(userptr_arg), user_data(user_data_arg), size_requested(user_size_arg), node_id(node_id_arg), mem_flags_value(mem_flags_value_arg), dmabuf_fd(-1), rocr_userdata(nullptr) {} wsl::thunk::GpuMemoryHandle handle; void *cpu_addr; uint64_t gpu_addr; bool userptr; size_t size; /* actual size = align_up(size_requested, granularity) */ void *user_data; size_t size_requested; /* size requested by user */ HSAuint32 node_id; HSAuint32 mem_flags_value; int dmabuf_fd; void *rocr_userdata; }; static std::map* allocation_map_ = new std::map(); static std::mutex* allocation_map_lock_ = new std::mutex(); void clear_allocation_map(void) { //delete allocation_map_lock_; allocation_map_lock_ = new std::mutex(); std::lock_guard lock(*allocation_map_lock_); delete allocation_map_; allocation_map_ = new std::map(); } HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryPolicy(HSAuint32 Node, HSAuint32 DefaultPolicy, HSAuint32 AlternatePolicy, void *MemoryAddressAlternate, HSAuint64 MemorySizeInBytes) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags) { switch (pageSizeFlags) { case HSA_PAGE_SIZE_4KB: return 4 * 1024; case HSA_PAGE_SIZE_64KB: return 64 * 1024; case HSA_PAGE_SIZE_2MB: return 2 * 1024 * 1024; case HSA_PAGE_SIZE_1GB: return 1024 * 1024 * 1024; default: assert(false); return 4 * 1024; } } HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemory(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, HsaMemFlags MemFlags, void **MemoryAddress) { return hsaKmtAllocMemoryAlign(PreferredNode, SizeInBytes, 0, MemFlags, MemoryAddress); } #define POWER_OF_2(x) ((x && (!(x & (x - 1)))) ? 1 : 0) bool isSystemMemoryAvailable(HSAuint64 SizeInBytes) { struct sysinfo info; if (sysinfo(&info) != 0) return false; return SizeInBytes <= info.freeram; } void* BlockAllocator::alloc(size_t request_size, size_t& allocated_size) const { void *address; HsaMemFlags MemFlags; MemFlags.Value = 0; MemFlags.ui32.CoarseGrain = 1; MemFlags.ui32.NoSubstitute = 1; allocated_size = wsl::AlignUp(request_size, block_size()); if (HSAKMT_STATUS_SUCCESS == hsaKmtAllocMemoryAlignInternal(1, allocated_size, 0, MemFlags, &address, true)) return address; return nullptr; } void BlockAllocator::free(void* ptr, size_t length) const { if (HSAKMT_STATUS_SUCCESS != hsaKmtFreeMemoryInternal(ptr, length, true)) pr_err("wsl-thunk: BlockAllocator::free() err, address %p, length:%zu\n", ptr, length); } static wsl::SimpleHeap fragment_allocator_; void reset_suballocator(void) { fragment_allocator_.reset(); } void trim_suballocator(void) { fragment_allocator_.trim(); } HSAKMT_STATUS hsaKmtAllocMemoryAlignInternal(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, HSAuint64 Alignment, HsaMemFlags MemFlags, void **MemoryAddress, bool SkipSubAlloc) { CHECK_DXG_OPEN(); if (!MemoryAddress) return HSAKMT_STATUS_INVALID_PARAMETER; if (MemFlags.ui32.FixedAddress) { if (*MemoryAddress == nullptr) return HSAKMT_STATUS_INVALID_PARAMETER; } else *MemoryAddress = nullptr; uint32_t node = (PreferredNode == 0) ? dxg_runtime->default_node : PreferredNode; wsl::thunk::WDDMDevice *dev = get_wddmdev(node); if (!dev) return HSAKMT_STATUS_ERROR; wsl::thunk::GpuMemory *gpu_mem = nullptr; wsl::thunk::GpuMemoryCreateInfo create_info{}; create_info.size = SizeInBytes; /* If initialize scratch pool of GpuAgent, treat it as SVM reserve */ if (MemFlags.ui32.Scratch && MemFlags.ui32.HostAccess && SizeInBytes > 0x80000000) MemFlags.ui32.OnlyAddress = 1; create_info.alignment = Alignment; create_info.va_hint = reinterpret_cast(*MemoryAddress); if ((PreferredNode == 0 && MemFlags.ui32.HostAccess) || dxg_runtime->zfb_support || MemFlags.ui32.GTTAccess) { if (SizeInBytes > dxg_runtime->max_single_alloc_size) return HSAKMT_STATUS_NO_MEMORY; if (dxg_runtime->check_avail_sysram && !isSystemMemoryAvailable(SizeInBytes)) return HSAKMT_STATUS_NO_MEMORY; /* If allocate VRAM under ZFB mode */ if (dxg_runtime->zfb_support && MemFlags.ui32.NonPaged == 1) MemFlags.ui32.CoarseGrain = 1; // AllocateNonPaged == AllocateIPC create_info.flags.sysmem_ipc_sig_exporter = !!(MemFlags.ui32.NonPaged && !MemFlags.ui32.GTTAccess); create_info.domain = thunk_proxy::AllocDomain::kSystem; } else { create_info.domain = thunk_proxy::AllocDomain::kLocal; } if (!MemFlags.ui32.CoarseGrain) create_info.mem_flags = thunk_proxy::kFineGrain; //In hsa-runtime, only kernarg region set Uncached. if (MemFlags.ui32.Uncached) create_info.mem_flags |= thunk_proxy::kKernarg; create_info.flags.physical_only = MemFlags.ui32.NoAddress; create_info.flags.alloc_va = !create_info.flags.physical_only; create_info.flags.interprocess = MemFlags.ui32.NoAddress; create_info.flags.interprocess |= MemFlags.ui32.Contiguous; create_info.flags.physical_contiguous = MemFlags.ui32.Contiguous; create_info.flags.locked = MemFlags.ui32.NoSubstitute;//AllocatePinned create_info.flags.virtual_alloc = MemFlags.ui32.OnlyAddress; create_info.flags.blit_kernel_object = (MemFlags.ui32.ExecuteBlit && MemFlags.ui32.ExecuteAccess && (create_info.domain == thunk_proxy::AllocDomain::kSystem)); /*when only alloc virtual or only physical, it's vmm allocation, force to local*/ if (create_info.flags.virtual_alloc || create_info.flags.physical_only || create_info.flags.physical_contiguous) { create_info.domain = thunk_proxy::AllocDomain::kLocal; SkipSubAlloc = true; } /* Only allow using the suballocator for ordinary VRAM.*/ bool trim_safe = false; if (!SkipSubAlloc && create_info.domain == thunk_proxy::AllocDomain::kLocal) { /* just quickly skip SA if size is bigger than SA block size.*/ gpusize real_size; if (create_info.size > GPU_HUGE_PAGE_SIZE) real_size = wsl::AlignUp(create_info.size, GPU_HUGE_PAGE_SIZE); else real_size = wsl::AlignUp(create_info.size, getpagesize()); if (real_size < fragment_allocator_.default_block_size()) { *MemoryAddress = fragment_allocator_.alloc(real_size); if (*MemoryAddress) return HSAKMT_STATUS_SUCCESS; } /* SA might keep a lot of free blocks as *cache*. * We can trim them if direct allocation fails at first time. */ trim_safe = true; } after_trim: auto code = dev->CreateGpuMemory(create_info, &gpu_mem); if (code == ErrorCode::Success) { std::lock_guard gard(*allocation_map_lock_); /* For these physical allcations, use GpuMemory object's address as thunk handle*/ if (create_info.flags.physical_only || create_info.dmabuf_fd > 0) *MemoryAddress = reinterpret_cast(gpu_mem->HandleApeAddress()); else *MemoryAddress = reinterpret_cast(gpu_mem->GpuAddress()); (*allocation_map_)[*MemoryAddress] = Allocation( gpu_mem->GetGpuMemoryHandle(), *MemoryAddress, (uint64_t)*MemoryAddress, create_info.size, false, nullptr, SizeInBytes, MemFlags.ui32.GTTAccess ? 0 : PreferredNode, MemFlags.Value); return HSAKMT_STATUS_SUCCESS; } else if (trim_safe) { /* attempt to release memory from the block allocator and retry */ fragment_allocator_.trim(); trim_safe = false; goto after_trim; } return HSAKMT_STATUS_ERROR; } HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode, HSAuint64 SizeInBytes, HSAuint64 Alignment, HsaMemFlags MemFlags, void **MemoryAddress) { return hsaKmtAllocMemoryAlignInternal(PreferredNode, SizeInBytes, Alignment, MemFlags, MemoryAddress, !dxg_runtime->enable_thunk_sub_allocator); } HSAKMT_STATUS hsaKmtFreeMemoryInternal(void *MemoryAddress, HSAuint64 SizeInBytes, bool SkipSubAlloc) { CHECK_DXG_OPEN(); if (!MemoryAddress) return HSAKMT_STATUS_INVALID_PARAMETER; if (!SkipSubAlloc) { if (fragment_allocator_.free(MemoryAddress)) return HSAKMT_STATUS_SUCCESS; } wsl::thunk::GpuMemory *gpu_mem = nullptr; { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(MemoryAddress); if (it == allocation_map_->end()) { return HSAKMT_STATUS_ERROR; } gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); if (gpu_mem->IsQueueReferenced()) return HSAKMT_STATUS_ERROR; wsl::thunk::GpuMemoryDescFlags flags; flags.reserved = gpu_mem->Flags(); if (flags.is_imported_vram_ipc && gpu_mem->DecSharedReference()) { pr_info("memory is still referenced\n"); return HSAKMT_STATUS_SUCCESS; } if (it->second.dmabuf_fd >= 0) { close(it->second.dmabuf_fd); it->second.dmabuf_fd = -1; } allocation_map_->erase(it); } delete gpu_mem; return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtFreeMemory(void *MemoryAddress, HSAuint64 SizeInBytes) { return hsaKmtFreeMemoryInternal(MemoryAddress, SizeInBytes); } bool queue_acquire_buffer(void *MemoryAddress) { if (!MemoryAddress) return false; wsl::thunk::GpuMemory *gpu_mem = nullptr; { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(MemoryAddress); if (it == allocation_map_->end()) { return HSAKMT_STATUS_ERROR; } gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); gpu_mem->GetQueueReference(); } if (gpu_mem == nullptr) return false; return true; } bool queue_release_buffer(void *MemoryAddress) { if (!MemoryAddress) return false; wsl::thunk::GpuMemory *gpu_mem = nullptr; { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(MemoryAddress); if (it == allocation_map_->end()) { return HSAKMT_STATUS_ERROR; } gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); gpu_mem->PutQueueReference(); } if (gpu_mem == nullptr) return false; return true; } wsl::thunk::GpuMemory *get_gpu_mem(void *MemoryAddress) { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(MemoryAddress); if (it == allocation_map_->end()) { return nullptr; } return wsl::thunk::GpuMemory::Convert(it->second.handle); } HSAKMT_STATUS HSAKMTAPI hsaKmtAvailableMemory(HSAuint32 Node, HSAuint64 *AvailableBytes) { CHECK_DXG_OPEN(); if (!AvailableBytes) return HSAKMT_STATUS_INVALID_PARAMETER; wsl::thunk::WDDMDevice *dev = get_wddmdev(Node); if (!dev) return HSAKMT_STATUS_ERROR; *AvailableBytes = dev->VramAvail(); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemory(void *MemoryAddress, HSAuint64 MemorySizeInBytes) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryToNodes(void *MemoryAddress, HSAuint64 MemorySizeInBytes, HSAuint64 NumberOfNodes, HSAuint32 *NodeArray) { CHECK_DXG_OPEN(); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags( void *MemoryAddress, HSAuint64 MemorySizeInBytes, HsaMemFlags MemFlags) { CHECK_DXG_OPEN(); if (!MemoryAddress) return HSAKMT_STATUS_INVALID_PARAMETER; pr_debug("address %p\n", MemoryAddress); if (MemFlags.ui32.ExtendedCoherent && MemFlags.ui32.CoarseGrain) return HSAKMT_STATUS_INVALID_PARAMETER; // Registered memory should be ordinary paged host memory. if ((MemFlags.ui32.HostAccess != 1) || (MemFlags.ui32.NonPaged == 1)) return HSAKMT_STATUS_NOT_SUPPORTED; if (!dxg_runtime->hsakmt_is_dgpu) /* TODO: support mixed APU and dGPU configurations */ return HSAKMT_STATUS_NOT_SUPPORTED; return HSAKMT_STATUS_SUCCESS; } bool is_ipc_sysmemfd(int fd) { std::string fdPath = "/proc/self/fd/" + std::to_string(fd); char linkTarget[256]; ssize_t bytes = readlink(fdPath.c_str(), linkTarget, sizeof(linkTarget) - 1); if (bytes == -1) return false; linkTarget[bytes] = '\0'; return strstr(linkTarget, "rocr4wsl_gtt") != nullptr; } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodes(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, HSAuint64 NumberOfNodes, HSAuint32 *NodeArray) { HSA_REGISTER_MEM_FLAGS regFlags; regFlags.Value = 0; return hsaKmtRegisterGraphicsHandleToNodesExt(GraphicsResourceHandle, GraphicsResourceInfo, NumberOfNodes, NodeArray, regFlags); } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterGraphicsHandleToNodesExt(HSAuint64 GraphicsResourceHandle, HsaGraphicsResourceInfo *GraphicsResourceInfo, HSAuint64 NumberOfNodes, HSAuint32 *NodeArray, HSA_REGISTER_MEM_FLAGS RegisterFlags) { CHECK_DXG_OPEN(); uint32_t *gpu_id_array = NULL; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; if (is_ipc_sysmemfd(GraphicsResourceHandle)) { GraphicsResourceInfo->NodeId = dxg_runtime->default_node; pr_info("skip register sysmemfd. It would be released in next step\n"); return HSAKMT_STATUS_SUCCESS; } if (NumberOfNodes == 0) { RegisterFlags.ui32.requiresVAddr = 0; NumberOfNodes = 1; NodeArray = (HSAuint32*)&(dxg_runtime->default_node); } pr_debug("number of nodes %lu\n", NumberOfNodes); wsl::thunk::GpuMemoryHandle mem_handle; ret = import_dmabuf_fd(GraphicsResourceHandle, NodeArray[0], RegisterFlags.ui32.requiresVAddr, false, &mem_handle); if (ret != HSAKMT_STATUS_SUCCESS) { pr_err("hsaKmtRegisterGraphicsHandleToNodesExt: import_dmabuf_fd failed, " "GraphicsResourceHandle: %lu, NodeId: %u\n", GraphicsResourceHandle, NodeArray[0]); return ret; } wsl::thunk::GpuMemory *gpu_mem = wsl::thunk::GpuMemory::Convert(mem_handle); GraphicsResourceInfo->NodeId = gpu_mem->GetDevice()->NodeId(); GraphicsResourceInfo->SizeInBytes = gpu_mem->ClientSize(); GraphicsResourceInfo->MemoryAddress = RegisterFlags.ui32.requiresVAddr ? reinterpret_cast(gpu_mem->GpuAddress()): reinterpret_cast(gpu_mem->HandleApeAddress()); return ret; } HSAKMT_STATUS HSAKMTAPI hsaKmtExportDMABufHandle(void *MemoryAddress, HSAuint64 MemorySizeInBytes, int *DMABufFd, HSAuint64 *Offset) { CHECK_DXG_OPEN(); std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->upper_bound(MemoryAddress); if (it != allocation_map_->begin()) { --it; if (it->second.dmabuf_fd == -1) { auto gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); auto code = gpu_mem->ExportPhysicalHandle(DMABufFd); if (code != ErrorCode::Success) return HSAKMT_STATUS_ERROR; it->second.dmabuf_fd = *DMABufFd; } *DMABufFd = dup(it->second.dmabuf_fd); *Offset = reinterpret_cast(MemoryAddress) - it->second.gpu_addr; return HSAKMT_STATUS_SUCCESS; } return HSAKMT_STATUS_ERROR; } HSAKMT_STATUS HSAKMTAPI hsaKmtGetMemoryHandle(void *MemoryAddress, HSAuint64 SizeInBytes, uint64_t *SharedMemoryHandle) { CHECK_DXG_OPEN(); return HSAKMT_STATUS_NOT_SUPPORTED; } HSAKMT_STATUS import_dmabuf_fd(int DMABufFd, uint32_t NodeId, bool alloc_va, bool is_ipc_memfd, wsl::thunk::GpuMemoryHandle *GpuMemHandle) { CHECK_DXG_OPEN(); *GpuMemHandle = nullptr; wsl::thunk::WDDMDevice* dev = get_wddmdev(NodeId); wsl::thunk::GpuMemory *gpu_mem = nullptr; wsl::thunk::GpuMemoryCreateInfo create_info{}; create_info.dmabuf_fd = DMABufFd; create_info.flags.alloc_va = alloc_va; if (is_ipc_memfd) { struct stat st; fstat(DMABufFd, &st); uint64_t sz = st.st_size; if (4096 <= sz && sz < dxg_runtime->SystemHeapSize() && (sz & 0xfff) == 0) { pr_debug("DMABufFd %d is sys mem fd(IPC signal), get size:%ld from it\n", DMABufFd, st.st_size); create_info.flags.sysmem_ipc_sig_importer = 1; // set to 1 when backend is system memory create_info.size = st.st_size; } } gpusize gpu_va = 0; auto code = dev->CreateGpuMemory(create_info, &gpu_mem, &gpu_va); if (code == ErrorCode::SameProcessSameDevice) { /* Unit_hipMemPoolExportToShareableHandle_SameProc */ pr_info("imported from same process, use the old one\n"); std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find((void*)gpu_va); if (it == allocation_map_->end()) { pr_err("where's the conflict buffer? va %#lx\n", create_info.va_hint); return HSAKMT_STATUS_ERROR; } wsl::thunk::GpuMemory *conflict_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); conflict_mem->IncSharedReference(); *GpuMemHandle = it->second.handle; return HSAKMT_STATUS_SUCCESS; } else if (code != ErrorCode::Success) { pr_err("fail to import fd, ret %d\n", (int)code); return HSAKMT_STATUS_ERROR; } void *MemoryAddress; if (alloc_va) MemoryAddress = reinterpret_cast(gpu_mem->GpuAddress()); else MemoryAddress = reinterpret_cast(gpu_mem->HandleApeAddress()); *GpuMemHandle = gpu_mem->GetGpuMemoryHandle(); std::lock_guard gard(*allocation_map_lock_); /* * the gpu_mem->Flags() need convert back from GpuMemoryCreateFlags to * HsaMemFlags, reference hsaKmtAllocMemoryAlign * */ (*allocation_map_)[MemoryAddress] = Allocation( *GpuMemHandle, MemoryAddress, (uint64_t)MemoryAddress, gpu_mem->Size(), false, nullptr, gpu_mem->ClientSize(), NodeId, gpu_mem->Flags()); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtShareMemory(void *MemoryAddress, HSAuint64 SizeInBytes, HsaSharedMemoryHandle *SharedMemoryHandle) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandle(const HsaSharedMemoryHandle *SharedMemoryHandle, void **MemoryAddress, HSAuint64 *SizeInBytes) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterSharedHandleToNodes( const HsaSharedMemoryHandle *SharedMemoryHandle, void **MemoryAddress, HSAuint64 *SizeInBytes, HSAuint64 NumberOfNodes, HSAuint32 *NodeArray) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMRead(HSAuint32 Pid, HsaMemoryRange *LocalMemoryArray, HSAuint64 LocalMemoryArrayCount, HsaMemoryRange *RemoteMemoryArray, HSAuint64 RemoteMemoryArrayCount, HSAuint64 *SizeCopied) { CHECK_DXG_OPEN(); pr_warn_once("has been deprecated\n"); assert(false); return HSAKMT_STATUS_NOT_IMPLEMENTED; } HSAKMT_STATUS HSAKMTAPI hsaKmtProcessVMWrite(HSAuint32 Pid, HsaMemoryRange *LocalMemoryArray, HSAuint64 LocalMemoryArrayCount, HsaMemoryRange *RemoteMemoryArray, HSAuint64 RemoteMemoryArrayCount, HSAuint64 *SizeCopied) { CHECK_DXG_OPEN(); pr_warn_once("has been deprecated\n"); assert(false); return HSAKMT_STATUS_NOT_IMPLEMENTED; } HSAKMT_STATUS HSAKMTAPI hsaKmtDeregisterMemory(void *MemoryAddress) { CHECK_DXG_OPEN(); if (!MemoryAddress) return HSAKMT_STATUS_INVALID_PARAMETER; pr_debug("address %p\n", MemoryAddress); { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(MemoryAddress); if (it == allocation_map_->end()) { return HSAKMT_STATUS_SUCCESS; } auto *gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); wsl::thunk::GpuMemoryDescFlags flags; flags.reserved = gpu_mem->Flags(); // IPC mem(vram) if (flags.is_imported_vram_ipc && gpu_mem->DecSharedReference() == 0) { allocation_map_->erase(it); delete gpu_mem; return HSAKMT_STATUS_SUCCESS; } if (it->second.userptr) { allocation_map_->erase(it); allocation_map_->erase((void *)it->second.gpu_addr); delete gpu_mem; return HSAKMT_STATUS_SUCCESS; } } return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPU(void *MemoryAddress, HSAuint64 MemorySizeInBytes, HSAuint64 *AlternateVAGPU) { HSAuint64 NumberOfNodes = 1; HSAuint32 NodeArray[] = {dxg_runtime->default_node}; HsaMemMapFlags MemMapFlags; MemMapFlags.Value = 0; return hsaKmtMapMemoryToGPUNodes(MemoryAddress, MemorySizeInBytes, AlternateVAGPU, MemMapFlags, NumberOfNodes, NodeArray); } HSAKMT_STATUS HSAKMTAPI hsaKmtMapMemoryToGPUNodes( void *MemoryAddress, HSAuint64 MemorySizeInBytes, HSAuint64 *AlternateVAGPU, HsaMemMapFlags MemMapFlags, HSAuint64 NumberOfNodes, HSAuint32 *NodeArray) { CHECK_DXG_OPEN(); if (!MemoryAddress || !AlternateVAGPU) { pr_err("FIXME: mapping NULL pointer\n"); return HSAKMT_STATUS_ERROR; } uint64_t start = wsl::AlignDown((uint64_t)MemoryAddress, 4096); uint64_t end = wsl::AlignUp((uint64_t)MemoryAddress + MemorySizeInBytes, 4096); void *aligned_ptr = (void *)start; size_t aligned_size = end - start; { if (nullptr != fragment_allocator_.block_base(aligned_ptr)) return HSAKMT_STATUS_SUCCESS; } { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(aligned_ptr); if (it != allocation_map_->end()) { wsl::thunk::GpuMemory *gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); wsl::thunk::GpuMemoryDescFlags flags; flags.reserved = gpu_mem->Flags(); // IPC mem if (flags.is_imported_vram_ipc) { auto code = gpu_mem->MapGpuVirtualAddress(gpu_mem->GpuAddress(), gpu_mem->Size()); if (code != ErrorCode::Success) return HSAKMT_STATUS_ERROR; code = gpu_mem->MakeResident(); if (code != ErrorCode::Success) return HSAKMT_STATUS_ERROR; wsl::thunk::WDDMDevice *dev = gpu_mem->GetDevice(); if (!dev->WaitOnPagingFenceFromCpu()) return HSAKMT_STATUS_ERROR; return HSAKMT_STATUS_SUCCESS; } if (!it->second.userptr) { // GTT/Local mem if (it->second.size >= MemorySizeInBytes) { *AlternateVAGPU = (uint64_t)MemoryAddress; return HSAKMT_STATUS_SUCCESS; } else { return HSAKMT_STATUS_ERROR; } } } // userptr mem it = allocation_map_->find(MemoryAddress); if (it != allocation_map_->end()) { if (it->second.userptr && it->second.size >= MemorySizeInBytes) { *AlternateVAGPU = (uintptr_t)it->second.gpu_addr + ((uintptr_t)MemoryAddress - (uintptr_t)it->second.cpu_addr); return HSAKMT_STATUS_SUCCESS; } } } // map userptr wsl::thunk::WDDMDevice *dev = get_wddmdev(NodeArray[0]); if (!dev) return HSAKMT_STATUS_ERROR; wsl::thunk::GpuMemory *gpu_mem = nullptr; wsl::thunk::GpuMemoryHandle handle = 0; uint64_t addr; wsl::thunk::GpuMemoryCreateInfo create_info{}; create_info.domain = thunk_proxy::kUserMemory; create_info.size = aligned_size; create_info.user_ptr = aligned_ptr; auto code = dev->CreateGpuMemory(create_info, &gpu_mem); if (code == ErrorCode::Success) { addr = gpu_mem->GpuAddress(); handle = gpu_mem->GetGpuMemoryHandle(); } else { return HSAKMT_STATUS_ERROR; } { std::lock_guard guard(*allocation_map_lock_); (*allocation_map_)[MemoryAddress] = Allocation(handle, aligned_ptr, addr, aligned_size, true, MemoryAddress, MemorySizeInBytes); (*allocation_map_)[(void *)addr] = Allocation(handle, aligned_ptr, addr, aligned_size, true, nullptr, MemorySizeInBytes); } *AlternateVAGPU = addr + ((uintptr_t)MemoryAddress - (uintptr_t)aligned_ptr); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapMemoryToGPU(void *MemoryAddress) { CHECK_DXG_OPEN(); if (!MemoryAddress) { /* Workaround for runtime bug */ pr_err("FIXME: Unmapping NULL pointer\n"); return HSAKMT_STATUS_SUCCESS; } pr_debug("address %p\n", MemoryAddress); { if (nullptr != fragment_allocator_.block_base(MemoryAddress)) return HSAKMT_STATUS_SUCCESS; } wsl::thunk::GpuMemory *gpu_mem = nullptr; { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find(MemoryAddress); if (it == allocation_map_->end()) { return HSAKMT_STATUS_ERROR; } gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); if (gpu_mem->IsQueueReferenced()) return HSAKMT_STATUS_ERROR; // IPC mem wsl::thunk::GpuMemoryDescFlags flags; flags.reserved = gpu_mem->Flags(); if (flags.is_imported_vram_ipc && !gpu_mem->IsSharedFromSameProcess()) { auto code = gpu_mem->UnmapGpuVirtualAddress(gpu_mem->GpuAddress(), gpu_mem->Size()); if (code != ErrorCode::Success) return HSAKMT_STATUS_ERROR; gpu_mem->Evict(); return HSAKMT_STATUS_SUCCESS; } } return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtMapGraphicHandle(HSAuint32 NodeId, HSAuint64 GraphicDeviceHandle, HSAuint64 GraphicResourceHandle, HSAuint64 GraphicResourceOffset, HSAuint64 GraphicResourceSize, HSAuint64 *FlatMemoryAddress) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); /* This API was only ever implemented in KFD for Kaveri and * was never upstreamed. There are no open-source users of * this interface. It has been superseded by * RegisterGraphicsHandleToNodes. */ return HSAKMT_STATUS_NOT_IMPLEMENTED; } HSAKMT_STATUS HSAKMTAPI hsaKmtUnmapGraphicHandle(HSAuint32 NodeId, HSAuint64 FlatMemoryAddress, HSAuint64 SizeInBytes) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtGetTileConfig(HSAuint32 NodeId, HsaGpuTileConfig *config) { CHECK_DXG_OPEN(); pr_warn_once("not implemented\n"); assert(false); return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtQueryPointerInfo(const void *Pointer, HsaPointerInfo *PointerInfo) { CHECK_DXG_OPEN(); if (!Pointer || !PointerInfo) return HSAKMT_STATUS_INVALID_PARAMETER; pr_debug("pointer %p\n", Pointer); memset(PointerInfo, 0, sizeof(HsaPointerInfo)); wsl::thunk::GpuMemory *gpu_mem = nullptr; Allocation allocation_info; bool found = false; { std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->upper_bound(Pointer); if (it != allocation_map_->begin()) { --it; if (Pointer >= it->first && (Pointer < reinterpret_cast(it->first) + it->second.size_requested)) { allocation_info = it->second; gpu_mem = wsl::thunk::GpuMemory::Convert(it->second.handle); found = true; } } } if (!found) { pr_debug("can't found allocation for %p\n", Pointer); PointerInfo->Type = HSA_POINTER_UNKNOWN; return HSAKMT_STATUS_ERROR; } if (allocation_info.userptr) { PointerInfo->Type = HSA_POINTER_REGISTERED_USER; PointerInfo->SizeInBytes = allocation_info.size; } else if (gpu_mem->IsVirtual()) { PointerInfo->Type = HSA_POINTER_RESERVED_ADDR; } else { PointerInfo->Type = HSA_POINTER_ALLOCATED; PointerInfo->SizeInBytes = allocation_info.size_requested; } PointerInfo->Node = allocation_info.node_id; PointerInfo->MemFlags.Value = allocation_info.mem_flags_value; PointerInfo->CPUAddress = allocation_info.cpu_addr; PointerInfo->GPUAddress = allocation_info.gpu_addr; PointerInfo->UserData = allocation_info.rocr_userdata; return HSAKMT_STATUS_SUCCESS; } HSAKMT_STATUS HSAKMTAPI hsaKmtSetMemoryUserData(const void *Pointer, void *UserData) { CHECK_DXG_OPEN(); uint64_t aligned_ptr = wsl::AlignDown((uint64_t)Pointer, 4096); std::lock_guard gard(*allocation_map_lock_); auto it = allocation_map_->find((void *)aligned_ptr); if (it != allocation_map_->end()) { it->second.rocr_userdata = UserData; return HSAKMT_STATUS_SUCCESS; } return HSAKMT_STATUS_ERROR; } HSAKMT_STATUS HSAKMTAPI hsaKmtReplaceAsanHeaderPage(void *addr) { CHECK_DXG_OPEN(); pr_warn_once("not supported\n"); assert(false); #ifdef SANITIZER_AMDGPU pr_debug("address %p\n", addr); CHECK_DXG_OPEN(); return HSAKMT_STATUS_SUCCESS; #else return HSAKMT_STATUS_NOT_SUPPORTED; #endif } HSAKMT_STATUS HSAKMTAPI hsaKmtReturnAsanHeaderPage(void *addr) { CHECK_DXG_OPEN(); pr_warn_once("not supported\n"); assert(false); #ifdef SANITIZER_AMDGPU pr_debug("address %p\n", addr); CHECK_DXG_OPEN(); return HSAKMT_STATUS_SUCCESS; #else return HSAKMT_STATUS_NOT_SUPPORTED; #endif }