2024-04-30 09:01:09 -05:00
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
//
|
|
|
|
|
// The University of Illinois/NCSA
|
|
|
|
|
// Open Source License (NCSA)
|
|
|
|
|
//
|
|
|
|
|
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// Developed by:
|
|
|
|
|
//
|
|
|
|
|
// AMD Research and AMD HSA Software Development
|
|
|
|
|
//
|
|
|
|
|
// Advanced Micro Devices, Inc.
|
|
|
|
|
//
|
|
|
|
|
// www.amd.com
|
|
|
|
|
//
|
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
// of this software and associated documentation files (the "Software"), to
|
|
|
|
|
// deal with the Software without restriction, including without limitation
|
|
|
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
//
|
|
|
|
|
// - Redistributions of source code must retain the above copyright notice,
|
|
|
|
|
// this list of conditions and the following disclaimers.
|
|
|
|
|
// - Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
// notice, this list of conditions and the following disclaimers in
|
|
|
|
|
// the documentation and/or other materials provided with the distribution.
|
|
|
|
|
// - Neither the names of Advanced Micro Devices, Inc,
|
|
|
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
|
|
|
// products derived from this Software without specific prior written
|
|
|
|
|
// permission.
|
|
|
|
|
//
|
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
|
|
|
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
|
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
|
// DEALINGS WITH THE SOFTWARE.
|
|
|
|
|
//
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
#include <cinttypes>
#include <bitset>
#include <memory>
#include <new>

#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <sys/stat.h>
#include <linux/mman.h>
#include <fcntl.h>
#include <unistd.h>

#include "impl/wddm/status.h"
#include "impl/wddm/types.h"
#include "impl/wddm/device.h"
#include "impl/wddm/queue.h"
|
2024-04-30 09:01:09 -05:00
|
|
|
|
2024-09-10 08:16:11 -07:00
|
|
|
namespace wsl {
|
|
|
|
|
namespace thunk {
|
2024-04-30 09:01:09 -05:00
|
|
|
|
|
|
|
|
// Number of AQL frames used when sizing the command buffer
// (see InitCmdbufInfo: cmdbuf_size_ = frame count * per-frame size, page aligned).
const uint32_t WDDMDevice::cmdbuf_aql_frame_num_ = 0x1000;
|
|
|
|
|
|
|
|
|
|
// Constructs a device on top of an already opened WDDM adapter and runs the
// complete bring-up sequence.
//
// @param adapter       kernel handle of the adapter this device is created on.
// @param adapter_luid  LUID of the same adapter (used for statistics queries).
//
// The calls below depend on each other's side effects (e.g. the paging queue
// needs device_, the VA managers need the reserved ranges), so their order
// must be preserved. NOTE(review): the boolean results of the individual
// steps are not checked here.
WDDMDevice::WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid)
    : adapter_(adapter), adapter_luid_(adapter_luid) {
  memset(&device_info_, 0, sizeof(device_info_));

  // Adapter/device level objects.
  ParseDeviceInfo();
  CreateDevice();
  SetPowerOptimization(false);
  CreatePagingQueue();

  // Address-space reservations.
  ReserveLocalHeapSpace();
  ReserveSystemHeapSpace();
  InitHandleApertureSpace();

  // Allocators that hand out addresses from the ranges reserved above.
  InitVaMgr();
  InitHandleApertureMgr();

  InitCmdbufInfo();
}
|
|
|
|
|
|
|
|
|
|
// Tears the device down: releases the reserved heap ranges first, then the
// paging queue, restores the power-optimization setting that the constructor
// changed, and finally destroys the kernel device and the parsed device info.
WDDMDevice::~WDDMDevice() {
  // Address-space reservations.
  FreeLocalHeapSpace();
  FreeSystemHeapSpace();

  // Kernel objects, in reverse order of creation.
  DestroyPagingQueue();
  SetPowerOptimization(true);
  DestroyDevice();

  DestroyDeviceInfo();
}
|
|
|
|
|
|
|
|
|
|
// Thin wrapper around D3DKMTQueryAdapterInfo.
//
// @param adapter  adapter handle to query.
// @param type     kind of information requested.
// @param data     caller-provided buffer passed as the private driver data.
// @param size     size of that buffer in bytes.
// @return the NTSTATUS reported by D3DKMTQueryAdapterInfo.
static NTSTATUS WDDMQueryAdapter(D3DKMT_HANDLE adapter, KMTQUERYADAPTERINFOTYPE type,
                                 void *data, int size)
{
  D3DKMT_QUERYADAPTERINFO query{};

  query.hAdapter = adapter;
  query.Type = type;
  query.pPrivateDriverData = data;
  query.PrivateDriverDataSize = size;

  return D3DKMTQueryAdapterInfo(&query);
}
|
|
|
|
|
|
|
|
|
|
uint64_t WDDMDevice::VramAvail(void) {
|
|
|
|
|
D3DKMT_QUERYSTATISTICS stats;
|
|
|
|
|
NTSTATUS ret;
|
|
|
|
|
uint64_t usedVis = 0;
|
|
|
|
|
uint64_t usedInv = 0;
|
|
|
|
|
|
|
|
|
|
// wait fence complete
|
|
|
|
|
uint64_t value = page_fence_value_.load();
|
|
|
|
|
if(!CpuWait(&page_syncobj_, &value, 1, false))
|
|
|
|
|
return HSA_STATUS_ERROR;
|
|
|
|
|
|
|
|
|
|
// local cpu-visible memory
|
|
|
|
|
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
|
|
|
|
|
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
|
|
|
|
|
stats.AdapterLuid = adapter_luid_;
|
|
|
|
|
stats.QuerySegment.SegmentId = 0;
|
|
|
|
|
ret = D3DKMTQueryStatistics(&stats);
|
|
|
|
|
if (ret == 0)
|
|
|
|
|
usedVis = stats.QueryResult.SegmentInformation.BytesResident;
|
|
|
|
|
|
|
|
|
|
// local invisible memory
|
|
|
|
|
memset(&stats, 0, sizeof(D3DKMT_QUERYSTATISTICS));
|
|
|
|
|
stats.Type = D3DKMT_QUERYSTATISTICS_SEGMENT;
|
|
|
|
|
stats.AdapterLuid = adapter_luid_;
|
|
|
|
|
stats.QuerySegment.SegmentId = 1;
|
|
|
|
|
|
|
|
|
|
ret = D3DKMTQueryStatistics(&stats);
|
|
|
|
|
if (ret == 0)
|
|
|
|
|
usedInv = stats.QueryResult.SegmentInformation.BytesResident;
|
|
|
|
|
|
|
|
|
|
return LocalHeapSize() - usedVis - usedInv;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::CreateDevice(void) {
|
|
|
|
|
D3DKMT_CREATEDEVICE args = {0};
|
|
|
|
|
args.hAdapter = adapter_;
|
|
|
|
|
|
|
|
|
|
NTSTATUS ret = D3DKMTCreateDevice(&args);
|
|
|
|
|
if (ret == STATUS_SUCCESS) {
|
|
|
|
|
device_ = args.hDevice;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail %x\n", ret);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::DestroyDevice(void) {
|
|
|
|
|
D3DKMT_DESTROYDEVICE args = {0};
|
|
|
|
|
args.hDevice = device_;
|
|
|
|
|
|
|
|
|
|
NTSTATUS ret = D3DKMTDestroyDevice(&args);
|
|
|
|
|
if (ret == STATUS_SUCCESS)
|
|
|
|
|
return true;
|
|
|
|
|
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail %x\n", ret);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::CreatePagingQueue(void) {
|
|
|
|
|
D3DKMT_CREATEPAGINGQUEUE args = {0};
|
|
|
|
|
args.hDevice = device_;
|
|
|
|
|
args.Priority = D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL;
|
|
|
|
|
|
|
|
|
|
NTSTATUS ret = D3DKMTCreatePagingQueue(&args);
|
|
|
|
|
if (ret == STATUS_SUCCESS) {
|
|
|
|
|
page_queue_ = args.hPagingQueue;
|
|
|
|
|
page_syncobj_ = args.hSyncObject;
|
|
|
|
|
page_fence_addr_ = (uint64_t *)args.FenceValueCPUVirtualAddress;
|
|
|
|
|
page_fence_value_ = 0;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail %x\n", ret);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::DestroyPagingQueue(void) {
|
|
|
|
|
D3DDDI_DESTROYPAGINGQUEUE args = {0};
|
|
|
|
|
args.hPagingQueue = page_queue_;
|
|
|
|
|
|
|
|
|
|
NTSTATUS ret = D3DKMTDestroyPagingQueue(&args);
|
|
|
|
|
if (ret == STATUS_SUCCESS)
|
|
|
|
|
return true;
|
|
|
|
|
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail %x\n", ret);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::CommitSystemHeapSpace(void* addr, int64_t size, bool lock) {
|
|
|
|
|
int32_t protFlags = PROT_READ | PROT_WRITE | PROT_EXEC;
|
|
|
|
|
int32_t mapFlags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|
|
|
|
|
|
MAP_NORESERVE|MAP_UNINITIALIZED;
|
|
|
|
|
if (lock)
|
|
|
|
|
mapFlags |= MAP_LOCKED;
|
|
|
|
|
void* paddr = mmap(addr, size, protFlags, mapFlags, -1, 0);
|
|
|
|
|
if (paddr == MAP_FAILED) {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to commit %s addr = %p, paddr = %p\n", (lock ? "locked" : ""), addr, paddr);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
assert(addr == paddr);
|
|
|
|
|
|
|
|
|
|
/*if (!Runtime::runtime_singleton_->PinWARequired())
|
|
|
|
|
return true;*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Do not make the pages in this range available to the child
|
|
|
|
|
* after a fork(2). This is useful to prevent copy-on-write
|
|
|
|
|
* semantics from changing the physical location of a page if
|
|
|
|
|
* the parent writes to it after a fork(2). (Such page
|
|
|
|
|
* relocations cause problems for hardware that DMAs into the
|
|
|
|
|
* page.)
|
|
|
|
|
*
|
|
|
|
|
* https://man7.org/linux/man-pages/man2/madvise.2.html
|
|
|
|
|
*/
|
|
|
|
|
if (madvise(addr, size, MADV_DONTFORK))
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to set MADV_DONTFORK for addr = %p\n", addr);
|
2024-04-30 09:01:09 -05:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::DecommitSystemHeapSpace(void* addr, int64_t size) {
|
|
|
|
|
int32_t protFlags = PROT_NONE;
|
|
|
|
|
int32_t mapFlags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|
|
|
|
|
|
MAP_NORESERVE|MAP_UNINITIALIZED;
|
|
|
|
|
void* paddr = mmap(addr, size, protFlags, mapFlags, -1, 0);
|
|
|
|
|
if (paddr == MAP_FAILED) {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to decommit addr = %p, paddr = %p\n", addr, paddr);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
assert(addr == paddr);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2025-02-24 16:17:58 +08:00
|
|
|
bool WDDMDevice::CommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd, bool lock) {
|
|
|
|
|
int fd = -1;
|
|
|
|
|
|
|
|
|
|
if (memfd == -1) {
|
|
|
|
|
fd = memfd_create("rocr4wsl_gtt", MFD_CLOEXEC);
|
|
|
|
|
if (fd < 0) {
|
|
|
|
|
pr_err("memfd_create failed\n");
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ftruncate(fd, size);
|
|
|
|
|
} else {
|
|
|
|
|
fd = memfd;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t protFlags = PROT_READ | PROT_WRITE;
|
|
|
|
|
int32_t mapFlags = MAP_SHARED | MAP_FIXED | MAP_NORESERVE |
|
|
|
|
|
MAP_UNINITIALIZED | (lock ? MAP_LOCKED : 0);
|
|
|
|
|
|
|
|
|
|
void* paddr = mmap(addr, size, protFlags, mapFlags, fd, 0);
|
|
|
|
|
if (paddr == MAP_FAILED) {
|
|
|
|
|
pr_err("fail to commit %s addr = %p, paddr = %p\n", (lock ? "locked" : ""), addr, paddr);
|
|
|
|
|
if (memfd == -1)
|
|
|
|
|
close(fd);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
assert(addr == paddr);
|
|
|
|
|
|
|
|
|
|
memfd = fd;
|
|
|
|
|
|
|
|
|
|
if (madvise(addr, size, MADV_DONTFORK))
|
|
|
|
|
pr_err("fail to set MADV_DONTFORK for addr = %p\n", addr);
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::DecommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd) {
|
|
|
|
|
if (munmap(addr, size) != 0) {
|
|
|
|
|
pr_err("fail to unmap = %p \n", addr);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
close(memfd);
|
|
|
|
|
memfd = -1;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-30 09:01:09 -05:00
|
|
|
bool WDDMDevice::ReserveSystemHeapSpace() {
|
|
|
|
|
struct sysinfo info;
|
|
|
|
|
int ret = sysinfo(&info);
|
|
|
|
|
uint64_t max_ram = 0x10000000000;
|
|
|
|
|
uint64_t alignment = 0x100000000;
|
|
|
|
|
assert(!ret);
|
|
|
|
|
|
|
|
|
|
int32_t protFlags = PROT_NONE;
|
|
|
|
|
// minimum of reserve size is 8G, maximum of reserve size is 1T.
|
|
|
|
|
system_heap_space_size_ = std::min(AlignUp(info.totalram, alignment) * 2, max_ram);
|
|
|
|
|
void* cpu = mmap(NULL, system_heap_space_size_, protFlags,
|
|
|
|
|
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
|
|
|
|
if (cpu == MAP_FAILED) {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to reserve system_heap_space_size_ = %lx \n", system_heap_space_size_);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
system_heap_space_start_ = (uint64_t)cpu;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::FreeSystemHeapSpace(void) {
|
|
|
|
|
void *cpu = (void *)system_heap_space_start_;
|
2025-02-13 10:52:15 +08:00
|
|
|
if (munmap(cpu, system_heap_space_size_) != 0) {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to unmap = %p \n", cpu);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* To find the avaliable same range for cpu
|
|
|
|
|
* virtual space and gpu virtual space.
|
|
|
|
|
* sys_va_size of cpu va range is larger 1G
|
|
|
|
|
* than gpu va range, otherwise ReserveGPUVirtualAddress
|
|
|
|
|
* will return error.
|
|
|
|
|
*/
|
|
|
|
|
bool WDDMDevice::ReserveLocalHeapSpace(void) {
|
|
|
|
|
uint64_t sys_va[16] = {0};
|
|
|
|
|
uint64_t local_va;
|
|
|
|
|
uint64_t sys_va_size;
|
|
|
|
|
int match_index = -1;
|
|
|
|
|
uint64_t align = 0x40000000; /* 1G */
|
|
|
|
|
void* ptr = NULL;
|
|
|
|
|
|
|
|
|
|
local_heap_space_start_ = 0;
|
|
|
|
|
local_heap_space_size_ = AlignUp(LocalHeapSize(), align) * 4;
|
|
|
|
|
sys_va_size = local_heap_space_size_ + align;
|
|
|
|
|
|
|
|
|
|
/* it will retry 16 times to find the avaliable range. */
|
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
|
|
|
local_va = 0;
|
|
|
|
|
ptr = mmap(NULL, sys_va_size , PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
|
|
|
|
if (ptr == MAP_FAILED) {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to reserve cpu va in %d time!\n", i);
|
2024-04-30 09:01:09 -05:00
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
sys_va[i] = (uint64_t)ptr;
|
|
|
|
|
|
2024-09-10 08:16:11 -07:00
|
|
|
if (d3dthunk::ReserveGpuVirtualAddress(
|
2024-04-30 09:01:09 -05:00
|
|
|
adapter_, local_heap_space_size_,
|
|
|
|
|
(uint64_t)ptr,
|
|
|
|
|
(uint64_t)ptr + sys_va_size, &local_va) == ErrorCode::Success) {
|
|
|
|
|
|
|
|
|
|
match_index = i;
|
|
|
|
|
local_heap_space_start_ = local_va;
|
2024-10-09 15:35:44 +08:00
|
|
|
pr_debug("success to reserve gpu va %lx and va cpu %p in %d time\n",
|
|
|
|
|
local_va, ptr, i);
|
2024-04-30 09:01:09 -05:00
|
|
|
break;
|
|
|
|
|
} else {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("%s fail to reserve gpu va for cpu va %p in %d time!\n",
|
2024-04-30 09:01:09 -05:00
|
|
|
__FUNCTION__, ptr, i);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (match_index >= 0) {
|
|
|
|
|
/* release cpu unused ranges*/
|
|
|
|
|
uint64_t left_size = local_va - sys_va[match_index];
|
|
|
|
|
uint64_t right_size = align - left_size;
|
|
|
|
|
if ((left_size > 0) && munmap((void*)sys_va[match_index], left_size))
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to unmap left %lx with size %lx\n", sys_va[match_index], left_size);
|
2024-04-30 09:01:09 -05:00
|
|
|
if ((right_size > 0) && munmap((void*)(local_va + local_heap_space_size_), right_size))
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to unmap right %lx with size %lx\n", (local_va + local_heap_space_size_), right_size);
|
2024-04-30 09:01:09 -05:00
|
|
|
} else {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to reserve Local Heap Space!\n");
|
2024-04-30 09:01:09 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* free match fail address for cpu va */
|
|
|
|
|
int free = match_index >= 0 ? match_index : 16;
|
|
|
|
|
for (int j = 0; j < free; j++) {
|
|
|
|
|
if (sys_va[j] != 0 && munmap((void*)sys_va[j], sys_va_size)) {
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail to unmap %d %lx\n", j, sys_va[j]);
|
2024-04-30 09:01:09 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return match_index >= 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::FreeLocalHeapSpace(void) {
|
2024-09-10 08:16:11 -07:00
|
|
|
d3dthunk::FreeGpuVirtualAddress(adapter_, local_heap_space_start_, local_heap_space_size_);
|
2024-04-30 09:01:09 -05:00
|
|
|
void *cpu = (void *)local_heap_space_start_;
|
|
|
|
|
return munmap(cpu, local_heap_space_size_) == 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void WDDMDevice::InitVaMgr() {
|
2024-10-15 14:31:54 +08:00
|
|
|
local_va_mgr_ = std::make_unique<VaMgr>(local_heap_space_start_,
|
|
|
|
|
local_heap_space_size_,
|
|
|
|
|
DEFAULT_GPU_PAGE_SIZE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Creates the allocator that hands out addresses from the handle aperture
// chosen by InitHandleApertureSpace, using DEFAULT_GPU_PAGE_SIZE as its third
// constructor argument.
void WDDMDevice::InitHandleApertureMgr() {
  auto mgr = std::make_unique<VaMgr>(handle_aperture_start_,
                                     handle_aperture_size_,
                                     DEFAULT_GPU_PAGE_SIZE);
  handle_aperture_mgr_ = std::move(mgr);
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::InitHandleApertureSpace(void) {
|
|
|
|
|
handle_aperture_start_ = START_NON_CANONICAL_ADDR;
|
|
|
|
|
handle_aperture_size_ = 1ULL << 47;
|
|
|
|
|
|
|
|
|
|
while (handle_aperture_start_ < END_NON_CANONICAL_ADDR - 1) {
|
|
|
|
|
if (device_info_.private_aperture_base &&
|
|
|
|
|
IS_OVERLAPPING(device_info_.private_aperture_base,
|
|
|
|
|
device_info_.private_aperture_size,
|
|
|
|
|
handle_aperture_start_,
|
|
|
|
|
handle_aperture_size_)) {
|
|
|
|
|
handle_aperture_start_ += (1ULL << 47);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (device_info_.shared_aperture_base &&
|
|
|
|
|
IS_OVERLAPPING(device_info_.shared_aperture_base,
|
|
|
|
|
device_info_.shared_aperture_size,
|
|
|
|
|
handle_aperture_start_,
|
|
|
|
|
handle_aperture_size_)) {
|
|
|
|
|
handle_aperture_start_ += (1ULL << 47);
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pr_debug("handle aperture start %lx, size %lx\n", handle_aperture_start_, handle_aperture_size_);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
handle_aperture_start_ = 0;
|
|
|
|
|
pr_err("fail\n");
|
|
|
|
|
|
|
|
|
|
return false;
|
2024-04-30 09:01:09 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void WDDMDevice::SetPowerOptimization(bool restore) {
|
|
|
|
|
void *priv_data;
|
|
|
|
|
int priv_size;
|
|
|
|
|
|
2024-10-14 15:52:52 +08:00
|
|
|
priv_size = thunk_proxy::CreatePowerOptPrivData(&priv_data, restore);
|
2024-04-30 09:01:09 -05:00
|
|
|
|
|
|
|
|
D3DKMT_ESCAPE d3dkmt_escape;
|
|
|
|
|
memset(&d3dkmt_escape, 0, sizeof(d3dkmt_escape));
|
|
|
|
|
|
|
|
|
|
d3dkmt_escape.hAdapter = adapter_;
|
|
|
|
|
d3dkmt_escape.hDevice = device_;
|
|
|
|
|
d3dkmt_escape.hContext = 0; //KMD only use device to identify the process
|
|
|
|
|
d3dkmt_escape.Type = D3DKMT_ESCAPE_DRIVERPRIVATE;
|
|
|
|
|
d3dkmt_escape.pPrivateDriverData = priv_data;
|
|
|
|
|
d3dkmt_escape.PrivateDriverDataSize = priv_size;
|
|
|
|
|
d3dkmt_escape.Flags.HardwareAccess = true;
|
|
|
|
|
|
|
|
|
|
NTSTATUS status = D3DKMTEscape(&d3dkmt_escape);
|
2024-10-09 15:35:44 +08:00
|
|
|
pr_debug("status %d, restore %d\n", status, restore);
|
2024-10-14 15:52:52 +08:00
|
|
|
thunk_proxy::DestroyPrivData(priv_data);
|
2024-04-30 09:01:09 -05:00
|
|
|
}
|
|
|
|
|
|
2024-10-14 15:52:52 +08:00
|
|
|
// Reserves a GPU virtual address range of `size` bytes.
//
// - thunk_proxy::kSystem: the range is reserved inside the system heap space
//   and the same CPU range is committed (optionally locked). `hit_base_addr`
//   and `alignment` are not used on this path.
// - any other domain: the range comes from the local VA manager; alignment
//   defaults to 64K when `alignment` is 0 and is raised to
//   GPU_HUGE_PAGE_SIZE for kLocal requests of at least that size.
//
// @param out_gpu_virt_addr  receives the address on success and 0 on failure,
//                           except when the system-heap GPU reservation
//                           itself fails, where it is left untouched.
// @return ErrorCode::Success, the reservation error, SyscallFail (commit
//         failed) or OutOfGpuMemory (local allocator exhausted).
ErrorCode WDDMDevice::ReserveGpuVirtualAddress(const thunk_proxy::AllocDomain domain,
                                               gpusize hit_base_addr, gpusize size,
                                               gpusize *out_gpu_virt_addr, gpusize alignment, bool lock) {
  gpusize va = 0;
  ErrorCode status = ErrorCode::Success;

  if (domain != thunk_proxy::kSystem) {
    uint64_t align = alignment == 0 ? (64 * 1024) : alignment; // default 64K alignment
    if (domain == thunk_proxy::kLocal && size >= GPU_HUGE_PAGE_SIZE)
      align = GPU_HUGE_PAGE_SIZE;

    va = local_va_mgr_->Alloc(size, align, hit_base_addr);
    if (va == 0)
      status = ErrorCode::OutOfGpuMemory;
  } else {
    status = d3dthunk::ReserveGpuVirtualAddress(adapter_, size,
                                                system_heap_space_start_,
                                                system_heap_space_start_ + system_heap_space_size_,
                                                &va);
    if (status != ErrorCode::Success)
      return status;

    // Back the reserved GPU range with CPU memory at the same address; undo
    // the GPU reservation if that fails.
    if (!CommitSystemHeapSpace((void*)va, size, lock)) {
      d3dthunk::FreeGpuVirtualAddress(adapter_, va, size);
      status = ErrorCode::SyscallFail;
    }
  }

  *out_gpu_virt_addr = (status == ErrorCode::Success) ? va : 0;
  return status;
}
|
|
|
|
|
|
2024-10-14 15:52:52 +08:00
|
|
|
// Releases a range obtained from ReserveGpuVirtualAddress.
//
// For thunk_proxy::kSystem the CPU range is decommitted (result ignored) and
// the GPU reservation is freed; for every other domain the address is handed
// back to the local VA manager.
//
// @return the result of the GPU free for kSystem, ErrorCode::Success otherwise.
ErrorCode WDDMDevice::FreeGpuVirtualAddress(const thunk_proxy::AllocDomain domain,
                                            gpusize gpu_addr, gpusize size) {
  if (domain != thunk_proxy::kSystem) {
    local_va_mgr_->Free(gpu_addr);
    return ErrorCode::Success;
  }

  DecommitSystemHeapSpace((void *)gpu_addr, size);

  d3dthunk::FreeGpuVirtualAddressArgs release{};
  release.hAdapter = adapter_;
  release.BaseAddress = gpu_addr;
  release.Size = size;

  return d3dthunk::FreeGpuVirtualAddress(&release);
}
|
|
|
|
|
|
2025-02-24 16:17:58 +08:00
|
|
|
// Reserves a GPU virtual address range inside the system heap space and
// commits it as shared, memfd-backed memory (see CommitSystemHeapSpaceIPC).
//
// @param size               length of the range in bytes.
// @param out_gpu_virt_addr  receives the address on success and 0 when the
//                           commit fails; left untouched when the GPU
//                           reservation itself fails.
// @param alignment          not used by this function.
// @param memfd              in/out descriptor forwarded to
//                           CommitSystemHeapSpaceIPC.
// @param lock               forwarded to CommitSystemHeapSpaceIPC.
// @return ErrorCode::Success, the reservation error, or SyscallFail.
ErrorCode WDDMDevice::ReserveIPCSysMem(gpusize size,
                                       gpusize *out_gpu_virt_addr, gpusize alignment,
                                       int &memfd, bool lock) {
  gpusize va = 0;

  ErrorCode status = d3dthunk::ReserveGpuVirtualAddress(adapter_, size,
                                                        system_heap_space_start_,
                                                        system_heap_space_start_ + system_heap_space_size_,
                                                        &va);
  if (status != ErrorCode::Success)
    return status;

  const bool committed = CommitSystemHeapSpaceIPC((void*)va, size, memfd, lock);
  if (!committed) {
    // Undo the GPU reservation when the CPU side could not be committed.
    d3dthunk::FreeGpuVirtualAddress(adapter_, va, size);
    status = ErrorCode::SyscallFail;
  }

  *out_gpu_virt_addr = (status == ErrorCode::Success) ? va : 0;
  return status;
}
|
|
|
|
|
|
|
|
|
|
// Releases a range obtained from ReserveIPCSysMem: unmaps the CPU mapping and
// closes `memfd` (result ignored), then frees the GPU reservation.
//
// @return the result of the GPU virtual address free.
ErrorCode WDDMDevice::FreeIPCSysMem(gpusize gpu_addr, gpusize size, int &memfd) {
  DecommitSystemHeapSpaceIPC((void *)gpu_addr, size, memfd);

  d3dthunk::FreeGpuVirtualAddressArgs release{};
  release.hAdapter = adapter_;
  release.BaseAddress = gpu_addr;
  release.Size = size;

  return d3dthunk::FreeGpuVirtualAddress(&release);
}
|
|
|
|
|
|
2024-10-15 14:31:54 +08:00
|
|
|
// Allocates `size` bytes of address space from the handle aperture.
// Requests of at least GPU_HUGE_PAGE_SIZE are aligned to that size, smaller
// ones to DEFAULT_GPU_PAGE_SIZE.
//
// @param out_gpu_virt_addr  always written with the allocator's result
//                           (0 on failure).
// @return ErrorCode::OutOfHandleApeMemory when the allocator returns 0,
//         ErrorCode::Success otherwise.
ErrorCode WDDMDevice::HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr) {
  const bool huge = (size >= GPU_HUGE_PAGE_SIZE);
  const uint64_t align = huge ? static_cast<uint64_t>(GPU_HUGE_PAGE_SIZE)
                              : static_cast<uint64_t>(DEFAULT_GPU_PAGE_SIZE);

  *out_gpu_virt_addr = handle_aperture_mgr_->Alloc(size, align);

  return (*out_gpu_virt_addr == 0) ? ErrorCode::OutOfHandleApeMemory
                                   : ErrorCode::Success;
}
|
|
|
|
|
|
|
|
|
|
// Returns an address obtained from HandleApertureAlloc to the handle
// aperture allocator.
void WDDMDevice::HandleApertureFree(gpusize gpu_addr) {
  handle_aperture_mgr_->Free(gpu_addr);
}
|
|
|
|
|
|
2024-04-30 09:01:09 -05:00
|
|
|
// Raises the tracked paging fence value to `fence_value` if that is larger
// than the current one; the stored value never decreases.
void WDDMDevice::UpdatePageFence(uint64_t fence_value) {
  uint64_t observed = page_fence_value_.load();

  // Retry the compare-exchange until it succeeds or the stored value has
  // already reached the target. A failed exchange refreshes `observed`.
  while (observed < fence_value &&
         !page_fence_value_.compare_exchange_weak(observed, fence_value)) {
  }
}
|
|
|
|
|
|
|
|
|
|
// Creates a GpuMemory object owned by the caller.
//
// When create_info.dmabuf_fd is greater than 0 the object is set up through
// ImportPhysicalHandle(), otherwise through Init().
//
// @param gpu_mem  receives the new object on success and nullptr on failure
//                 (the partially constructed object is deleted).
// @return the ErrorCode of the chosen initialization call.
ErrorCode WDDMDevice::CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem) {
  *gpu_mem = nullptr;

  GpuMemory *candidate = new GpuMemory(this);
  const bool import = (create_info.dmabuf_fd > 0);
  const ErrorCode status = import ? candidate->ImportPhysicalHandle(create_info)
                                  : candidate->Init(create_info);

  if (status != ErrorCode::Success) {
    delete candidate;
    return status;
  }

  *gpu_mem = candidate;
  return status;
}
|
|
|
|
|
|
|
|
|
|
// Locks an allocation through D3DKMTLock2.
//
// @param handle  allocation to lock.
// @return the pointer reported by the call (pData), or NULL after logging the
//         status when the call fails.
void *WDDMDevice::Lock(D3DKMT_HANDLE handle) {
  D3DKMT_LOCK2 lock{};
  lock.hDevice = device_;
  lock.hAllocation = handle;

  const NTSTATUS status = D3DKMTLock2(&lock);
  if (status != STATUS_SUCCESS) {
    pr_err("fail %x\n", status);
    return NULL;
  }

  return lock.pData;
}
|
|
|
|
|
|
|
|
|
|
// Unlocks an allocation previously locked with Lock().
//
// @return true on success; false after logging the failing status.
bool WDDMDevice::Unlock(D3DKMT_HANDLE handle) {
  D3DKMT_UNLOCK2 unlock{};
  unlock.hDevice = device_;
  unlock.hAllocation = handle;

  const NTSTATUS status = D3DKMTUnlock2(&unlock);
  if (status != STATUS_SUCCESS) {
    pr_err("fail %x\n", status);
    return false;
  }

  return true;
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::CreateContext(int engine, D3DKMT_HANDLE *handle) {
|
|
|
|
|
void *priv_data;
|
|
|
|
|
int priv_size;
|
|
|
|
|
|
|
|
|
|
int ordinal = EngineOrdinal(engine, &device_info_);
|
|
|
|
|
if (ordinal < 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
bool FwManagedGfxState = SupportStateShadowingByCpFw();
|
2024-10-14 15:52:52 +08:00
|
|
|
priv_size = thunk_proxy::CreateContextPrivData(&priv_data, FwManagedGfxState);
|
2024-04-30 09:01:09 -05:00
|
|
|
|
|
|
|
|
D3DKMT_CREATECONTEXTVIRTUAL args = {0};
|
|
|
|
|
args.hDevice = device_;
|
|
|
|
|
args.EngineAffinity = 1 << 0;
|
|
|
|
|
args.NodeOrdinal = ordinal;
|
|
|
|
|
args.pPrivateDriverData = priv_data;
|
|
|
|
|
args.PrivateDriverDataSize = priv_size;
|
|
|
|
|
args.ClientHint = D3DKMT_CLIENTHINT_OPENCL;
|
|
|
|
|
|
|
|
|
|
if (IsHwsEnabled(engine))
|
|
|
|
|
args.Flags.HwQueueSupported = 1;
|
|
|
|
|
else
|
2024-10-14 15:52:52 +08:00
|
|
|
args.Flags.DisableGpuTimeout = thunk_proxy::ShouldDisableGpuTimeout(engine, &device_info_);
|
2024-04-30 09:01:09 -05:00
|
|
|
|
|
|
|
|
NTSTATUS ret = D3DKMTCreateContextVirtual(&args);
|
|
|
|
|
if (ret == STATUS_SUCCESS) {
|
|
|
|
|
*handle = args.hContext;
|
2024-10-14 15:52:52 +08:00
|
|
|
thunk_proxy::DestroyPrivData(priv_data);
|
2024-04-30 09:01:09 -05:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2024-10-14 15:52:52 +08:00
|
|
|
thunk_proxy::DestroyPrivData(priv_data);
|
2024-04-30 09:01:09 -05:00
|
|
|
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail %x\n", ret);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Destroys a context created with CreateContext().
//
// @return true on success; false after logging the failing status.
bool WDDMDevice::DestroyContext(D3DKMT_HANDLE handle) {
  D3DKMT_DESTROYCONTEXT destroy{};
  destroy.hContext = handle;

  const NTSTATUS status = D3DKMTDestroyContext(&destroy);
  if (status != STATUS_SUCCESS) {
    pr_err("fail %x\n", status);
    return false;
  }

  return true;
}
|
|
|
|
|
|
|
|
|
|
// Queues a GPU-side wait on the context of `queue`.
//
// @param queue     queue whose context performs the wait.
// @param syncobjs  array of `count` sync object handles.
// @param values    array of `count` monitored fence values to wait for.
// @param count     number of entries in both arrays.
// @return true on success; false after logging the failing status.
bool WDDMDevice::GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs,
                         uint64_t *values, int count) {
  D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU wait{};
  wait.hContext = queue->context;
  wait.ObjectCount = count;
  wait.ObjectHandleArray = syncobjs;
  wait.MonitoredFenceValueArray = values;

  const NTSTATUS status = D3DKMTWaitForSynchronizationObjectFromGpu(&wait);
  if (status != STATUS_SUCCESS) {
    pr_err("fail %x\n", status);
    return false;
  }

  return true;
}
|
|
|
|
|
|
|
|
|
|
// Queues a GPU-side signal on `context`.
//
// @param context   context that performs the signal.
// @param syncobjs  array of `count` sync object handles.
// @param value     array of `count` monitored fence values to signal.
// @param count     number of entries in both arrays.
// @return true on success; false after logging the failing status.
bool WDDMDevice::GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs,
                           uint64_t *value, int count) {
  D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU signal{};
  signal.hContext = context;
  signal.ObjectCount = count;
  signal.ObjectHandleArray = syncobjs;
  signal.MonitoredFenceValueArray = value;

  const NTSTATUS status = D3DKMTSignalSynchronizationObjectFromGpu(&signal);
  if (status != STATUS_SUCCESS) {
    pr_err("fail %x\n", status);
    return false;
  }

  return true;
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value,
|
|
|
|
|
int count, bool wait_any) {
|
|
|
|
|
D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU args = {0};
|
|
|
|
|
args.hDevice = device_;
|
|
|
|
|
args.ObjectCount = count;
|
|
|
|
|
args.ObjectHandleArray = syncobjs;
|
|
|
|
|
args.FenceValueArray = value;
|
|
|
|
|
args.Flags.WaitAny = wait_any;
|
|
|
|
|
|
|
|
|
|
NTSTATUS ret = D3DKMTWaitForSynchronizationObjectFromCpu(&args);
|
|
|
|
|
if (ret == STATUS_SUCCESS)
|
|
|
|
|
return true;
|
|
|
|
|
|
2024-10-14 16:04:16 +08:00
|
|
|
pr_err("fail %x\n", ret);
|
2024-04-30 09:01:09 -05:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::WaitOnPagingFenceFromCpu() {
|
|
|
|
|
uint64_t page_fence_value = 0;
|
|
|
|
|
|
|
|
|
|
page_fence_value = page_fence_value_.load();
|
|
|
|
|
if (CpuWait(&page_syncobj_, &page_fence_value, 1, false))
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Creates a monitored-fence sync object on device_.
//
// @param handle  receives the sync object handle on success.
// @param addr    receives the CPU address of the fence value on success.
// @return true on success; false after logging the failing status, in which
//         case neither output is written.
bool WDDMDevice::CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr) {
  D3DKMT_CREATESYNCHRONIZATIONOBJECT2 create{};
  create.hDevice = device_;
  create.Info.Type = D3DDDI_MONITORED_FENCE;
  create.Info.MonitoredFence.EngineAffinity = 1 << 0;

  const NTSTATUS status = D3DKMTCreateSynchronizationObject2(&create);
  if (status != STATUS_SUCCESS) {
    pr_err("fail %x\n", status);
    return false;
  }

  *handle = create.hSyncObject;
  *addr = (uint64_t *)create.Info.MonitoredFence.FenceValueCPUVirtualAddress;
  pr_debug("create syncobj cpu addr=%p gpu addr=%" PRIx64 "\n",
           create.Info.MonitoredFence.FenceValueCPUVirtualAddress,
           create.Info.MonitoredFence.FenceValueGPUVirtualAddress);

  return true;
}
|
|
|
|
|
|
|
|
|
|
// Destroys a sync object created with CreateSyncobj(). A failure is only
// logged.
void WDDMDevice::DestroySyncobj(D3DKMT_HANDLE handle) {
  D3DKMT_DESTROYSYNCHRONIZATIONOBJECT destroy{};
  destroy.hSyncObject = handle;

  const NTSTATUS status = D3DKMTDestroySynchronizationObject(&destroy);
  if (status == STATUS_SUCCESS)
    return;

  pr_err("fail %x\n", status);
}
|
|
|
|
|
|
|
|
|
|
void WDDMDevice::InitCmdbufInfo(void) {
|
|
|
|
|
if (device_info_.major == 9) {
|
|
|
|
|
cmdbuf_aql_frame_size_ = 2 * sizeof(gfx9::AcquireMemTemplate);
|
|
|
|
|
} else if (device_info_.major >= 10) {
|
|
|
|
|
cmdbuf_aql_frame_size_ = 2 * sizeof(gfx10::AcquireMemTemplate);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (device_info_.major >= 11)
|
|
|
|
|
cmdbuf_aql_frame_size_ += sizeof(SetScratchTemplate);
|
|
|
|
|
|
|
|
|
|
cmdbuf_aql_frame_size_ +=
|
|
|
|
|
sizeof(PM4MEC_COPY_DATA) * 2 +
|
|
|
|
|
sizeof(BarrierTemplate) * 2 +
|
|
|
|
|
sizeof(DispatchTemplate) +
|
|
|
|
|
sizeof(AtomicTemplate) * 2;
|
|
|
|
|
cmdbuf_aql_frame_size_ = AlignUp(cmdbuf_aql_frame_size_, 0x10);
|
|
|
|
|
|
|
|
|
|
cmdbuf_size_ = AlignUp(cmdbuf_aql_frame_num_ * cmdbuf_aql_frame_size_, 0x1000);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t WDDMDevice::LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt) {
|
|
|
|
|
static const uint32_t blk_sz = 512;
|
|
|
|
|
uint32_t total_sz = pkt->group_segment_size;
|
|
|
|
|
uint32_t blk_num = (total_sz + blk_sz - 1) / blk_sz;
|
|
|
|
|
return blk_num;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Enumerates the adapters and returns those that report "AMD" in their chip
// type and are accepted by thunk_proxy::QueryAdapterSupported().
//
// @param adapters      receives a new[]-allocated array that the caller must
//                      release with delete[]; NULL when no adapters exist or
//                      on any failure.
// @param num_adapters  receives the number of valid entries in `adapters`;
//                      0 when no adapters exist or on any failure.
// @return STATUS_SUCCESS, STATUS_NO_MEMORY when an allocation fails, or the
//         failing status of the enumeration / registry-info query.
NTSTATUS WDDMGetAdapters(D3DKMT_ADAPTERINFO *&adapters, int &num_adapters)
{
  // Give the outputs a defined value on every path.
  adapters = NULL;
  num_adapters = 0;

  // First pass: only the adapter count is requested.
  D3DKMT_ENUMADAPTERS2 args = {0};
  NTSTATUS ret = D3DKMTEnumAdapters2(&args);
  if (ret != STATUS_SUCCESS)
    return ret;

  if (!args.NumAdapters)
    return STATUS_SUCCESS;

  // nothrow so an allocation failure is reported as a status instead of an
  // exception; the smart pointers release the arrays on every early return.
  std::unique_ptr<D3DKMT_ADAPTERINFO[]> info(
      new (std::nothrow) D3DKMT_ADAPTERINFO[args.NumAdapters]);
  if (!info)
    return STATUS_NO_MEMORY;

  // Second pass: fetch the adapter descriptions.
  args.pAdapters = info.get();
  ret = D3DKMTEnumAdapters2(&args);
  if (ret != STATUS_SUCCESS)
    return ret;

  std::unique_ptr<D3DKMT_ADAPTERINFO[]> supported(
      new (std::nothrow) D3DKMT_ADAPTERINFO[args.NumAdapters]);
  if (!supported)
    return STATUS_NO_MEMORY;

  int count = 0;
  for (decltype(args.NumAdapters) i = 0; i < args.NumAdapters; i++) {
    D3DKMT_ADAPTERREGISTRYINFO query = {0};

    ret = WDDMQueryAdapter(info[i].hAdapter, KMTQAITYPE_ADAPTERREGISTRYINFO,
                           &query, sizeof(query));
    if (ret != STATUS_SUCCESS)
      return ret;

    // Skip adapters of other vendors before asking the proxy about support.
    if (!std::wcsstr(query.ChipType, L"AMD"))
      continue;

    if (thunk_proxy::QueryAdapterSupported(info[i].hAdapter))
      supported[count++] = info[i];
  }

  // Ownership of the filtered array passes to the caller.
  adapters = supported.release();
  num_adapters = count;
  return STATUS_SUCCESS;
}
|
|
|
|
|
|
|
|
|
|
bool WDDMDevice::ParseDeviceInfo() {
|
|
|
|
|
bool ret;
|
|
|
|
|
|
|
|
|
|
memset(&device_info_, 0, sizeof(device_info_));
|
2024-10-14 15:52:52 +08:00
|
|
|
ret = thunk_proxy::ParseAdapterInfo(adapter_, &device_info_);
|
2024-04-30 09:01:09 -05:00
|
|
|
if (!ret)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Release the adapter_info buffer held by device_info_.
 *
 * The pointer is cleared after free() so that it does not dangle and a
 * repeated call is a harmless free(nullptr) rather than a double free.
 */
void WDDMDevice::DestroyDeviceInfo() {
  free(device_info_.adapter_info);
  device_info_.adapter_info = nullptr;
}
|
|
|
|
|
|
|
|
|
|
void WDDMDevice::GetClockCounters(uint64_t *gpu, uint64_t *cpu) {
|
|
|
|
|
|
2025-06-25 17:10:12 +08:00
|
|
|
uint32_t engine = GetComputeEngine();
|
|
|
|
|
int ordinal = EngineOrdinal(engine, &device_info_);
|
2024-04-30 09:01:09 -05:00
|
|
|
|
2025-06-25 17:10:12 +08:00
|
|
|
D3DKMT_QUERYCLOCKCALIBRATION args = {0};
|
2024-04-30 09:01:09 -05:00
|
|
|
|
2025-06-25 17:10:12 +08:00
|
|
|
/* LDA(Linked Display Adapter)
|
|
|
|
|
* In the LDA design multiple physical GPUs are linked together to be controlled
|
|
|
|
|
* as a single object from the point of view of power manager, GPU scheduler and
|
|
|
|
|
* GPU memory manager. The physical GPUs are represented by a signal logical adapter
|
|
|
|
|
* object. There is a single DXGADAPTER objects, a single KMD adapter object.
|
|
|
|
|
*
|
|
|
|
|
* Set PhysicalAdapterIndex to 0 by default with None LDA mode.
|
|
|
|
|
*/
|
|
|
|
|
args.hAdapter = adapter_;
|
|
|
|
|
args.NodeOrdinal = ordinal;
|
|
|
|
|
args.PhysicalAdapterIndex = 0;
|
2024-04-30 09:01:09 -05:00
|
|
|
|
2025-06-25 17:10:12 +08:00
|
|
|
NTSTATUS status = D3DKMTQueryClockCalibration(&args);
|
2024-04-30 09:01:09 -05:00
|
|
|
if (status) {
|
2024-10-09 15:35:44 +08:00
|
|
|
pr_debug("status %d \n", status);
|
2024-04-30 09:01:09 -05:00
|
|
|
} else {
|
2025-06-25 17:10:12 +08:00
|
|
|
if (gpu)
|
|
|
|
|
*gpu = args.ClockData.GpuClockCounter;
|
|
|
|
|
|
|
|
|
|
if (cpu)
|
|
|
|
|
*cpu = args.ClockData.CpuClockCounter;
|
2024-04-30 09:01:09 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Create the context for `queue`, make sure it has a command buffer, and
 * initialise it.
 *
 * When queue->cmdbuf_addr is 0 a command buffer of queue->cmdbuf_size bytes
 * is created in the thunk_proxy::kSystem domain and its handle and GPU
 * address are stored in the queue. A non-zero cmdbuf_addr is used as-is.
 *
 * @return true on success. On failure the context is destroyed and false is
 *         returned. If the command buffer was created here it is deleted and
 *         queue->cmdbuf / queue->cmdbuf_addr are cleared, so the queue never
 *         keeps a reference to released memory (a retry would otherwise skip
 *         allocation, and DestroyQueue() would delete the buffer again).
 */
bool WDDMDevice::CreateQueue(WDDMQueue *queue) {
  if (!CreateContext(queue->queue_engine, &queue->context))
    return false;

  // Non-null only when the command buffer is created by this call.
  GpuMemory *gpu_mem = nullptr;
  if (queue->cmdbuf_addr == 0) {
    GpuMemoryCreateInfo create_info{};
    create_info.size = queue->cmdbuf_size;
    create_info.domain = thunk_proxy::kSystem;

    auto code = CreateGpuMemory(create_info, &gpu_mem);
    if (code != ErrorCode::Success) {
      DestroyContext(queue->context);
      return false;
    }

    queue->cmdbuf = gpu_mem->GetGpuMemoryHandle();
    queue->cmdbuf_addr = gpu_mem->GpuAddress();
  }

  // A non-zero/true result from Init() is treated as failure.
  if (queue->Init()) {
    if (gpu_mem) {
      // Drop the queue's references before releasing the buffer they name.
      queue->cmdbuf = {};
      queue->cmdbuf_addr = 0;
      delete gpu_mem;
    }
    DestroyContext(queue->context);
    return false;
  }

  return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Tear down `queue`: finalise it, delete the GpuMemory object behind its
 * command buffer handle, then destroy its context (in that order).
 */
void WDDMDevice::DestroyQueue(WDDMQueue *queue) {
  queue->Fini();

  // queue->cmdbuf is a handle; Convert() maps it back to the GpuMemory object.
  delete GpuMemory::Convert(queue->cmdbuf);

  DestroyContext(queue->context);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Submit a command buffer on the queue's context with D3DKMTSubmitCommand,
 * then signal the queue's sync object with `fence_value` through GpuSignal().
 *
 * @param queue         Queue whose context and sync object are used.
 * @param command_addr  Address of the commands to submit.
 * @param command_size  Length of the commands.
 * @param fence_value   Value passed to GpuSignal() after a successful submit.
 * @return true when both the submit and the signal succeed; false otherwise
 *         (a submit failure is logged).
 */
bool WDDMDevice::SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr,
                                 uint64_t command_size, uint64_t fence_value) {
  // Driver-private payload for the submit; released below on every path.
  void *priv_data;
  int priv_size = thunk_proxy::CreateSubmitPrivData(&priv_data, queue->queue, command_addr, command_size, false);

  D3DKMT_SUBMITCOMMAND submit = {0};
  submit.Commands = command_addr;
  submit.CommandLength = command_size;
  submit.BroadcastContextCount = 1;
  submit.BroadcastContext[0] = queue->context;
  submit.pPrivateDriverData = priv_data;
  submit.PrivateDriverDataSize = priv_size;

  const NTSTATUS status = D3DKMTSubmitCommand(&submit);
  const bool submitted = (status == STATUS_SUCCESS);
  if (!submitted)
    pr_err("fail %x\n", status);

  // The payload is no longer needed once the submit call has returned.
  thunk_proxy::DestroyPrivData(priv_data);
  if (!submitted)
    return false;

  if (GpuSignal(queue->context, &queue->syncobj, &fence_value, 1))
    return true;
  return false;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Create a hardware queue on the queue's context with D3DKMTCreateHwQueue.
 *
 * On success the new hardware queue handle, its progress fence handle and
 * the CPU virtual address of that fence are stored in queue->queue,
 * queue->syncobj and queue->sync_addr.
 *
 * @return true on success; false (after logging the status) on failure, in
 *         which case `queue` is left unmodified.
 */
bool WDDMDevice::CreateHwQueue(WDDMQueue *queue) {
  bool fw_managed_gfx_state = SupportStateShadowingByCpFw();

  // Driver-private payload for the create call; released on every path.
  void *priv_data;
  int priv_size = thunk_proxy::CreateHwQueuePrivData(&priv_data, queue->context,
                                                     fw_managed_gfx_state, queue->prio);

  D3DKMT_CREATEHWQUEUE create_args = {0};
  create_args.hHwContext = queue->context;
  create_args.Flags.DisableGpuTimeout = thunk_proxy::ShouldDisableGpuTimeout(queue->queue_engine, &device_info_);
  create_args.pPrivateDriverData = priv_data;
  create_args.PrivateDriverDataSize = priv_size;

  const NTSTATUS status = D3DKMTCreateHwQueue(&create_args);
  const bool created = (status == STATUS_SUCCESS);
  if (!created)
    pr_err("fail %x\n", status);

  thunk_proxy::DestroyPrivData(priv_data);
  if (!created)
    return false;

  queue->queue = create_args.hHwQueue;
  queue->syncobj = create_args.hHwQueueProgressFence;
  queue->sync_addr = (uint64_t *)create_args.HwQueueProgressFenceCPUVirtualAddress;
  return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Destroy the hardware queue referenced by queue->queue with
 * D3DKMTDestroyHwQueue.
 *
 * @return true on STATUS_SUCCESS; otherwise the status is logged and false
 *         is returned.
 */
bool WDDMDevice::DestroyHwQueue(WDDMQueue *queue) {
  D3DKMT_DESTROYHWQUEUE destroy_args = {0};
  destroy_args.hHwQueue = queue->queue;

  const NTSTATUS status = D3DKMTDestroyHwQueue(&destroy_args);
  if (status == STATUS_SUCCESS)
    return true;

  pr_err("fail %x\n", status);
  return false;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Submit a command buffer to the queue's hardware queue with
 * D3DKMTSubmitCommandToHwQueue, passing `fence_value` as the progress fence
 * id of the submission.
 *
 * @param queue         Queue whose hardware queue handle is used.
 * @param command_addr  Address of the command buffer.
 * @param command_size  Length of the commands.
 * @param fence_value   Value stored in HwQueueProgressFenceId.
 * @return true on STATUS_SUCCESS; otherwise the status is logged and false
 *         is returned.
 */
bool WDDMDevice::SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr,
                                 uint64_t command_size, uint64_t fence_value) {
  // Driver-private payload for the submit; released below on every path.
  void *priv_data;
  int priv_size = thunk_proxy::CreateSubmitPrivData(&priv_data, queue->queue, command_addr, command_size, true);

  D3DKMT_SUBMITCOMMANDTOHWQUEUE submit = {0};
  submit.hHwQueue = queue->queue;
  submit.HwQueueProgressFenceId = fence_value;
  submit.CommandBuffer = command_addr;
  submit.CommandLength = command_size;
  submit.pPrivateDriverData = priv_data;
  submit.PrivateDriverDataSize = priv_size;

  const NTSTATUS status = D3DKMTSubmitCommandToHwQueue(&submit);
  const bool submitted = (status == STATUS_SUCCESS);
  if (!submitted)
    pr_err("fail %x\n", status);

  thunk_proxy::DestroyPrivData(priv_data);
  return submitted;
}
|
|
|
|
|
|
2024-09-10 08:16:11 -07:00
|
|
|
} // namespace thunk
|
|
|
|
|
} // namespace wsl
|