c33dcd2d07
Signed-off-by: Flora Cui <flora.cui@amd.com> Reviewed-by: Longlong Yao <Longlong.Yao@amd.com> Part-of: <http://10.67.69.192/wsl/rocr-runtime/-/merge_requests/114>
627 lines
19 KiB
C++
627 lines
19 KiB
C++
/*
|
|
* Copyright © 2014 Advanced Micro Devices, Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person
|
|
* obtaining a copy of this software and associated documentation
|
|
* files (the "Software"), to deal in the Software without
|
|
* restriction, including without limitation the rights to use, copy,
|
|
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
* of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including
|
|
* the next paragraph) shall be included in all copies or substantial
|
|
* portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
#include <stdlib.h>
#include <strings.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <linux/mman.h>
#include <fcntl.h>
#include <unistd.h>

#include <cassert>
#include <climits>
#include <cstdio>
#include <cstring>
|
|
|
|
|
|
/*
 * Process-wide runtime instance, heap-allocated by this definition's
 * initializer. clear_after_fork() deletes it and creates a fresh one.
 */
hsakmtRuntime *dxg_runtime = new hsakmtRuntime();
|
|
|
|
void hsakmtRuntime::HeapInit() {
|
|
ReserveLocalHeapSpace();
|
|
ReserveSystemHeapSpace();
|
|
InitHandleApertureSpace();
|
|
InitLocalHeapMgr();
|
|
InitSystemHeapMgr();
|
|
InitHandleApertureMgr();
|
|
}
|
|
|
|
void hsakmtRuntime::HeapFini() {
|
|
FreeSystemHeapSpace();
|
|
FreeLocalHeapSpace();
|
|
}
|
|
|
|
bool hsakmtRuntime::ReserveSvmSpace(uint64_t &base, uint64_t &size, uint64_t align) {
|
|
uint64_t sys_va[16] = {0};
|
|
uint64_t local_va;
|
|
uint64_t sys_va_size;
|
|
int match_index = -1;
|
|
void* ptr = NULL;
|
|
|
|
wsl::thunk::WDDMDevice* device;
|
|
size_t num_adapters = get_num_wddmdev();
|
|
|
|
base = 0;
|
|
sys_va_size = size + align;
|
|
|
|
/* it will retry 16 times to find the avaliable range. */
|
|
for (int i = 0; i < 16; i++) {
|
|
local_va = 0;
|
|
ptr = mmap(NULL, sys_va_size , PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
|
if (ptr == MAP_FAILED) {
|
|
pr_err("fail to reserve cpu va in %d time!\n", i);
|
|
break;
|
|
}
|
|
|
|
sys_va[i] = (uint64_t)ptr;
|
|
|
|
int match_cnt = 0;
|
|
for (uint32_t j = 0; j < num_adapters; j++) {
|
|
device = get_wddmdev(j+1);
|
|
uint64_t start = (base == 0) ? (uint64_t)ptr : base;
|
|
uint64_t end = start + ((base == 0) ? sys_va_size : size) + 1;
|
|
|
|
if (wsl::thunk::d3dthunk::ReserveGpuVirtualAddress(
|
|
device->GetAdapter(), size,
|
|
start,
|
|
end, &local_va) == ErrorCode::Success) {
|
|
|
|
match_cnt++;
|
|
base = local_va;
|
|
pr_debug("success to reserve gpu va %lx and va cpu %p in %d time\n",
|
|
local_va, ptr, i);
|
|
} else {
|
|
pr_err("%s fail to reserve gpu va for cpu va %p in %d time!\n",
|
|
__FUNCTION__, ptr, i);
|
|
}
|
|
}
|
|
|
|
if (match_cnt == num_adapters) {
|
|
match_index = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (match_index >= 0) {
|
|
/* release cpu unused ranges*/
|
|
uint64_t left_size = local_va - sys_va[match_index];
|
|
uint64_t right_size = align - left_size;
|
|
if ((left_size > 0) && munmap((void*)sys_va[match_index], left_size))
|
|
pr_err("fail to unmap left %lx with size %lx\n", sys_va[match_index], left_size);
|
|
if ((right_size > 0) && munmap((void*)(local_va + size), right_size))
|
|
pr_err("fail to unmap right %lx with size %lx\n", (local_va + size), right_size);
|
|
} else {
|
|
pr_err("fail to reserve Local Heap Space!\n");
|
|
base = 0;
|
|
size = 0;
|
|
}
|
|
|
|
/* free match fail address for cpu va */
|
|
int free = match_index >= 0 ? match_index : 16;
|
|
for (int j = 0; j < free; j++) {
|
|
if (sys_va[j] != 0 && munmap((void*)sys_va[j], sys_va_size)) {
|
|
pr_err("fail to unmap %d %lx\n", j, sys_va[j]);
|
|
}
|
|
}
|
|
|
|
return match_index >= 0;
|
|
}
|
|
|
|
/*
|
|
* To find the avaliable same range for cpu
|
|
* virtual space and gpu virtual space.
|
|
* sys_va_size of cpu va range is larger 1G
|
|
* than gpu va range, otherwise ReserveGPUVirtualAddress
|
|
* will return error.
|
|
*/
|
|
bool hsakmtRuntime::ReserveLocalHeapSpace() {
|
|
wsl::thunk::WDDMDevice* device;
|
|
uint64_t total_local_size = 0;
|
|
uint64_t align = 0x40000000; /* 1G */
|
|
size_t num_adapters = get_num_wddmdev();
|
|
|
|
for (uint32_t j = 0; j < num_adapters; j++) {
|
|
device = get_wddmdev(j+1);
|
|
if (device == nullptr)
|
|
return -1;
|
|
/*
|
|
* For APU, use non local memory(shared GPU memory) as GPU memory,
|
|
* because it has small local memory
|
|
*/
|
|
if (device->IsDgpu())
|
|
total_local_size = wsl::Max(device->LocalHeapSize(), total_local_size);
|
|
else
|
|
total_local_size = wsl::Max(device->LocalHeapSize(), device->NonLocalHeapSize(), total_local_size);
|
|
}
|
|
|
|
total_local_size = wsl::AlignUp(total_local_size, align) * 4;
|
|
local_heap_space_start_ = 0;
|
|
local_heap_space_size_ = total_local_size;
|
|
|
|
return ReserveSvmSpace(local_heap_space_start_, local_heap_space_size_, align);
|
|
}
|
|
|
|
bool hsakmtRuntime::FreeSvmSpace(uint64_t &base, uint64_t &size) {
|
|
wsl::thunk::WDDMDevice* device;
|
|
size_t num_adapters = get_num_wddmdev();
|
|
for (uint32_t j = 0; j < num_adapters; j++) {
|
|
device = get_wddmdev(j+1);
|
|
if (device == nullptr)
|
|
return -1;
|
|
wsl::thunk::d3dthunk::FreeGpuVirtualAddress(device->GetAdapter(), base, size);
|
|
}
|
|
|
|
void *cpu = (void *)base;
|
|
auto r = (munmap(cpu, size) == 0);
|
|
base = 0;
|
|
size = 0;
|
|
return r;
|
|
}
|
|
|
|
bool hsakmtRuntime::FreeLocalHeapSpace() {
|
|
return FreeSvmSpace(local_heap_space_start_, local_heap_space_size_);
|
|
}
|
|
|
|
/*
 * Create the VA manager for the local heap range, using
 * DEFAULT_GPU_PAGE_SIZE as its third constructor argument.
 */
void hsakmtRuntime::InitLocalHeapMgr() {
  auto mgr = std::make_unique<wsl::thunk::VaMgr>(local_heap_space_start_,
                                                 local_heap_space_size_,
                                                 DEFAULT_GPU_PAGE_SIZE);
  local_heap_mgr_ = std::move(mgr);
}
|
|
|
|
bool hsakmtRuntime::ReserveSystemHeapSpace() {
|
|
struct sysinfo info;
|
|
int ret = sysinfo(&info);
|
|
uint64_t max_ram = 0x10000000000;
|
|
uint64_t alignment = 0x100000000;
|
|
assert(!ret);
|
|
|
|
int32_t protFlags = PROT_NONE;
|
|
// minimum of reserve size is 8G, maximum of reserve size is 1T.
|
|
system_heap_space_size_ = std::min(wsl::AlignUp(info.totalram, alignment) * 2, max_ram);
|
|
|
|
return ReserveSvmSpace(system_heap_space_start_, system_heap_space_size_, alignment);
|
|
}
|
|
|
|
bool hsakmtRuntime::FreeSystemHeapSpace(void) {
|
|
return FreeSvmSpace(system_heap_space_start_, system_heap_space_size_);
|
|
}
|
|
|
|
bool hsakmtRuntime::CommitSystemHeapSpace(void* addr, int64_t size, bool lock) {
|
|
int32_t protFlags = PROT_READ | PROT_WRITE | PROT_EXEC;
|
|
int32_t mapFlags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|
|
|
MAP_NORESERVE|MAP_UNINITIALIZED;
|
|
if (lock)
|
|
mapFlags |= MAP_LOCKED;
|
|
void* paddr = mmap(addr, size, protFlags, mapFlags, -1, 0);
|
|
if (paddr == MAP_FAILED) {
|
|
pr_err("fail to commit %s addr = %p, paddr = %p\n", (lock ? "locked" : ""), addr, paddr);
|
|
return false;
|
|
}
|
|
assert(addr == paddr);
|
|
|
|
/*if (!Runtime::runtime_singleton_->PinWARequired())
|
|
return true;*/
|
|
|
|
/*
|
|
* Do not make the pages in this range available to the child
|
|
* after a fork(2). This is useful to prevent copy-on-write
|
|
* semantics from changing the physical location of a page if
|
|
* the parent writes to it after a fork(2). (Such page
|
|
* relocations cause problems for hardware that DMAs into the
|
|
* page.)
|
|
*
|
|
* https://man7.org/linux/man-pages/man2/madvise.2.html
|
|
*/
|
|
if (madvise(addr, size, MADV_DONTFORK))
|
|
pr_err("fail to set MADV_DONTFORK for addr = %p\n", addr);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool hsakmtRuntime::DecommitSystemHeapSpace(void* addr, int64_t size) {
|
|
int32_t protFlags = PROT_NONE;
|
|
int32_t mapFlags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|
|
|
MAP_NORESERVE|MAP_UNINITIALIZED;
|
|
void* paddr = mmap(addr, size, protFlags, mapFlags, -1, 0);
|
|
if (paddr == MAP_FAILED) {
|
|
pr_err("fail to decommit addr = %p, paddr = %p\n", addr, paddr);
|
|
return false;
|
|
}
|
|
assert(addr == paddr);
|
|
return true;
|
|
}
|
|
|
|
/*
 * Create the VA manager for the system heap range, using
 * DEFAULT_GPU_PAGE_SIZE as its third constructor argument.
 */
void hsakmtRuntime::InitSystemHeapMgr() {
  auto mgr = std::make_unique<wsl::thunk::VaMgr>(system_heap_space_start_,
                                                 system_heap_space_size_,
                                                 DEFAULT_GPU_PAGE_SIZE);
  system_heap_mgr_ = std::move(mgr);
}
|
|
|
|
/*
 * Allocate a virtual address range from the system heap or the local heap.
 *
 * domain             thunk_proxy::kSystem selects the system heap; any other
 *                    value selects the local heap.
 * hit_base_addr      passed to the heap manager's Alloc() as third argument.
 * size               number of bytes to allocate.
 * out_gpu_virt_addr  receives the allocated address, or 0 on failure.
 * alignment          requested alignment; 0 means 64K. Allocations of at
 *                    least GPU_HUGE_PAGE_SIZE use GPU_HUGE_PAGE_SIZE instead.
 * lock               forwarded to CommitSystemHeapSpace() (system heap only).
 *
 * Returns ErrorCode::Success, ErrorCode::OutOfMemory (system heap
 * exhausted), ErrorCode::OutOfGpuMemory (local heap exhausted) or
 * ErrorCode::SyscallFail (committing the system memory failed).
 */
ErrorCode hsakmtRuntime::ReserveGpuVirtualAddress(const thunk_proxy::AllocDomain domain,
                                                  gpusize hit_base_addr, gpusize size,
                                                  gpusize *out_gpu_virt_addr, gpusize alignment, bool lock) {
  gpusize gpu_addr = 0;

  *out_gpu_virt_addr = 0;

  uint64_t align = alignment == 0 ? (64 * 1024) : alignment; // default 64K alignment
  if (size >= GPU_HUGE_PAGE_SIZE)
    align = GPU_HUGE_PAGE_SIZE;

  if (domain == thunk_proxy::kSystem) {
    gpu_addr = system_heap_mgr_->Alloc(size, align, hit_base_addr);
    /*
     * Stop here when the heap is exhausted: committing and freeing address 0
     * is wrong and would replace OutOfMemory with SyscallFail.
     */
    if (gpu_addr == 0)
      return ErrorCode::OutOfMemory;

    if (!CommitSystemHeapSpace((void*)gpu_addr, size, lock)) {
      system_heap_mgr_->Free(gpu_addr);
      return ErrorCode::SyscallFail;
    }
  } else {
    gpu_addr = local_heap_mgr_->Alloc(size, align, hit_base_addr);
    if (gpu_addr == 0)
      return ErrorCode::OutOfGpuMemory;
  }

  *out_gpu_virt_addr = gpu_addr;
  return ErrorCode::Success;
}
|
|
|
|
/*
 * Return a virtual address range to the heap it was allocated from.
 *
 * For thunk_proxy::kSystem the range is decommitted first and then freed in
 * the system heap manager; for any other domain it is freed in the local
 * heap manager. The decommit result is not checked.
 *
 * Always returns ErrorCode::Success.
 */
ErrorCode hsakmtRuntime::FreeGpuVirtualAddress(const thunk_proxy::AllocDomain domain,
                                               gpusize gpu_addr, gpusize size) {
  const bool from_system_heap = (domain == thunk_proxy::kSystem);

  if (from_system_heap)
    DecommitSystemHeapSpace((void *)gpu_addr, size);

  auto &heap_mgr = from_system_heap ? system_heap_mgr_ : local_heap_mgr_;
  heap_mgr->Free(gpu_addr);

  return ErrorCode::Success;
}
|
|
|
|
bool hsakmtRuntime::CommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd, bool lock) {
|
|
int fd = -1;
|
|
|
|
if (memfd == -1) {
|
|
fd = memfd_create("rocr4wsl_gtt", MFD_CLOEXEC);
|
|
if (fd < 0) {
|
|
pr_err("memfd_create failed\n");
|
|
return false;
|
|
}
|
|
|
|
ftruncate(fd, size);
|
|
} else {
|
|
fd = memfd;
|
|
}
|
|
|
|
int32_t protFlags = PROT_READ | PROT_WRITE;
|
|
int32_t mapFlags = MAP_SHARED | MAP_FIXED | MAP_NORESERVE |
|
|
MAP_UNINITIALIZED | (lock ? MAP_LOCKED : 0);
|
|
|
|
void* paddr = mmap(addr, size, protFlags, mapFlags, fd, 0);
|
|
if (paddr == MAP_FAILED) {
|
|
pr_err("fail to commit %s addr = %p, paddr = %p\n", (lock ? "locked" : ""), addr, paddr);
|
|
if (memfd == -1)
|
|
close(fd);
|
|
return false;
|
|
}
|
|
assert(addr == paddr);
|
|
|
|
memfd = fd;
|
|
|
|
if (madvise(addr, size, MADV_DONTFORK))
|
|
pr_err("fail to set MADV_DONTFORK for addr = %p\n", addr);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool hsakmtRuntime::DecommitSystemHeapSpaceIPC(void* addr, int64_t size, int &memfd) {
|
|
if (munmap(addr, size) != 0) {
|
|
pr_err("fail to unmap = %p \n", addr);
|
|
return false;
|
|
}
|
|
close(memfd);
|
|
memfd = -1;
|
|
return true;
|
|
}
|
|
|
|
/*
 * Allocate a range from the system heap and back it with memfd memory via
 * CommitSystemHeapSpaceIPC().
 *
 * out_gpu_virt_addr  receives the address on success and 0 when the commit
 *                    fails; it is not written when the heap is exhausted.
 * memfd, lock        forwarded to CommitSystemHeapSpaceIPC().
 *
 * Returns ErrorCode::Success, ErrorCode::OutOfMemory or
 * ErrorCode::SyscallFail.
 */
ErrorCode hsakmtRuntime::ReserveIPCSysMem(gpusize size,
                                          gpusize *out_gpu_virt_addr, gpusize alignment,
                                          int &memfd, bool lock) {
  const gpusize va = system_heap_mgr_->Alloc(size, alignment, 0);
  if (va == 0)
    return ErrorCode::OutOfMemory;

  const bool committed = CommitSystemHeapSpaceIPC((void*)va, size, memfd, lock);
  if (!committed) {
    system_heap_mgr_->Free(va);
    *out_gpu_virt_addr = 0;
    return ErrorCode::SyscallFail;
  }

  *out_gpu_virt_addr = va;
  return ErrorCode::Success;
}
|
|
|
|
/*
 * Undo ReserveIPCSysMem(): decommit the memfd backed range, then free the
 * address in the system heap manager. The decommit result is not checked.
 *
 * Always returns ErrorCode::Success.
 */
ErrorCode hsakmtRuntime::FreeIPCSysMem(gpusize gpu_addr, gpusize size, int &memfd) {
  void *cpu_addr = (void *)gpu_addr;
  DecommitSystemHeapSpaceIPC(cpu_addr, size, memfd);

  system_heap_mgr_->Free(gpu_addr);

  return ErrorCode::Success;
}
|
|
|
|
bool hsakmtRuntime::InitHandleApertureSpace() {
|
|
wsl::thunk::WDDMDevice* device;
|
|
size_t num_adapters = get_num_wddmdev();
|
|
handle_aperture_start_ = START_NON_CANONICAL_ADDR;
|
|
handle_aperture_size_ = 1ULL << 47;
|
|
|
|
while (handle_aperture_start_ < END_NON_CANONICAL_ADDR - 1) {
|
|
for (uint32_t j = 0; j < num_adapters;) {
|
|
device = get_wddmdev(j+1);
|
|
if (device == nullptr)
|
|
return -1;
|
|
|
|
if (device->PrivateApertureBase() &&
|
|
IS_OVERLAPPING(device->PrivateApertureBase(),
|
|
device->PrivateApertureSize(),
|
|
handle_aperture_start_,
|
|
handle_aperture_size_)) {
|
|
handle_aperture_start_ += (1ULL << 47);
|
|
continue;
|
|
}
|
|
|
|
if (device->SharedApertureBase() &&
|
|
IS_OVERLAPPING(device->SharedApertureBase(),
|
|
device->SharedApertureSize(),
|
|
handle_aperture_start_,
|
|
handle_aperture_size_)) {
|
|
handle_aperture_start_ += (1ULL << 47);
|
|
continue;
|
|
}
|
|
|
|
j++;
|
|
}
|
|
pr_debug("handle aperture start %lx, size %lx\n", handle_aperture_start_, handle_aperture_size_);
|
|
return true;
|
|
}
|
|
|
|
handle_aperture_start_ = 0;
|
|
pr_err("fail\n");
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Create the VA manager for the handle aperture range, using
 * DEFAULT_GPU_PAGE_SIZE as its third constructor argument.
 */
void hsakmtRuntime::InitHandleApertureMgr() {
  auto mgr = std::make_unique<wsl::thunk::VaMgr>(handle_aperture_start_,
                                                 handle_aperture_size_,
                                                 DEFAULT_GPU_PAGE_SIZE);
  handle_aperture_mgr_ = std::move(mgr);
}
|
|
|
|
/*
 * Allocate `size` bytes from the handle aperture.
 *
 * Requests of at least GPU_HUGE_PAGE_SIZE are aligned to
 * GPU_HUGE_PAGE_SIZE, smaller ones to DEFAULT_GPU_PAGE_SIZE.
 *
 * out_gpu_virt_addr receives the manager's result (0 when it failed).
 * Returns ErrorCode::Success or ErrorCode::OutOfHandleApeMemory.
 */
ErrorCode hsakmtRuntime::HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr) {
  uint64_t align;
  if (size >= GPU_HUGE_PAGE_SIZE) {
    align = GPU_HUGE_PAGE_SIZE;
  } else {
    align = DEFAULT_GPU_PAGE_SIZE;
  }

  *out_gpu_virt_addr = handle_aperture_mgr_->Alloc(size, align);

  const bool exhausted = (*out_gpu_virt_addr == 0);
  return exhausted ? ErrorCode::OutOfHandleApeMemory : ErrorCode::Success;
}
|
|
|
|
/*
 * Return an address obtained from HandleApertureAlloc() to the handle
 * aperture manager.
 */
void hsakmtRuntime::HandleApertureFree(gpusize gpu_addr) {
  auto &mgr = handle_aperture_mgr_;
  mgr->Free(gpu_addr);
}
|
|
|
|
/* is_forked_child detects when the process has forked since the last
|
|
* time this function was called. We cannot rely on pthread_atfork
|
|
* because the process can fork without calling the fork function in
|
|
* libc (using clone or calling the system call directly).
|
|
*/
|
|
bool is_forked_child(void) {
|
|
if (dxg_runtime->is_forked)
|
|
return true;
|
|
|
|
pid_t cur_pid = getpid();
|
|
if (dxg_runtime->parent_pid != cur_pid) {
|
|
dxg_runtime->is_forked = true;
|
|
dxg_runtime->parent_pid = cur_pid;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Callbacks from pthread_atfork */
|
|
/* atfork "prepare" callback: take the runtime mutex before the fork. */
static void prepare_fork_handler(void) {
  pthread_mutex_lock(&dxg_runtime->hsakmt_mutex);
}
|
|
/* atfork "parent" callback: release the runtime mutex after the fork. */
static void parent_fork_handler(void) {
  pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex);
}
|
|
/*
 * atfork "child" callback: re-initialize the runtime mutex with default
 * attributes and flag the runtime as forked.
 */
static void child_fork_handler(void) {
  pthread_mutex_t *mutex = &dxg_runtime->hsakmt_mutex;
  pthread_mutex_init(mutex, NULL);
  dxg_runtime->is_forked = true;
}
|
|
|
|
/* Call this from the child process after fork. This will clear all
|
|
* data that is duplicated from the parent process, that is not valid
|
|
* in the child.
|
|
* The topology information is duplicated from the parent is valid
|
|
* in the child process so it is not cleared
|
|
*/
|
|
static void clear_after_fork(void) {
|
|
reset_suballocator();
|
|
clear_allocation_map();
|
|
|
|
if (dxg_runtime->dxg_fd >= 0) {
|
|
close(dxg_runtime->dxg_fd);
|
|
dxg_runtime->dxg_fd = -1;
|
|
}
|
|
delete dxg_runtime;
|
|
dxg_runtime = new hsakmtRuntime();
|
|
|
|
}
|
|
|
|
/*
 * Store the system page size reported by sysconf(_SC_PAGESIZE) and derive
 * page_shift from it as the index of its lowest set bit.
 */
static inline void init_page_size(void) {
  const auto reported = sysconf(_SC_PAGESIZE);
  dxg_runtime->page_size = reported;

  const int lowest_set_bit = ffs(dxg_runtime->page_size);
  dxg_runtime->page_shift = lowest_set_bit - 1;
}
|
|
|
|
/*
 * Read the runtime's tunables from the environment. A variable that is not
 * set leaves the corresponding field untouched.
 *
 *   HSAKMT_DEBUG_LEVEL              -> hsakmt_debug_level (atoi)
 *   HSA_ZFB                         -> zfb_support (atoi)
 *   WSLKMT_VENDOR_PACKET            -> vendor_packet_process (atoi)
 *   WSL_CHECK_AVAIL_SYSRAM          -> check_avail_sysram (true only for "1")
 *   WSL_ENABLE_THUNK_SUB_ALLOCATOR  -> enable_thunk_sub_allocator (atoi)
 *   ROCR_VISIBLE_DEVICES            -> default_node (first number + 1)
 *
 * Always returns HSAKMT_STATUS_SUCCESS.
 */
static HSAKMT_STATUS init_vars_from_env(void) {
  const char *envvar;

  /* Normally libraries don't print messages. For debugging purpose, we'll
   * print messages if an environment variable, HSAKMT_DEBUG_LEVEL, is set.
   */
  envvar = getenv("HSAKMT_DEBUG_LEVEL");
  if (envvar) {
    dxg_runtime->hsakmt_debug_level = atoi(envvar);
  }

  /* Check whether to support Zero frame buffer */
  envvar = getenv("HSA_ZFB");
  if (envvar)
    dxg_runtime->zfb_support = atoi(envvar);

  /* Check whether to handle vendor specific aql packet */
  envvar = getenv("WSLKMT_VENDOR_PACKET");
  if (envvar)
    dxg_runtime->vendor_packet_process = atoi(envvar);

  /* Decide whether to check available system memory before allocation */
  envvar = getenv("WSL_CHECK_AVAIL_SYSRAM");
  if (envvar)
    dxg_runtime->check_avail_sysram = !strcmp(envvar, "1");

  envvar = getenv("WSL_ENABLE_THUNK_SUB_ALLOCATOR");
  if (envvar)
    dxg_runtime->enable_thunk_sub_allocator = atoi(envvar);

  /*
   * The first decimal number found in ROCR_VISIBLE_DEVICES selects the
   * default node (value + 1). strtol is used instead of std::stoi so an
   * oversized number cannot throw out of this C-style API; strtol reports
   * overflow as LONG_MAX, which the range check below rejects.
   */
  envvar = getenv("ROCR_VISIBLE_DEVICES");
  if (envvar) {
    const char *first_num = strpbrk(envvar, "0123456789");
    if (first_num != nullptr) {
      const long index = strtol(first_num, nullptr, 10);
      if (index < INT_MAX)
        dxg_runtime->default_node = static_cast<int>(index) + 1;
      else
        pr_err("ROCR_VISIBLE_DEVICES value is out of range, ignored\n");
    }
  }

  return HSAKMT_STATUS_SUCCESS;
}
|
|
|
|
/*
 * Open the connection to the DXG device, or add a reference when it is
 * already open.
 *
 * First open: reads the environment, opens dxg_device_name (unless a
 * descriptor is already stored), initializes the dxcore loader, the hsa
 * loader and the page size, and installs the atfork handlers once per
 * process. Later opens only increment dxg_open_count.
 *
 * Returns HSAKMT_STATUS_SUCCESS on the first open,
 * HSAKMT_STATUS_KERNEL_ALREADY_OPENED on later opens,
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when the device cannot be
 * opened and HSAKMT_STATUS_ERROR when libdxcore cannot be loaded.
 *
 * NOTE(review): clear_after_fork() replaces the dxg_runtime object while its
 * hsakmt_mutex is held by this function, so the unlock below acts on the
 * mutex of the new object. Confirm this is intended.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) {
  HSAKMT_STATUS result = HSAKMT_STATUS_SUCCESS;
  int fd = -1; /* stays -1 unless the device is opened by this call */

  pthread_mutex_lock(&dxg_runtime->hsakmt_mutex);

  /* If the process has forked, the child process must re-initialize
   * its connection to DXG. Any references tracked by dxg_open_count
   * belong to the parent
   */
  if (is_forked_child())
    clear_after_fork();

  if (dxg_runtime->dxg_open_count == 0) {
    static bool atfork_installed = false;

    result = init_vars_from_env();
    if (result != HSAKMT_STATUS_SUCCESS)
      goto open_failed;

    if (dxg_runtime->dxg_fd < 0) {
      fd = open(dxg_runtime->dxg_device_name, O_RDWR | O_CLOEXEC);

      if (fd == -1) {
        result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;
        goto open_failed;
      }

      dxg_runtime->dxg_fd = fd;
    }
    if (!wsl::thunk::dxcore::DxcoreLoader::Instance().Initialize()) {
      pr_err("Failed to load libdxcore.so\n");
      result = HSAKMT_STATUS_ERROR;
      goto dxcore_loader_failed;
    }

    hsakmt_hsa_loader_init();
    init_page_size();

    char *useSvmStr = getenv("HSA_USE_SVM");
    dxg_runtime->is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0")) && false;

    dxg_runtime->dxg_open_count = 1;

    if (!atfork_installed) {
      /* Atfork handlers cannot be uninstalled and
       * must be installed only once. Otherwise
       * prepare will deadlock when trying to take
       * the same lock multiple times.
       */
      pthread_atfork(prepare_fork_handler, parent_fork_handler,
                     child_fork_handler);
      atfork_installed = true;
    }
  } else {
    dxg_runtime->dxg_open_count++;
    result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
  }

  reset_suballocator();
  pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex);
  return result;

dxcore_loader_failed:
  /*
   * Only undo what this call did: close the descriptor if it was opened
   * here and drop the stored copy, so a later open does not reuse a closed
   * descriptor.
   */
  if (fd >= 0) {
    close(fd);
    dxg_runtime->dxg_fd = -1;
  }
open_failed:
  pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex);

  return result;
}
|
|
|
|
/*
 * Drop one reference taken by hsaKmtOpenKFD(). When the last reference is
 * dropped, the DXG descriptor is closed and the dxcore loader is shut down.
 *
 * Returns HSAKMT_STATUS_SUCCESS, or
 * HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED when nothing is open.
 */
HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void) {
  HSAKMT_STATUS result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED;

  pthread_mutex_lock(&dxg_runtime->hsakmt_mutex);

  if (dxg_runtime->dxg_open_count > 0) {
    --dxg_runtime->dxg_open_count;

    const bool last_reference = (dxg_runtime->dxg_open_count == 0);
    if (last_reference) {
      close(dxg_runtime->dxg_fd);
      dxg_runtime->dxg_fd = -1;
      wsl::thunk::dxcore::DxcoreLoader::Instance().Shutdown();
    }

    result = HSAKMT_STATUS_SUCCESS;
  }

  pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex);

  return result;
}
|