diff --git a/globals.cpp b/globals.cpp index 312f8d0d59..880dd49cd9 100644 --- a/globals.cpp +++ b/globals.cpp @@ -27,29 +27,9 @@ // HSAKMT global data -int kfd_fd = -1; -unsigned long dxg_open_count; -unsigned long system_properties_count; -pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER; -bool hsakmt_is_dgpu; - #ifndef PAGE_SIZE int PAGE_SIZE; #endif int PAGE_SHIFT; -/* whether to check all dGPUs in the topology support SVM API */ -bool is_svm_api_supported; -/* zfb is mainly used during emulation */ -int zfb_support; -/* handle vendor specific packet. :1 process 0:skip */ -int vendor_packet_process; -/* enable vendor packet in hsa-runtime*/ -int enable_vendor_packet; -/* check available system memory before allocation */ -bool check_avail_sysram = false; -/* enable 128MB sub-allocator in libhsakmt, default: disable */ -int enable_thunk_sub_allocator = 0; - -size_t max_single_alloc_size = 0; diff --git a/libhsakmt.h b/libhsakmt.h index c6c4ffe565..b857c3b314 100644 --- a/libhsakmt.h +++ b/libhsakmt.h @@ -38,17 +38,47 @@ wsl::thunk::WDDMDevice* get_wddmdev(uint32_t node_id); wsl::thunk::GpuMemory *get_gpu_mem(void *MemoryAddress); -extern unsigned long dxg_open_count; -extern bool hsakmt_forked; -extern pthread_mutex_t hsakmt_mutex; -extern bool hsakmt_is_dgpu; -extern bool is_svm_api_supported; -extern int zfb_support; -extern int vendor_packet_process; -extern int enable_vendor_packet; -extern bool check_avail_sysram; -extern size_t max_single_alloc_size; -extern int enable_thunk_sub_allocator; +#define HSAKMT_DEBUG_LEVEL_ERR -1 +#define HSAKMT_DEBUG_LEVEL_DEFAULT 3 +#define HSAKMT_DEBUG_LEVEL_WARNING 4 +#define HSAKMT_DEBUG_LEVEL_INFO 6 +#define HSAKMT_DEBUG_LEVEL_DEBUG 7 +/* Process-wide libhsakmt state that used to live in separate globals; reached through the dxg_runtime pointer. The constructor's initializer list follows member declaration order. */ +struct hsakmtRuntime { + hsakmtRuntime() + : hsakmt_mutex(PTHREAD_MUTEX_INITIALIZER), + dxg_fd(-1), + parent_pid(getpid()), + is_forked(false), + hsakmt_debug_level(HSAKMT_DEBUG_LEVEL_DEFAULT), + dxg_open_count(0), + hsakmt_is_dgpu(false), + is_svm_api_supported(false), +
zfb_support(0), + vendor_packet_process(0), + enable_vendor_packet(0), + check_avail_sysram(false), + max_single_alloc_size(0), + enable_thunk_sub_allocator(0) {} + + pthread_mutex_t hsakmt_mutex; + const char *dxg_device_name = "/dev/dxg"; + int dxg_fd = -1; + pid_t parent_pid = -1; + bool is_forked = false; + int hsakmt_debug_level = HSAKMT_DEBUG_LEVEL_DEFAULT; + unsigned long dxg_open_count; + bool hsakmt_is_dgpu; + bool is_svm_api_supported; /* whether to check all dGPUs in the topology support SVM API */ + int zfb_support; /* zfb is mainly used during emulation */ + int vendor_packet_process; /* handle vendor specific packet: 1 = process, 0 = skip */ + int enable_vendor_packet; /* enable vendor packet in hsa-runtime */ + bool check_avail_sysram; /* check available system memory before allocation */ + size_t max_single_alloc_size; + int enable_thunk_sub_allocator; /* enable 128MB sub-allocator in libhsakmt, default: disable */ +}; + +extern hsakmtRuntime *dxg_runtime; #undef HSAKMTAPI #define HSAKMTAPI __attribute__((visibility ("default"))) @@ -66,7 +96,7 @@ extern int enable_thunk_sub_allocator; #define PORT_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v)) #define CHECK_DXG_OPEN() \ - do { if (dxg_open_count == 0 || hsakmt_forked) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0) + do { if (dxg_runtime->dxg_open_count == 0 || dxg_runtime->is_forked) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0) /* Might be defined in limits.h on platforms where it is constant (used by musl) */ /* See also: https://pubs.opengroup.org/onlinepubs/7908799/xsh/limits.h.html */ @@ -91,7 +121,6 @@ extern int PAGE_SHIFT; #define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0])) /* HSA Thunk logging usage */ -extern int hsakmt_debug_level; #define get_thread_id() \ ([]() -> std::string { \ std::stringstream str_thrd_id; \ @@ -109,16 +138,12 @@ extern int hsakmt_debug_level; #else #define hsakmt_print(level, fmt, ...)
\ do { \ - if (level <= hsakmt_debug_level) { \ + if (level <= dxg_runtime->hsakmt_debug_level) { \ hsakmt_print_common(stdout, fmt, ##__VA_ARGS__); \ } \ } while (false) #endif -#define HSAKMT_DEBUG_LEVEL_ERR -1 -#define HSAKMT_DEBUG_LEVEL_DEFAULT 3 -#define HSAKMT_DEBUG_LEVEL_WARNING 4 -#define HSAKMT_DEBUG_LEVEL_INFO 6 -#define HSAKMT_DEBUG_LEVEL_DEBUG 7 + #define pr_err(fmt, ...) \ hsakmt_print_common(stderr, fmt, ##__VA_ARGS__) #define pr_warn(fmt, ...) \ diff --git a/memory.cpp b/memory.cpp index bbaba448e3..fca8984c45 100644 --- a/memory.cpp +++ b/memory.cpp @@ -179,15 +179,15 @@ HSAKMT_STATUS hsaKmtAllocMemoryAlignInternal(HSAuint32 PreferredNode, create_info.alignment = Alignment; create_info.va_hint = reinterpret_cast(*MemoryAddress); if ((PreferredNode == 0 && MemFlags.ui32.HostAccess) - || zfb_support || MemFlags.ui32.GTTAccess) { - if (SizeInBytes > max_single_alloc_size) + || dxg_runtime->zfb_support || MemFlags.ui32.GTTAccess) { + if (SizeInBytes > dxg_runtime->max_single_alloc_size) return HSAKMT_STATUS_NO_MEMORY; - if (check_avail_sysram && !isSystemMemoryAvailable(SizeInBytes)) + if (dxg_runtime->check_avail_sysram && !isSystemMemoryAvailable(SizeInBytes)) return HSAKMT_STATUS_NO_MEMORY; /* If allocate VRAM under ZFB mode */ - if (zfb_support && MemFlags.ui32.NonPaged == 1) + if (dxg_runtime->zfb_support && MemFlags.ui32.NonPaged == 1) MemFlags.ui32.CoarseGrain = 1; // AllocateNonPaged == AllocateIPC @@ -274,7 +274,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocMemoryAlign(HSAuint32 PreferredNode, return hsaKmtAllocMemoryAlignInternal(PreferredNode, SizeInBytes, Alignment, MemFlags, MemoryAddress, - !enable_thunk_sub_allocator); + !dxg_runtime->enable_thunk_sub_allocator); } HSAKMT_STATUS hsaKmtFreeMemoryInternal(void *MemoryAddress, @@ -419,7 +419,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtRegisterMemoryWithFlags( if ((MemFlags.ui32.HostAccess != 1) || (MemFlags.ui32.NonPaged == 1)) return HSAKMT_STATUS_NOT_SUPPORTED; - if (!hsakmt_is_dgpu) + if 
(!dxg_runtime->hsakmt_is_dgpu) /* TODO: support mixed APU and dGPU configurations */ return HSAKMT_STATUS_NOT_SUPPORTED; diff --git a/openclose.cpp b/openclose.cpp index 6bf3ded56f..2507276ca6 100644 --- a/openclose.cpp +++ b/openclose.cpp @@ -33,32 +33,20 @@ #include #include "libhsakmt.h" -static const char dxg_device_name[] = "/dev/dxg"; -static pid_t parent_pid = -1; -int hsakmt_debug_level; -bool hsakmt_forked; -static int dxg_fd = -1; - +hsakmtRuntime *dxg_runtime = new hsakmtRuntime(); /* is_forked_child detects when the process has forked since the last * time this function was called. We cannot rely on pthread_atfork * because the process can fork without calling the fork function in * libc (using clone or calling the system call directly). */ bool is_forked_child(void) { - pid_t cur_pid; - - if (hsakmt_forked) + if (dxg_runtime->is_forked) return true; - cur_pid = getpid(); - - if (parent_pid == -1) { - parent_pid = cur_pid; - return false; - } - - if (parent_pid != cur_pid) { - hsakmt_forked = true; + pid_t cur_pid = getpid(); + if (dxg_runtime->parent_pid != cur_pid) { + dxg_runtime->is_forked = true; + dxg_runtime->parent_pid = cur_pid; return true; } @@ -66,11 +54,11 @@ bool is_forked_child(void) { } /* Callbacks from pthread_atfork */ -static void prepare_fork_handler(void) { pthread_mutex_lock(&hsakmt_mutex); } -static void parent_fork_handler(void) { pthread_mutex_unlock(&hsakmt_mutex); } +static void prepare_fork_handler(void) { pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); } +static void parent_fork_handler(void) { pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); } static void child_fork_handler(void) { - pthread_mutex_init(&hsakmt_mutex, NULL); - hsakmt_forked = true; + pthread_mutex_init(&dxg_runtime->hsakmt_mutex, NULL); + dxg_runtime->is_forked = true; } /* Call this from the child process after fork. 
This will clear all @@ -82,13 +70,14 @@ static void child_fork_handler(void) { static void clear_after_fork(void) { reset_suballocator(); clear_allocation_map(); - if (dxg_fd) { - close(dxg_fd); - dxg_fd = -1; + /* Reset state in place: the caller, hsaKmtOpenKFD, holds dxg_runtime->hsakmt_mutex, so the runtime object must not be freed and reallocated here. */ + if (dxg_runtime->dxg_fd >= 0) { + close(dxg_runtime->dxg_fd); + dxg_runtime->dxg_fd = -1; } - dxg_open_count = 0; - parent_pid = -1; - hsakmt_forked = false; + dxg_runtime->dxg_open_count = 0; + dxg_runtime->parent_pid = getpid(); + dxg_runtime->is_forked = false; } static inline void init_page_size(void) { @@ -105,36 +94,34 @@ static HSAKMT_STATUS init_vars_from_env(void) { /* Normally libraries don't print messages. For debugging purpose, we'll * print messages if an environment variable, HSAKMT_DEBUG_LEVEL, is set. */ - hsakmt_debug_level = HSAKMT_DEBUG_LEVEL_DEFAULT; - envvar = getenv("HSAKMT_DEBUG_LEVEL"); if (envvar) { - hsakmt_debug_level = atoi(envvar); + dxg_runtime->hsakmt_debug_level = atoi(envvar); } /* Check whether to support Zero frame buffer */ envvar = getenv("HSA_ZFB"); if (envvar) - zfb_support = atoi(envvar); + dxg_runtime->zfb_support = atoi(envvar); /* Check whether to handle vendor specific aql packet */ envvar = getenv("WSLKMT_VENDOR_PACKET"); if (envvar) - vendor_packet_process = atoi(envvar); + dxg_runtime->vendor_packet_process = atoi(envvar); /* Decide whether hsa-runtime dispatch vendor packet */ envvar = getenv("WSL_ENABLE_VENDOR_PACKET"); if (envvar) - enable_vendor_packet = atoi(envvar); + dxg_runtime->enable_vendor_packet = atoi(envvar); /* Decide whether to check available system memory before allocation */ envvar = getenv("WSL_CHECK_AVAIL_SYSRAM"); if (envvar) - check_avail_sysram = !strcmp(envvar, "1"); + dxg_runtime->check_avail_sysram = !strcmp(envvar, "1"); envvar = getenv("WSL_ENABLE_THUNK_SUB_ALLOCATOR"); if (envvar) - enable_thunk_sub_allocator = atoi(envvar); + dxg_runtime->enable_thunk_sub_allocator = atoi(envvar); return HSAKMT_STATUS_SUCCESS; } @@ -144,9 +131,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) { int fd = -1; HsaSystemProperties sys_props;
char *error; - char *useSvmStr; - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); /* If the process has forked, the child process must re-initialize * it's connection to DXG. Any references tracked by dxg_open_count @@ -155,34 +141,34 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) { if (is_forked_child()) clear_after_fork(); - if (dxg_open_count == 0) { + if (dxg_runtime->dxg_open_count == 0) { static bool atfork_installed = false; result = init_vars_from_env(); if (result != HSAKMT_STATUS_SUCCESS) goto open_failed; - if (dxg_fd < 0) { - fd = open(dxg_device_name, O_RDWR | O_CLOEXEC); + if (dxg_runtime->dxg_fd < 0) { + fd = open(dxg_runtime->dxg_device_name, O_RDWR | O_CLOEXEC); if (fd == -1) { result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; goto open_failed; } - dxg_fd = fd; + dxg_runtime->dxg_fd = fd; } init_page_size(); - useSvmStr = getenv("HSA_USE_SVM"); - is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0")) && false; + char *useSvmStr = getenv("HSA_USE_SVM"); + dxg_runtime->is_svm_api_supported = !(useSvmStr && !strcmp(useSvmStr, "0")) && false; // result = topology_sysfs_get_system_props(&sys_props); if (result != HSAKMT_STATUS_SUCCESS) goto topology_sysfs_failed; - dxg_open_count = 1; + dxg_runtime->dxg_open_count = 1; if (!atfork_installed) { /* Atfork handlers cannot be uninstalled and @@ -195,17 +181,17 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void) { atfork_installed = true; } } else { - dxg_open_count++; + dxg_runtime->dxg_open_count++; result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED; } reset_suballocator(); - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return result; topology_sysfs_failed: close(fd); open_failed: - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return result; } @@ -213,18 +199,19 @@ open_failed: HSAKMT_STATUS HSAKMTAPI hsaKmtCloseKFD(void) { HSAKMT_STATUS result; - pthread_mutex_lock(&hsakmt_mutex); + 
pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); - if (dxg_open_count > 0) { - if (--dxg_open_count == 0) { - close(dxg_fd); + if (dxg_runtime->dxg_open_count > 0) { + if (--dxg_runtime->dxg_open_count == 0) { + close(dxg_runtime->dxg_fd); + dxg_runtime->dxg_fd = -1; } result = HSAKMT_STATUS_SUCCESS; } else result = HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return result; } diff --git a/topology.cpp b/topology.cpp index ed67320367..e9e34be8d6 100644 --- a/topology.cpp +++ b/topology.cpp @@ -395,7 +395,7 @@ void topology_setup_is_dgpu_param(HsaNodeProperties *props) { /* if we found a dGPU node, then treat the whole system as dGPU */ /* noted that some APUs are also treated as dGPU in runtime */ if (!props->NumCPUCores && props->NumFComputeCores) - hsakmt_is_dgpu = true; + dxg_runtime->hsakmt_is_dgpu = true; } static HSAKMT_STATUS topology_get_cpu_model_name(HsaNodeProperties& props, @@ -604,7 +604,7 @@ static HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, snprintf((char *)props.AMDName, sizeof(props.AMDName) - 1, "GFX%06x", HSA_GET_GFX_VERSION_FULL(props.EngineId.ui32)); - if (!is_svm_api_supported) + if (!dxg_runtime->is_svm_api_supported) props.Capability.ui32.SVMAPISupported = 0; props.Capability.ui32.DoorbellType = 2; @@ -636,7 +636,7 @@ static HSAKMT_STATUS topology_sysfs_get_mem_props(uint32_t node_id, /* props.SizeInBytes is the actual physical system * memory size. Reserve 1/16th for WSL system usage. 
*/ - max_single_alloc_size = info.totalram - (info.totalram >> 4); + dxg_runtime->max_single_alloc_size = info.totalram - (info.totalram >> 4); props.Flags.MemoryProperty = 0; /* TODO: sudo dmidecode --type memory doesn't work on wsl */ @@ -1171,7 +1171,7 @@ hsaKmtAcquireSystemProperties(HsaSystemProperties *SystemProperties) { if (!SystemProperties) return HSAKMT_STATUS_INVALID_PARAMETER; - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); /* We already have a valid snapshot. Avoid double initialization that * would leak memory. @@ -1205,16 +1205,16 @@ init_process_apertures_failed: topology_drop_snapshot(); out: - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return err; } HSAKMT_STATUS HSAKMTAPI hsaKmtReleaseSystemProperties(void) { - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); topology_drop_snapshot(); - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return HSAKMT_STATUS_SUCCESS; } @@ -1237,7 +1237,7 @@ hsaKmtGetNodeProperties(HSAuint32 NodeId, HsaNodeProperties *NodeProperties) { return HSAKMT_STATUS_INVALID_PARAMETER; CHECK_DXG_OPEN(); - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); err = validate_nodeid(NodeId, &gpu_id); if (err != HSAKMT_STATUS_SUCCESS) @@ -1260,7 +1260,7 @@ hsaKmtGetNodeProperties(HSAuint32 NodeId, HsaNodeProperties *NodeProperties) { } out: - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return err; } @@ -1274,7 +1274,7 @@ hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, HSAuint32 NumBanks, return HSAKMT_STATUS_INVALID_PARAMETER; CHECK_DXG_OPEN(); - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); memset(MemoryProperties, 0, NumBanks * sizeof(HsaMemoryProperties)); for (i = 0; i < wsl::Min(dxg_topology->g_props[NodeId].node.NumMemoryBanks, NumBanks); i++) { @@ 
-1304,7 +1304,7 @@ hsaKmtGetNodeMemoryProperties(HSAuint32 NodeId, HSAuint32 NumBanks, } out: - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return err; } @@ -1318,7 +1318,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties( return HSAKMT_STATUS_INVALID_PARAMETER; CHECK_DXG_OPEN(); - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); /* KFD ADD page 18, snapshot protocol violation */ if (!dxg_topology->g_system || NodeId >= dxg_topology->g_system->NumNodes) { @@ -1339,7 +1339,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtGetNodeCacheProperties( err = HSAKMT_STATUS_SUCCESS; out: - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return err; } @@ -1364,7 +1364,7 @@ hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId, HSAuint32 NumIoLinks, CHECK_DXG_OPEN(); - pthread_mutex_lock(&hsakmt_mutex); + pthread_mutex_lock(&dxg_runtime->hsakmt_mutex); /* KFD ADD page 18, snapshot protocol violation */ if (!dxg_topology->g_system || NodeId >= dxg_topology->g_system->NumNodes) { @@ -1381,7 +1381,7 @@ hsaKmtGetNodeIoLinkProperties(HSAuint32 NodeId, HSAuint32 NumIoLinks, err = topology_get_iolink_props(NodeId, NumIoLinks, IoLinkProperties); out: - pthread_mutex_unlock(&hsakmt_mutex); + pthread_mutex_unlock(&dxg_runtime->hsakmt_mutex); return err; } diff --git a/version.cpp b/version.cpp index 0cbf188cf4..2c383027e4 100644 --- a/version.cpp +++ b/version.cpp @@ -53,7 +53,7 @@ hsaKmtGetVersionCapInfo(HsaVersionCapability *VersionCapInfo) { VersionCapInfo->ui64.XNack = 0; VersionCapInfo->ui64.FineGrainPcie = 0;*/ - VersionCapInfo->ui64.VendorPacket = enable_vendor_packet; + VersionCapInfo->ui64.VendorPacket = dxg_runtime->enable_vendor_packet; return HSAKMT_STATUS_SUCCESS; } diff --git a/wddm/queue.cpp b/wddm/queue.cpp index 68b5f25967..1907603363 100644 --- a/wddm/queue.cpp +++ b/wddm/queue.cpp @@ -824,7 +824,7 @@ hsa_status_t ComputeQueue::VendorSpecificAqlToPm4(char *cpu, 
amd_aql_pm4_ib *pac uint32_t* pm4_addr = reinterpret_cast((static_cast(packet->ib_jump_cmd[2]) << 32) | (static_cast(packet->ib_jump_cmd[1]) & ~3ull)); uint32_t pm4_size = packet->ib_jump_cmd[3]&0xfffff; pr_debug("queue %p %s VENDOR_SPECIFIC pkt pm4_addr %p pm4_size %#x cs=%" PRIx64"\n", - ring, vendor_packet_process ? "process" : "skip", pm4_addr, pm4_size, + ring, dxg_runtime->vendor_packet_process ? "process" : "skip", pm4_addr, pm4_size, packet->completion_signal.handle); for (int i = 0; i < pm4_size; i++) { pr_debug("pm4_addr[%d]=%#x\n", i, pm4_addr[i]); @@ -832,7 +832,7 @@ hsa_status_t ComputeQueue::VendorSpecificAqlToPm4(char *cpu, amd_aql_pm4_ib *pac int i = ib_size; - if (vendor_packet_process) { + if (dxg_runtime->vendor_packet_process) { int major = device->Major(); memcpy(cpu+i, pm4_addr, pm4_size * sizeof(uint32_t)); i += pm4_size * sizeof(uint32_t);