libhsakmt: Improve error handling in child process

Check for errno == EBADF in kmtIoctl to detect misuse of the kfd_fd
in a forked child process.

Detect being in a forked child process pro-actively by implementing
a pthread_atfork callback.

Make sure all mutexes get reinitialized in the child process to avoid
deadlocks.

Check for being in a forked child process in CHECK_KFD_OPEN so that
all hsaKmt functions will return the appropriate status
HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED.

Update InvalidKFDHandleTest to expect that error code.

Change-Id: I0238e5fba344dcaa454e97a35db2e2dcc8d1f607
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Tento commit je obsažen v:
Felix Kuehling
2019-12-04 23:06:00 -05:00
rodič fe97612800
revize 87e10cd0b4
5 změnil soubory, kde provedl 60 přidání a 4 odebrání
+2
Zobrazit soubor
@@ -3566,6 +3566,8 @@ static void fmm_clear_aperture(manageable_aperture_t *app)
{
rbtree_node_t *n;
pthread_mutex_init(&app->fmm_mutex, NULL);
while ((n = rbtree_node_any(&app->tree, MID)))
vm_remove_object(app, vm_object_entry(n, 0));
+9
Zobrazit soubor
@@ -1,3 +1,4 @@
#include <stdio.h>
#include <errno.h>
#include <sys/ioctl.h>
@@ -12,5 +13,13 @@ int kmtIoctl(int fd, unsigned long request, void *arg)
ret = ioctl(fd, request, arg);
} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
if (ret == -1 && errno == EBADF) {
	/* errno is only meaningful when ioctl() reported failure. Without
	 * the ret check, a stale EBADF left over from an earlier call
	 * would mark the process as forked after a successful ioctl.
	 *
	 * In case pthread_atfork didn't catch it, this will
	 * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
	 */
	pr_err("KFD file descriptor not valid in this process\n");
	hsakmt_forked = true;
}
return ret;
}
+2 -1
Zobrazit soubor
@@ -34,6 +34,7 @@
extern int kfd_fd;
extern unsigned long kfd_open_count;
extern bool hsakmt_forked;
extern pthread_mutex_t hsakmt_mutex;
extern bool is_dgpu;
@@ -51,7 +52,7 @@ extern struct hsa_gfxip_table force_asic_entry;
#define PORT_UINT64_TO_VPTR(v) ((void*)(unsigned long)(v))
/* Return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED from the calling
 * function when kfd_open_count is 0. The second body shown below
 * returns the same status when hsakmt_forked is set as well.
 *
 * NOTE(review): two macro bodies follow the #define in this listing,
 * which looks like the removed and the added diff line shown
 * together - confirm against the actual header.
 */
#define CHECK_KFD_OPEN() \
do { if (kfd_open_count == 0) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0)
do { if (kfd_open_count == 0 || hsakmt_forked) return HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED; } while (0)
extern int PAGE_SIZE;
extern int PAGE_SHIFT;
+46 -2
Zobrazit soubor
@@ -38,25 +38,53 @@
static const char kfd_device_name[] = "/dev/kfd";
static pid_t parent_pid = -1;
int hsakmt_debug_level;
bool hsakmt_forked;
/* zfb is mainly used during emulation */
int zfb_support;
/* is_forked_child detects when the process has forked since the last
 * time this function was called. We cannot rely on pthread_atfork
 * because the process can fork without calling the fork function in
 * libc (using clone or calling the system call directly).
 *
 * Returns true when hsakmt_forked is already set, or when getpid()
 * differs from the PID stored in parent_pid; in the latter case
 * hsakmt_forked is set too, so later calls return true without
 * calling getpid() again. When parent_pid is still -1, the current
 * PID is recorded and false is returned.
 *
 * NOTE(review): this listing appears to show removed and added diff
 * lines together (cur_pid is declared twice and the parent_pid
 * comparison appears twice) - confirm against the actual file.
 */
static bool is_forked_child(void)
{
pid_t cur_pid = getpid();
pid_t cur_pid;
/* Fast path: a fork was already detected earlier. */
if (hsakmt_forked)
return true;
cur_pid = getpid();
/* No PID recorded yet: remember the current one as the reference. */
if (parent_pid == -1) {
parent_pid = cur_pid;
return false;
}
if (parent_pid != cur_pid)
if (parent_pid != cur_pid) {
/* PID changed since it was recorded: latch the forked state. */
hsakmt_forked = true;
return true;
}
return false;
}
/* Callbacks from pthread_atfork */

/* Passed to pthread_atfork as the prepare handler: takes
 * hsakmt_mutex. parent_fork_handler releases it again.
 */
static void prepare_fork_handler(void)
{
pthread_mutex_lock(&hsakmt_mutex);
}
/* Passed to pthread_atfork as the parent handler: releases
 * hsakmt_mutex, which prepare_fork_handler locked.
 */
static void parent_fork_handler(void)
{
pthread_mutex_unlock(&hsakmt_mutex);
}
/* Passed to pthread_atfork as the child handler: re-initializes
 * hsakmt_mutex instead of unlocking it, and sets hsakmt_forked to
 * mark this process as a forked child.
 */
static void child_fork_handler(void)
{
pthread_mutex_init(&hsakmt_mutex, NULL);
hsakmt_forked = true;
}
/* Call this from the child process after fork. This will clear all
* data that is duplicated from the parent process, that is not valid
* in the child.
@@ -74,6 +102,8 @@ static void clear_after_fork(void)
kfd_fd = 0;
}
kfd_open_count = 0;
parent_pid = -1;
hsakmt_forked = false;
}
static inline void init_page_size(void)
@@ -150,6 +180,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
clear_after_fork();
if (kfd_open_count == 0) {
static bool atfork_installed = false;
result = init_vars_from_env();
if (result != HSAKMT_STATUS_SUCCESS)
goto open_failed;
@@ -182,6 +214,18 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtOpenKFD(void)
pr_warn("Insufficient Memory. Debugging unavailable\n");
init_counter_props(sys_props.NumNodes);
if (!atfork_installed) {
/* Atfork handlers cannot be uninstalled and
* must be installed only once. Otherwise
* prepare will deadlock when trying to take
* the same lock multiple times.
*/
pthread_atfork(prepare_fork_handler,
parent_fork_handler,
child_fork_handler);
atfork_installed = true;
}
} else {
kfd_open_count++;
result = HSAKMT_STATUS_KERNEL_ALREADY_OPENED;
+1 -1
Zobrazit soubor
@@ -75,7 +75,7 @@ TEST_F(KFDOpenCloseKFDTest, InvalidKFDHandleTest ) {
HsaVersionInfo m_VersionInfo;
pid_t m_ChildPid = fork();
if (m_ChildPid == 0) {
EXPECT_EQ(HSAKMT_STATUS_ERROR, hsaKmtGetVersion(&m_VersionInfo));
EXPECT_EQ(HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED, hsaKmtGetVersion(&m_VersionInfo));
exit(0);
} else {
int childStatus;