From 78e683acf48d670fcc9eeef6f144ab3503caa8e5 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Sat, 5 Aug 2017 00:45:50 -0400 Subject: [PATCH] Changes to run on old kernels Fall back to older apertures API and old events page size if the new APIs fail. The event limit that goes with the mapped page size is kept in a static variable, because it is checked on every hsaKmtCreateEvent call while the events page is only mapped when it is not set yet. This allows running on current upstream kernels (with only minor fixes) on gfx801 and enables testing of further changes during upstreaming. Change-Id: I9d86d4f576e52fcbb5bc158d80f1bf41261e4e87 Signed-off-by: Felix Kuehling --- src/events.c | 16 ++++++++++++++-- src/fmm.c | 50 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/src/events.c b/src/events.c index d934391c6d..876bc846d8 100644 --- a/src/events.c +++ b/src/events.c @@ -51,6 +51,10 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, bool ManualReset, bool IsSignaled, HsaEvent **Event) { + /* Slot limit for events_page. Static because it is checked on every + * call, not only on the call that maps the page. */ + static unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT; + CHECK_KFD_OPEN(); if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID) @@ -94,8 +98,16 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, e->EventId = args.event_id; if (!events_page && args.event_page_offset > 0) { - events_page = mmap(NULL, KFD_SIGNAL_EVENT_LIMIT * 8, PROT_WRITE | PROT_READ, + /* a retry after a failed mapping starts from the full limit */ + event_limit = KFD_SIGNAL_EVENT_LIMIT; + events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ, MAP_SHARED, kfd_fd, args.event_page_offset); + if (events_page == MAP_FAILED) { + /* old kernels only support 256 events */ + event_limit = 256; + events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ, + MAP_SHARED, kfd_fd, args.event_page_offset); + } if (events_page == MAP_FAILED) { events_page = NULL; pthread_mutex_unlock(&hsakmt_mutex); @@ -106,7 +118,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, pthread_mutex_unlock(&hsakmt_mutex); - if (args.event_page_offset > 0 && args.event_slot_index < KFD_SIGNAL_EVENT_LIMIT) + if (args.event_page_offset > 0 && args.event_slot_index < event_limit) e->EventData.HWData2 = 
(HSAuint64)&events_page[args.event_slot_index]; e->EventData.EventType = EventDesc->EventType; diff --git a/src/fmm.c b/src/fmm.c index 5695ece5b4..0b73dd413f 100644 --- a/src/fmm.c +++ b/src/fmm.c @@ -1382,14 +1382,45 @@ static uint32_t get_vm_alignment(uint32_t device_id) return MAX(PAGE_SIZE, page_size); } +static HSAKMT_STATUS get_process_apertures( + struct kfd_process_device_apertures *process_apertures, + uint32_t *num_of_nodes) +{ + struct kfd_ioctl_get_process_apertures_new_args args_new = { + .kfd_process_device_apertures_ptr = (uintptr_t)process_apertures, + .num_of_nodes = *num_of_nodes + }; + struct kfd_ioctl_get_process_apertures_args args_old; + + if (!kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, + (void *)&args_new)) { + *num_of_nodes = args_new.num_of_nodes; + return HSAKMT_STATUS_SUCCESS; + } + + /* New IOCTL failed, try the old one in case we're running on a + * really old kernel; it fills args_old instead of our buffer */ + if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, + (void *)&args_old)) + return HSAKMT_STATUS_ERROR; + + if (args_old.num_of_nodes < *num_of_nodes) + *num_of_nodes = args_old.num_of_nodes; + + memcpy(process_apertures, args_old.process_apertures, + sizeof(*process_apertures) * *num_of_nodes); + + return HSAKMT_STATUS_SUCCESS; +} + HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) { - struct kfd_ioctl_get_process_apertures_new_args args; uint32_t i = 0; int32_t gpu_mem_id = 0; uint32_t gpu_id; HsaNodeProperties props; struct kfd_process_device_apertures *process_apertures; + uint32_t num_of_nodes; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr; unsigned int guardPages = 1; @@ -1463,31 +1494,28 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) * required since Number of nodes is already known. 
Kernel will fill in * the apertures in kfd_process_device_apertures_ptr */ - process_apertures = malloc(gpu_mem_count * sizeof(struct kfd_process_device_apertures)); + num_of_nodes = gpu_mem_count; + process_apertures = malloc(num_of_nodes * sizeof(struct kfd_process_device_apertures)); if (!process_apertures) { ret = HSAKMT_STATUS_NO_MEMORY; goto sysfs_parse_failed; } - args.kfd_process_device_apertures_ptr = (uintptr_t)process_apertures; - args.num_of_nodes = gpu_mem_count; - - if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, (void *)&args)) { - ret = HSAKMT_STATUS_ERROR; + ret = get_process_apertures(process_apertures, &num_of_nodes); + if (ret != HSAKMT_STATUS_SUCCESS) goto get_aperture_ioctl_failed; - } all_gpu_id_array_size = 0; all_gpu_id_array = NULL; - if (args.num_of_nodes > 0) { - all_gpu_id_array = malloc(sizeof(uint32_t) * args.num_of_nodes); + if (num_of_nodes > 0) { + all_gpu_id_array = malloc(sizeof(uint32_t) * num_of_nodes); if (!all_gpu_id_array) { ret = HSAKMT_STATUS_NO_MEMORY; goto get_aperture_ioctl_failed; } } - for (i = 0 ; i < args.num_of_nodes ; i++) { + for (i = 0 ; i < num_of_nodes ; i++) { /* Map Kernel process device data node i <--> gpu_mem_id which * indexes into gpu_mem[] based on gpu_id */