diff --git a/src/events.c b/src/events.c index d934391c6d..876bc846d8 100644 --- a/src/events.c +++ b/src/events.c @@ -51,6 +51,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, bool ManualReset, bool IsSignaled, HsaEvent **Event) { + unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT; + CHECK_KFD_OPEN(); if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID) @@ -94,8 +96,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, e->EventId = args.event_id; if (!events_page && args.event_page_offset > 0) { - events_page = mmap(NULL, KFD_SIGNAL_EVENT_LIMIT * 8, PROT_WRITE | PROT_READ, + events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ, MAP_SHARED, kfd_fd, args.event_page_offset); + if (events_page == MAP_FAILED) { + /* old kernels only support 256 events */ + event_limit = 256; + events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ, + MAP_SHARED, kfd_fd, args.event_page_offset); + } if (events_page == MAP_FAILED) { events_page = NULL; pthread_mutex_unlock(&hsakmt_mutex); @@ -106,7 +114,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc, pthread_mutex_unlock(&hsakmt_mutex); - if (args.event_page_offset > 0 && args.event_slot_index < KFD_SIGNAL_EVENT_LIMIT) + if (args.event_page_offset > 0 && args.event_slot_index < event_limit) e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index]; e->EventData.EventType = EventDesc->EventType; diff --git a/src/fmm.c b/src/fmm.c index 5695ece5b4..0b73dd413f 100644 --- a/src/fmm.c +++ b/src/fmm.c @@ -1382,14 +1382,45 @@ static uint32_t get_vm_alignment(uint32_t device_id) return MAX(PAGE_SIZE, page_size); } +static HSAKMT_STATUS get_process_apertures( + struct kfd_process_device_apertures *process_apertures, + uint32_t *num_of_nodes) +{ + struct kfd_ioctl_get_process_apertures_new_args args_new = { + .kfd_process_device_apertures_ptr = (uintptr_t)process_apertures, + .num_of_nodes = *num_of_nodes + }; + struct 
kfd_ioctl_get_process_apertures_args args_old = {0}; /* zeroed: never hand uninitialized stack bytes to the legacy ioctl */ + + if (!kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, + (void *)&args_new)) { + *num_of_nodes = args_new.num_of_nodes; /* report the count found in args_new after the call */ + return HSAKMT_STATUS_SUCCESS; + } + + /* New IOCTL failed, try the old one in case we're running on + * a really old kernel; its results are copied out of args_old below */ + if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, + (void *)&args_old)) + return HSAKMT_STATUS_ERROR; /* both ioctls failed */ + + if (args_old.num_of_nodes < *num_of_nodes) /* clamp: copy at most the caller's requested count */ + *num_of_nodes = args_old.num_of_nodes; + + memcpy(process_apertures, args_old.process_apertures, + sizeof(*process_apertures) * *num_of_nodes); + + return HSAKMT_STATUS_SUCCESS; +} + HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) { - struct kfd_ioctl_get_process_apertures_new_args args; uint32_t i = 0; int32_t gpu_mem_id = 0; uint32_t gpu_id; HsaNodeProperties props; struct kfd_process_device_apertures *process_apertures; + uint32_t num_of_nodes; HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS; char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr; unsigned int guardPages = 1; @@ -1463,31 +1494,28 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes) * required since Number of nodes is already known. 
Kernel will fill in * the apertures in kfd_process_device_apertures_ptr */ - process_apertures = malloc(gpu_mem_count * sizeof(struct kfd_process_device_apertures)); + num_of_nodes = gpu_mem_count; + process_apertures = malloc(num_of_nodes * sizeof(struct kfd_process_device_apertures)); if (!process_apertures) { ret = HSAKMT_STATUS_NO_MEMORY; goto sysfs_parse_failed; } - args.kfd_process_device_apertures_ptr = (uintptr_t)process_apertures; - args.num_of_nodes = gpu_mem_count; - - if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, (void *)&args)) { - ret = HSAKMT_STATUS_ERROR; + ret = get_process_apertures(process_apertures, &num_of_nodes); + if (ret != HSAKMT_STATUS_SUCCESS) goto get_aperture_ioctl_failed; - } all_gpu_id_array_size = 0; all_gpu_id_array = NULL; - if (args.num_of_nodes > 0) { - all_gpu_id_array = malloc(sizeof(uint32_t) * args.num_of_nodes); + if (num_of_nodes > 0) { + all_gpu_id_array = malloc(sizeof(uint32_t) * num_of_nodes); if (!all_gpu_id_array) { ret = HSAKMT_STATUS_NO_MEMORY; goto get_aperture_ioctl_failed; } } - for (i = 0 ; i < args.num_of_nodes ; i++) { + for (i = 0 ; i < num_of_nodes ; i++) { /* Map Kernel process device data node i <--> gpu_mem_id which * indexes into gpu_mem[] based on gpu_id */