Changes to run on old kernels

Fall back to older apertures API and old events page size if the new APIs
fail. This allows running on current upstream kernels (with only minor
fixes) on gfx801 and enables testing of further changes during upstreaming.

Change-Id: I9d86d4f576e52fcbb5bc158d80f1bf41261e4e87
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
This commit is contained in:
Felix Kuehling
2017-08-05 00:45:50 -04:00
والد d0e2872011
کامیت 78e683acf4
2فایلهای تغییر یافته به همراه49 افزوده شده و 13 حذف شده
+10 -2
مشاهده پرونده
@@ -51,6 +51,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
bool ManualReset, bool IsSignaled,
HsaEvent **Event)
{
unsigned int event_limit = KFD_SIGNAL_EVENT_LIMIT;
CHECK_KFD_OPEN();
if (EventDesc->EventType >= HSA_EVENTTYPE_MAXID)
@@ -94,8 +96,14 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
e->EventId = args.event_id;
if (!events_page && args.event_page_offset > 0) {
events_page = mmap(NULL, KFD_SIGNAL_EVENT_LIMIT * 8, PROT_WRITE | PROT_READ,
events_page = mmap(NULL, event_limit * 8, PROT_WRITE | PROT_READ,
MAP_SHARED, kfd_fd, args.event_page_offset);
if (events_page == MAP_FAILED) {
/* old kernels only support 256 events */
event_limit = 256;
events_page = mmap(NULL, PAGE_SIZE, PROT_WRITE | PROT_READ,
MAP_SHARED, kfd_fd, args.event_page_offset);
}
if (events_page == MAP_FAILED) {
events_page = NULL;
pthread_mutex_unlock(&hsakmt_mutex);
@@ -106,7 +114,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateEvent(HsaEventDescriptor *EventDesc,
pthread_mutex_unlock(&hsakmt_mutex);
if (args.event_page_offset > 0 && args.event_slot_index < KFD_SIGNAL_EVENT_LIMIT)
if (args.event_page_offset > 0 && args.event_slot_index < event_limit)
e->EventData.HWData2 = (HSAuint64)&events_page[args.event_slot_index];
e->EventData.EventType = EventDesc->EventType;
+39 -11
مشاهده پرونده
@@ -1382,14 +1382,45 @@ static uint32_t get_vm_alignment(uint32_t device_id)
return MAX(PAGE_SIZE, page_size);
}
/*
 * Fetch the per-device process apertures from KFD.
 *
 * process_apertures: caller-provided array that receives the apertures.
 * num_of_nodes:      in:  number of entries the caller wants filled in
 *                         (the caller sizes process_apertures to this);
 *                    out: number of entries actually reported.
 *
 * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW is tried first. If it fails, the
 * older AMDKFD_IOC_GET_PROCESS_APERTURES is used instead and its result
 * is copied out of the fixed-size array embedded in the ioctl argument.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success, HSAKMT_STATUS_ERROR if both
 * ioctls fail.
 */
static HSAKMT_STATUS get_process_apertures(
	struct kfd_process_device_apertures *process_apertures,
	uint32_t *num_of_nodes)
{
	struct kfd_ioctl_get_process_apertures_new_args args_new = {
		.kfd_process_device_apertures_ptr = (uintptr_t)process_apertures,
		.num_of_nodes = *num_of_nodes
	};
	struct kfd_ioctl_get_process_apertures_args args_old;
	/* Capacity of the fixed array embedded in the old ioctl argument */
	const uint32_t max_old_nodes =
		sizeof(args_old.process_apertures) /
		sizeof(args_old.process_apertures[0]);

	if (!kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
		      (void *)&args_new)) {
		*num_of_nodes = args_new.num_of_nodes;
		return HSAKMT_STATUS_SUCCESS;
	}

	/* New IOCTL failed, try the old one in case we're running on
	 * a really old kernel.
	 *
	 * Zero the argument first so that no indeterminate stack contents
	 * are ever interpreted as a node count or aperture data.
	 */
	memset(&args_old, 0, sizeof(args_old));

	if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES,
		     (void *)&args_old))
		return HSAKMT_STATUS_ERROR;

	/* Never copy more entries than the embedded array can hold, even
	 * if the reported count is out of range.
	 */
	if (args_old.num_of_nodes > max_old_nodes)
		args_old.num_of_nodes = max_old_nodes;

	/* Report no more nodes than the caller asked for */
	if (args_old.num_of_nodes < *num_of_nodes)
		*num_of_nodes = args_old.num_of_nodes;

	memcpy(process_apertures, args_old.process_apertures,
	       sizeof(*process_apertures) * *num_of_nodes);

	return HSAKMT_STATUS_SUCCESS;
}
HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
{
struct kfd_ioctl_get_process_apertures_new_args args;
uint32_t i = 0;
int32_t gpu_mem_id = 0;
uint32_t gpu_id;
HsaNodeProperties props;
struct kfd_process_device_apertures *process_apertures;
uint32_t num_of_nodes;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
char *disableCache, *pagedUserptr, *checkUserptr, *guardPagesStr;
unsigned int guardPages = 1;
@@ -1463,31 +1494,28 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
* required since Number of nodes is already known. Kernel will fill in
* the apertures in kfd_process_device_apertures_ptr
*/
process_apertures = malloc(gpu_mem_count * sizeof(struct kfd_process_device_apertures));
num_of_nodes = gpu_mem_count;
process_apertures = malloc(num_of_nodes * sizeof(struct kfd_process_device_apertures));
if (!process_apertures) {
ret = HSAKMT_STATUS_NO_MEMORY;
goto sysfs_parse_failed;
}
args.kfd_process_device_apertures_ptr = (uintptr_t)process_apertures;
args.num_of_nodes = gpu_mem_count;
if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, (void *)&args)) {
ret = HSAKMT_STATUS_ERROR;
ret = get_process_apertures(process_apertures, &num_of_nodes);
if (ret != HSAKMT_STATUS_SUCCESS)
goto get_aperture_ioctl_failed;
}
all_gpu_id_array_size = 0;
all_gpu_id_array = NULL;
if (args.num_of_nodes > 0) {
all_gpu_id_array = malloc(sizeof(uint32_t) * args.num_of_nodes);
if (num_of_nodes > 0) {
all_gpu_id_array = malloc(sizeof(uint32_t) * num_of_nodes);
if (!all_gpu_id_array) {
ret = HSAKMT_STATUS_NO_MEMORY;
goto get_aperture_ioctl_failed;
}
}
for (i = 0 ; i < args.num_of_nodes ; i++) {
for (i = 0 ; i < num_of_nodes ; i++) {
/* Map Kernel process device data node i <--> gpu_mem_id which
* indexes into gpu_mem[] based on gpu_id
*/