Switch to cleaned up memory management ioctls

Change-Id: Ib8971ef91138f2a051272b9b57f0ebd480e8e738
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
This commit is contained in:
Felix Kuehling
2017-05-01 19:19:38 -04:00
parent 3b2f064cbc
commit 5eb31b2ebe
2 changed files with 74 additions and 70 deletions
+28 -35
View File
@@ -156,12 +156,14 @@ struct kfd_ioctl_dbg_unregister_args {
uint32_t pad;
};
/*
 * Debug address-watch ioctl arguments.
 *
 * FIXME: This is not 32-on-64 safe (presumably because content_ptr is a raw
 * pointer, whose size differs between 32-bit and 64-bit user space). It is
 * fixed upstream; we should match it.
 */
struct kfd_ioctl_dbg_address_watch_args {
uint32_t gpu_id; /* to KFD */
uint32_t buf_size_in_bytes; /* including gpu_id and buf_size */
void *content_ptr; /* a pointer to the actual content */
};
/* FIXME: This is not 32-on-64 safe. It is fixed upstream; we should match it. */
struct kfd_ioctl_dbg_wave_control_args {
uint32_t gpu_id; /* to KFD */
uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */
@@ -217,7 +219,6 @@ struct kfd_ioctl_reset_event_args {
uint32_t pad;
};
struct kfd_memory_exception_failure {
bool NotPresent; /* Page not present or supervisor privilege */
bool ReadOnly; /* Write access to a read-only page */
@@ -226,6 +227,7 @@ struct kfd_memory_exception_failure {
};
/* Memory exception data */
/* FIXME: Make this 32-on-64 safe. Will break ABI. */
struct kfd_hsa_memory_exception_data {
uint64_t va;
uint32_t gpu_id;
@@ -252,7 +254,8 @@ struct kfd_ioctl_wait_events_args {
uint32_t wait_result; /* from KFD */
};
struct kfd_ioctl_alloc_memory_of_gpu_args {
/* FIXME: remove unused fields and make it 32-on-64 safe. Will break ABI. */
struct kfd_ioctl_alloc_memory_of_scratch_args {
uint64_t va_addr; /* to KFD */
uint64_t size; /* to KFD */
uint64_t handle; /* from KFD */
@@ -266,23 +269,15 @@ struct kfd_ioctl_free_memory_of_gpu_args {
struct kfd_ioctl_map_memory_to_gpu_args {
uint64_t handle; /* to KFD */
};
struct kfd_ioctl_map_memory_to_gpu_new_args {
uint64_t handle; /* to KFD */
uint32_t *device_ids_array; /* to KFD */
uint64_t device_ids_array_ptr; /* to KFD */
uint32_t device_ids_array_size; /* to KFD */
uint32_t pad;
};
struct kfd_ioctl_unmap_memory_from_gpu_args {
uint64_t handle; /* to KFD */
};
struct kfd_ioctl_unmap_memory_from_gpu_new_args {
uint64_t handle; /* to KFD */
uint32_t *device_ids_array; /* to KFD */
uint32_t device_ids_array_size; /* to KFD */
uint64_t handle; /* to KFD */
uint64_t device_ids_array_ptr; /* to KFD */
uint32_t device_ids_array_size; /* to KFD */
uint32_t pad;
};
@@ -295,6 +290,7 @@ struct kfd_ioctl_open_graphic_handle_args {
uint32_t pad;
};
/* FIXME: Make this 32-on-64 safe. Will break ABI. */
struct kfd_ioctl_set_process_dgpu_aperture_args {
uint32_t gpu_id;
uint64_t dgpu_base;
@@ -302,25 +298,30 @@ struct kfd_ioctl_set_process_dgpu_aperture_args {
};
/*
* GPU Memory allocation flags
* Memory types
*/
#define KFD_IOC_ALLOC_MEM_FLAGS_DGPU_HOST (1 << 0)
#define KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_DGPU_SCRATCH (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE (1 << 3)
#define KFD_IOC_ALLOC_MEM_FLAGS_APU_SCRATCH (1 << 4)
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
#define KFD_IOC_ALLOC_MEM_FLAGS_DGPU_AQL_QUEUE_MEM (1 << 5)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 6)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 7)
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 8)
/*
 * Allocation flags attributes/access options.
 *
 * These occupy the high bits of the 32-bit flags word, counting down from
 * bit 31. The constants are unsigned so that setting bit 31 does not shift
 * into the sign bit of a signed int (undefined behavior in C) and so that
 * the values do not sign-extend when widened to 64 bits.
 */
#define KFD_IOC_ALLOC_MEM_FLAGS_NONPAGED (1U << 31)
#define KFD_IOC_ALLOC_MEM_FLAGS_READONLY (1U << 30)
#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1U << 29)
#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1U << 28)
#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1U << 27)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTE_ACCESS (1U << 26)
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1U << 25)
struct kfd_ioctl_alloc_memory_of_gpu_new_args {
struct kfd_ioctl_alloc_memory_of_gpu_args {
uint64_t va_addr; /* to KFD */
uint64_t size; /* to KFD */
uint64_t handle; /* from KFD */
uint32_t gpu_id; /* to KFD */
uint64_t mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
uint32_t gpu_id; /* to KFD */
uint32_t flags;
};
@@ -495,7 +496,7 @@ struct kfd_ioctl_cross_memory_copy_args {
AMDKFD_IOWR(0x15, struct kfd_ioctl_open_graphic_handle_args)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH \
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_scratch_args)
#define AMDKFD_IOC_SET_CU_MASK \
AMDKFD_IOW(0x17, struct kfd_ioctl_set_cu_mask_args)
@@ -504,17 +505,9 @@ struct kfd_ioctl_cross_memory_copy_args {
AMDKFD_IOW(0x18, \
struct kfd_ioctl_set_process_dgpu_aperture_args)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_NEW \
AMDKFD_IOWR(0x19, struct kfd_ioctl_alloc_memory_of_gpu_new_args)
#define AMDKFD_IOC_SET_TRAP_HANDLER \
AMDKFD_IOW(0x1a, struct kfd_ioctl_set_trap_handler_args)
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU_NEW \
AMDKFD_IOWR(0x1b, struct kfd_ioctl_map_memory_to_gpu_new_args)
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW \
AMDKFD_IOWR(0x1c, struct kfd_ioctl_unmap_memory_from_gpu_new_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
AMDKFD_IOWR(0x1d, struct kfd_ioctl_get_process_apertures_new_args)
+46 -35
View File
@@ -700,7 +700,7 @@ static vm_object_t *fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
uint64_t *mmap_offset,
uint32_t flags)
{
struct kfd_ioctl_alloc_memory_of_gpu_new_args args;
struct kfd_ioctl_alloc_memory_of_gpu_args args;
struct kfd_ioctl_free_memory_of_gpu_args free_args;
vm_object_t *vm_obj = NULL;
@@ -711,14 +711,18 @@ static vm_object_t *fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
args.gpu_id = gpu_id;
args.size = ALIGN_UP(MemorySizeInBytes, aperture->align);
args.flags = flags;
args.flags = flags |
KFD_IOC_ALLOC_MEM_FLAGS_NONPAGED |
KFD_IOC_ALLOC_MEM_FLAGS_EXECUTE_ACCESS |
KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE;
args.va_addr = (uint64_t)mem;
if (flags == KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE)
if (!topology_is_dgpu(get_device_id_by_gpu_id(gpu_id)) &&
(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
args.va_addr = VOID_PTRS_SUB(mem, aperture->base);
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
args.mmap_offset = *mmap_offset;
if (kmtIoctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_NEW, &args))
if (kmtIoctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &args))
return NULL;
/* Allocate object */
@@ -863,7 +867,7 @@ static void fmm_release_scratch(uint32_t gpu_id)
void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes)
{
manageble_aperture_t *aperture_phy;
struct kfd_ioctl_alloc_memory_of_gpu_args args;
struct kfd_ioctl_alloc_memory_of_scratch_args args;
int32_t gpu_mem_id;
void *mem = NULL;
uint64_t aligned_size = ALIGN_UP(MemorySizeInBytes, SCRATCH_ALIGN);
@@ -973,7 +977,7 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla
{
manageble_aperture_t *aperture;
int32_t gpu_mem_id;
uint32_t ioc_flags, offset;
uint32_t ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM, offset;
uint64_t size, mmap_offset;
void *mem;
vm_object_t *vm_obj = NULL;
@@ -985,16 +989,17 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla
size = MemorySizeInBytes;
if (flags.ui32.HostAccess)
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
if (topology_is_dgpu(get_device_id_by_gpu_id(gpu_id))) {
ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE;
aperture = &svm.dgpu_aperture;
offset = 0;
if (flags.ui32.AQLQueueMemory) {
size = MemorySizeInBytes * 2;
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_DGPU_AQL_QUEUE_MEM;
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM;
}
} else {
ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE;
aperture = &gpu_mem[gpu_mem_id].gpuvm_aperture;
offset = GPUVM_APP_OFFSET;
}
@@ -1135,7 +1140,7 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes,
}
if (flags.ui32.AQLQueueMemory) {
size = MemorySizeInBytes * 2;
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_DGPU_AQL_QUEUE_MEM;
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM;
}
/* Paged memory is allocated as a userptr mapping, non-paged
@@ -1183,7 +1188,7 @@ static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes,
return NULL;
}
} else {
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_DGPU_HOST;
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_GTT;
mem = __fmm_allocate_device(gpu_id, size,
aperture, 0, &mmap_offset,
ioc_flags, &vm_obj);
@@ -1235,7 +1240,7 @@ void *fmm_open_graphic_handle(uint32_t gpu_id,
void *mem = NULL;
int32_t i = gpu_mem_find_by_gpu_id(gpu_id);
struct kfd_ioctl_open_graphic_handle_args open_graphic_handle_args;
struct kfd_ioctl_unmap_memory_from_gpu_new_args unmap_args;
struct kfd_ioctl_unmap_memory_from_gpu_args unmap_args;
/* If not found or aperture isn't properly initialized/supported */
if (i < 0 || !aperture_is_valid(gpu_mem[i].gpuvm_aperture.base,
@@ -1273,9 +1278,9 @@ void *fmm_open_graphic_handle(uint32_t gpu_id,
release_mem:
unmap_args.handle = open_graphic_handle_args.handle;
unmap_args.device_ids_array = NULL;
unmap_args.device_ids_array_ptr = 0;
unmap_args.device_ids_array_size = 0;
kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW, &unmap_args);
kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &unmap_args);
release_area:
aperture_release_area(&gpu_mem[i].gpuvm_aperture, mem,
MemorySizeInBytes);
@@ -1660,7 +1665,7 @@ HSAKMT_STATUS fmm_get_aperture_base_and_limit(aperture_type_e aperture_type, HSA
static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
void *address, uint64_t size, vm_object_t *obj)
{
struct kfd_ioctl_map_memory_to_gpu_new_args args;
struct kfd_ioctl_map_memory_to_gpu_args args;
vm_object_t *object;
void *temp_mapped_id_array = NULL;
@@ -1686,10 +1691,11 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
args.handle = object->handle;
if (object->registered_device_id_array_size > 0) {
args.device_ids_array = object->registered_device_id_array;
args.device_ids_array_ptr =
(uint64_t)object->registered_device_id_array;
args.device_ids_array_size = object->registered_device_id_array_size;
} else {
args.device_ids_array = all_gpu_id_array;
args.device_ids_array_ptr = (uint64_t)all_gpu_id_array;
args.device_ids_array_size = all_gpu_id_array_size;
}
@@ -1697,10 +1703,11 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
if (!temp_mapped_id_array)
goto err_object_not_found;
if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU_NEW, &args))
if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args))
goto err_map_ioctl_failed;
print_device_id_array(args.device_ids_array, args.device_ids_array_size);
print_device_id_array((void *)args.device_ids_array_ptr,
args.device_ids_array_size);
if (object->mapped_device_id_array != NULL &&
object->mapped_device_id_array_size > 0 &&
@@ -1708,7 +1715,8 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
object->mapped_device_id_array != object->registered_device_id_array)
free(object->mapped_device_id_array);
memcpy(temp_mapped_id_array, args.device_ids_array, args.device_ids_array_size);
memcpy(temp_mapped_id_array, (void *)args.device_ids_array_ptr,
args.device_ids_array_size);
object->mapped_device_id_array = temp_mapped_id_array;
object->mapped_device_id_array_size = args.device_ids_array_size;
object->mapping_count = 1;
@@ -1755,7 +1763,7 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu
if (!ret && !is_debugger) {
vm_object_t *obj = fmm_allocate_memory_in_device(
gpu_id, address, size, aperture,
NULL, KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE);
NULL, KFD_IOC_ALLOC_MEM_FLAGS_VRAM);
if (obj == NULL)
return -1;
} else {
@@ -1764,7 +1772,7 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu
size,
aperture,
&mmap_offset,
KFD_IOC_ALLOC_MEM_FLAGS_DGPU_HOST);
KFD_IOC_ALLOC_MEM_FLAGS_GTT);
mmap_ret = mmap(address, size,
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED,
@@ -1788,7 +1796,7 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
void *address, uint64_t size,
uint64_t *gpuvm_address)
{
struct kfd_ioctl_map_memory_to_gpu_new_args args;
struct kfd_ioctl_map_memory_to_gpu_args args;
vm_object_t *object;
void *temp_mapped_id_array = NULL;
@@ -1806,10 +1814,11 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
args.handle = object->handle;
if (object->registered_device_id_array_size > 0 &&
object->registered_device_id_array) {
args.device_ids_array = object->registered_device_id_array;
args.device_ids_array_ptr =
(uint64_t)object->registered_device_id_array;
args.device_ids_array_size = object->registered_device_id_array_size;
} else {
args.device_ids_array = all_gpu_id_array;
args.device_ids_array_ptr = (uint64_t)all_gpu_id_array;
args.device_ids_array_size = all_gpu_id_array_size;
}
@@ -1817,7 +1826,7 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
if (!temp_mapped_id_array)
goto err_object_not_found;
if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU_NEW, &args))
if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args))
goto err_map_ioctl_failed;
if (object->mapped_device_id_array != NULL &&
@@ -1826,7 +1835,8 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
object->mapped_device_id_array != object->registered_device_id_array)
free(object->mapped_device_id_array);
memcpy(temp_mapped_id_array, args.device_ids_array, args.device_ids_array_size);
memcpy(temp_mapped_id_array, (void *)args.device_ids_array_ptr,
args.device_ids_array_size);
object->mapped_device_id_array = temp_mapped_id_array;
object->mapped_device_id_array_size = args.device_ids_array_size;
@@ -1958,7 +1968,7 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
{
vm_object_t *object;
int ret = 0;
struct kfd_ioctl_unmap_memory_from_gpu_new_args args;
struct kfd_ioctl_unmap_memory_from_gpu_args args;
HSAuint32 page_offset = (HSAint64)address & (PAGE_SIZE - 1);
if (!obj)
@@ -1982,10 +1992,10 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
args.handle = object->handle;
if (device_ids_array && device_ids_array_size > 0) {
args.device_ids_array = device_ids_array;
args.device_ids_array_ptr = (uint64_t)device_ids_array;
args.device_ids_array_size = device_ids_array_size;
} else if (object->mapped_device_id_array_size > 0) {
args.device_ids_array = object->mapped_device_id_array;
args.device_ids_array_ptr = (uint64_t)object->mapped_device_id_array;
args.device_ids_array_size = object->mapped_device_id_array_size;
} else {
/*
@@ -1997,9 +2007,10 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
goto out;
}
print_device_id_array(args.device_ids_array, args.device_ids_array_size);
print_device_id_array((void *)args.device_ids_array_ptr,
args.device_ids_array_size);
ret = kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW, &args);
ret = kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &args);
if (ret != 0)
goto out;
@@ -2029,7 +2040,7 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id,
{
int32_t gpu_mem_id;
vm_object_t *object;
struct kfd_ioctl_unmap_memory_from_gpu_new_args args;
struct kfd_ioctl_unmap_memory_from_gpu_args args;
/* Retrieve gpu_mem id according to gpu_id */
gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
@@ -2055,9 +2066,9 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id,
/* unmap from GPU */
args.handle = object->handle;
args.device_ids_array = object->mapped_device_id_array;
args.device_ids_array_ptr = (uint64_t)object->mapped_device_id_array;
args.device_ids_array_size = object->mapped_device_id_array_size;
kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW, &args);
kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &args);
/* Clearing all mapped nodes list */
if (object->mapped_device_id_array != NULL &&