From 3f02a3cf0bbc4e5530a3f36648be58bc7ad53d04 Mon Sep 17 00:00:00 2001
From: Ben Goz
Date: Wed, 17 Feb 2016 17:52:25 +0200
Subject: [PATCH] Mapping public VRAM BO to cpu

Change-Id: I2ff62ff0784f8ce556ad80739a177b90d866f1b4
Signed-off-by: Ben Goz
---
Review notes (text here is ignored by git-am):
 * On mmap() failure the allocation is now released with `size` instead of
   MemorySizeInBytes, so a doubled AQL queue allocation is released in full.
 * Added comments to fmm_allocate_device(); diffstat and hunk counts updated.
 * Line breaks/indentation were restored from a whitespace-collapsed copy and
   the blob ids on the "index" lines were not regenerated.

 src/fmm.c    | 52 +++++++++++++++++++++++++++++++++++++++++++---------
 src/fmm.h    |  2 +-
 src/memory.c |  2 +-
 3 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/fmm.c b/src/fmm.c
index 9e9b0a05e7..a9be3ecd17 100644
--- a/src/fmm.c
+++ b/src/fmm.c
@@ -728,36 +728,70 @@ static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
  * (after base subtraction) won't be used
  */
 #define GPUVM_APP_OFFSET 0x10000
-void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes)
+
+/*
+ * Allocate device memory on the GPU identified by gpu_id.
+ *
+ * For a dGPU the dgpu aperture is used; with flags.ui32.AQLQueueMemory set,
+ * twice MemorySizeInBytes is requested. Otherwise the GPU's gpuvm aperture
+ * is used with offset GPUVM_APP_OFFSET. With flags.ui32.HostAccess set, the
+ * first MemorySizeInBytes bytes are also mmap()ed read/write at the returned
+ * address.
+ *
+ * Returns the allocated address, or NULL when gpu_id has no gpu_mem entry,
+ * the allocation fails, or the CPU mapping fails.
+ */
+void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags flags)
 {
 	manageble_aperture_t *aperture;
 	int32_t gpu_mem_id;
-	uint32_t flags, offset;
+	uint32_t ioc_flags, offset;
+	uint64_t size, mmap_offset;
+	void *mem;

 	/* Retrieve gpu_mem id according to gpu_id */
 	gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
 	if (gpu_mem_id < 0)
 		return NULL;

+	size = MemorySizeInBytes;
+
 	if (topology_is_dgpu(get_device_id_by_gpu_id(gpu_id))) {
-		flags = KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE;
+		ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE;
 		/*
 		 * TODO: Once VA limit is raised from 0x200000000 (8GB) use gpuvm_aperture.
 		 * In that way the host access range won't be used for local memory
 		 */
 		aperture = &svm.dgpu_aperture;
 		offset = 0;
+		if (flags.ui32.AQLQueueMemory) {
+			size = MemorySizeInBytes * 2;
+			ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_DGPU_AQL_QUEUE_MEM;
+		}
 	} else {
-		flags = KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE;
+		ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_APU_DEVICE;
 		aperture = &gpu_mem[gpu_mem_id].gpuvm_aperture;
 		offset = GPUVM_APP_OFFSET;
 	}

-	return __fmm_allocate_device(gpu_id, MemorySizeInBytes,
-			aperture, offset, NULL,
-			flags);
-	/* TODO: honor host access mem flag and map to user mode VM if
-	 * needed */
+	mem = __fmm_allocate_device(gpu_id, size,
+			aperture, offset, &mmap_offset,
+			ioc_flags);
+
+	if (mem && flags.ui32.HostAccess) {
+		/* MAP_FIXED places the CPU mapping at the allocated address */
+		void *ret = mmap(mem, MemorySizeInBytes,
+				PROT_READ | PROT_WRITE,
+				MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset);
+		if (ret == MAP_FAILED) {
+			/* Release everything that was allocated: size, not
+			 * MemorySizeInBytes, since AQL queue memory is doubled */
+			__fmm_release(mem, size, aperture);
+			return NULL;
+		}
+	}
+
+	return mem;
 }

 static void* fmm_allocate_host_cpu(uint64_t MemorySizeInBytes,
diff --git a/src/fmm.h b/src/fmm.h
index 9a106f90b4..792be36b56 100644
--- a/src/fmm.h
+++ b/src/fmm.h
@@ -50,7 +50,7 @@ void fmm_destroy_process_apertures(void);
  * Memory interface
  */
 void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes);
-void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes);
+void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags flags);
 void* fmm_allocate_host(uint64_t MemorySizeInBytes, HsaMemFlags flags);
 void* fmm_open_graphic_handle(uint32_t gpu_id,
 		int32_t graphic_device_handle,
diff --git a/src/memory.c b/src/memory.c
index a616cd413f..e45dd67ffe 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -142,7 +142,7 @@ hsaKmtAllocMemory(
 	}

 	if (gpu_id && MemFlags.ui32.NonPaged && !MemFlags.ui32.Scratch) {
-		*MemoryAddress = fmm_allocate_device(gpu_id, SizeInBytes);
+		*MemoryAddress = fmm_allocate_device(gpu_id, SizeInBytes, MemFlags);
 		if (*MemoryAddress == NULL)
 			return HSAKMT_STATUS_NO_MEMORY;