libhsakmt: Try to use CPU addr as GPU addr for userptrs
If the CPU addr of a userptr is accessible by the GPU, try to use it
instead of allocating a different GPU address. If something else is
already registered with an overlapping address range, we still need to
allocate a GPU address, because KFD does not support overlapping GPUVM
mappings.
Change-Id: I452963ee45a454f735755a0b43122b9aee5d55be
Signed-off-by: Felix Kuehling <felix.kuehling@gmail.com>
[ROCm/ROCR-Runtime commit: ab181c46c0]
Этот коммит содержится в:
коммит произвёл
Felix Kuehling
родитель
3dfb956bd5
Коммит
84bb9072c0
@@ -502,6 +502,38 @@ loop:
|
||||
return cur; /* NULL if not found */
|
||||
}
|
||||
|
||||
/* Returns true if there is any object in GPU VM address space
|
||||
* overlapping the specified address range
|
||||
*/
|
||||
static bool vm_exists_overlapping_object(manageable_aperture_t *aper,
|
||||
const void *addr, uint64_t size)
|
||||
{
|
||||
unsigned long start_addr = (unsigned long)addr;
|
||||
unsigned long end_addr = start_addr + size;
|
||||
rbtree_t *tree = vm_object_tree(aper, 0);
|
||||
rbtree_key_t start_key, end_key;
|
||||
rbtree_node_t *rn_start, *ln_end;
|
||||
vm_object_t *cur;
|
||||
|
||||
start_key = rbtree_key(start_addr, 0);
|
||||
rn_start = rbtree_lookup_nearest(tree, &start_key, LKP_ALL, RIGHT);
|
||||
if (rn_start) {
|
||||
cur = vm_object_entry(rn_start, 0);
|
||||
if ((unsigned long)cur->start < end_addr)
|
||||
return true;
|
||||
}
|
||||
|
||||
end_key = rbtree_key(end_addr, 0);
|
||||
ln_end = rbtree_lookup_nearest(tree, &end_key, LKP_ALL, LEFT);
|
||||
if (ln_end) {
|
||||
cur = vm_object_entry(ln_end, 0);
|
||||
if ((unsigned long)cur->start + cur->size > start_addr)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static vm_object_t *vm_find_object_by_address(manageable_aperture_t *app,
|
||||
const void *address, uint64_t size)
|
||||
{
|
||||
@@ -1136,7 +1168,7 @@ static void *__fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
manageable_aperture_t *aperture, uint64_t *mmap_offset,
|
||||
uint32_t flags, vm_object_t **vm_obj)
|
||||
{
|
||||
void *mem = NULL;
|
||||
void *mem = NULL, *userpage = NULL;
|
||||
vm_object_t *obj;
|
||||
|
||||
/* Check that aperture is properly initialized/supported */
|
||||
@@ -1145,9 +1177,27 @@ static void *__fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
|
||||
/* Allocate address space */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
mem = aperture_allocate_area(aperture, MemorySizeInBytes);
|
||||
/* If it's a userptr within the GPU-addressable address space,
|
||||
* and no object is registered yet in the same address range,
|
||||
* then use the CPU address as GPU address
|
||||
*/
|
||||
if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) && mmap_offset) {
|
||||
userpage = (void *)*mmap_offset;
|
||||
if (userpage >= aperture->base &&
|
||||
VOID_PTR_ADD(userpage, MemorySizeInBytes - 1) <= aperture->limit &&
|
||||
!vm_exists_overlapping_object(aperture, userpage, MemorySizeInBytes))
|
||||
mem = userpage;
|
||||
}
|
||||
if (!mem)
|
||||
mem = aperture_allocate_area(aperture, MemorySizeInBytes);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
if (!mem) {
|
||||
pr_err("Failed to allocate %ld bytes virtual address space\n",
|
||||
MemorySizeInBytes);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that we have the area reserved, allocate memory in the device
|
||||
* itself
|
||||
@@ -1159,9 +1209,11 @@ static void *__fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
* allocation of memory in device failed.
|
||||
* Release region in aperture
|
||||
*/
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
aperture_release_area(aperture, mem, MemorySizeInBytes);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
if (mem != userpage) {
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
aperture_release_area(aperture, mem, MemorySizeInBytes);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
}
|
||||
|
||||
/* Assign NULL to mem to indicate failure to calling function */
|
||||
mem = NULL;
|
||||
@@ -1465,7 +1517,12 @@ static void __fmm_release(vm_object_t *object, manageable_aperture_t *aperture)
|
||||
args.handle = object->handle;
|
||||
kmtIoctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &args);
|
||||
|
||||
aperture_release_area(aperture, object->start, object->size);
|
||||
/* Userptrs with CPU addr == GPU addr don't have aperture
|
||||
* space allocated to them
|
||||
*/
|
||||
if (!object->userptr ||
|
||||
(void *)PAGE_ALIGN_DOWN(object->userptr) != object->start)
|
||||
aperture_release_area(aperture, object->start, object->size);
|
||||
vm_remove_object(aperture, object);
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
@@ -65,8 +65,10 @@ extern int PAGE_SHIFT;
|
||||
do { if ((uint64_t)PORT_VPTR_TO_UINT64(x) % PAGE_SIZE) return HSAKMT_STATUS_INVALID_PARAMETER; } while(0)
|
||||
|
||||
#define ALIGN_UP(x,align) (((uint64_t)(x) + (align) - 1) & ~(uint64_t)((align)-1))
|
||||
#define ALIGN_DOWN(x,align) ((uint64_t)(x) & ~(uint64_t)((align)-1))
|
||||
#define ALIGN_UP_32(x,align) (((uint32_t)(x) + (align) - 1) & ~(uint32_t)((align)-1))
|
||||
#define PAGE_ALIGN_UP(x) ALIGN_UP(x,PAGE_SIZE)
|
||||
#define PAGE_ALIGN_UP(x) ALIGN_UP((x),PAGE_SIZE)
|
||||
#define PAGE_ALIGN_DOWN(x) ALIGN_DOWN((x),PAGE_SIZE)
|
||||
#define BITMASK(n) (((n) < sizeof(1ULL) * CHAR_BIT ? (1ULL << (n)) : 0) - 1ULL)
|
||||
#define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user