Allow a memory to be registered multiple times

A memory region is allowed to be registered multiple times when the memory
is specified by a user pointer. If it's registered with the same user
pointer but with different sizes, it's treated as different instances and
multiple VM objects are created with different GPU address.


Change-Id: I49627111bb5db36d18f1133b252fb62a611f06a4
Этот коммит содержится в:
Amber Lin
2016-11-09 14:54:17 -05:00
родитель a1f417715b
Коммит 2a50ebba98
+106 -42
Просмотреть файл
@@ -65,12 +65,15 @@ struct vm_object {
uint32_t *registered_device_id_array;
uint32_t registered_device_id_array_size;
uint32_t *registered_node_id_array;
uint32_t registration_count; /* the same memory region can be
registered multiple times */
/*
* Nodes that mapped already
*/
uint32_t *mapped_device_id_array;
uint32_t mapped_device_id_array_size;
uint32_t *mapped_node_id_array;
uint32_t mapping_count;
/* Metadata of imported graphics buffers */
void *metadata;
/* User data associated with the memory */
@@ -204,6 +207,8 @@ static vm_object_t *vm_create_and_init_object(void *start, uint64_t size,
object->mapped_device_id_array_size = 0;
object->registered_node_id_array = NULL;
object->mapped_node_id_array = NULL;
object->registration_count = 0;
object->mapping_count = 0;
object->flags = flags;
object->metadata = NULL;
object->user_data = NULL;
@@ -355,18 +360,36 @@ static vm_object_t *vm_find_object_by_address_range(manageble_aperture_t *app,
}
static vm_object_t *vm_find_object_by_userptr(manageble_aperture_t *app,
const void *address)
const void *address, HSAuint64 size)
{
vm_object_t *cur = app->vm_objects;
vm_object_t *cur = app->vm_objects, *obj;
uint32_t found = 0;
/* Look up the appropriate address range containing the given address */
/* Look up the userptr that matches the address. If size is specified,
the size needs to match too. */
while (cur) {
if (cur->userptr == address)
if ((cur->userptr == address) &&
((cur->userptr_size == size) || !size)) {
found = 1;
break;
}
cur = cur->next;
}
return cur; /* NULL if not found */
/* If size is not specified, we need to ensure the vm_obj found is the
only obj having this address. */
if (found && !size) {
obj = cur->next;
while (obj) {
if (obj->userptr == address) {
cur = NULL;
break;
}
obj = obj->next;
}
}
return cur; /* NULL if any look-up failure */
}
static vm_object_t *vm_find_object_by_userptr_range(manageble_aperture_t *app,
@@ -1480,6 +1503,15 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
goto err_object_not_found;
}
/* For a memory region that is registered by user pointer, changing
* mapping nodes is not allowed, so we don't need to check the mapping
* nodes or map if it's already mapped. Just increase the reference.
*/
if (object->userptr && object->mapping_count) {
++object->mapping_count;
goto exit_ok;
}
args.handle = object->handle;
if (object->registered_device_id_array_size > 0) {
args.device_ids_array = object->registered_device_id_array;
@@ -1507,7 +1539,9 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
memcpy(temp_mapped_id_array, args.device_ids_array, args.device_ids_array_size);
object->mapped_device_id_array = temp_mapped_id_array;
object->mapped_device_id_array_size = args.device_ids_array_size;
object->mapping_count = 1;
exit_ok:
if (!obj)
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -1669,7 +1703,7 @@ static int _fmm_map_to_gpu_userptr(void *addr, uint64_t size,
obj = object;
if (!obj) {
obj = vm_find_object_by_userptr(aperture, addr);
obj = vm_find_object_by_userptr(aperture, addr, size);
if (obj == NULL) {
pthread_mutex_unlock(&aperture->fmm_mutex);
return HSAKMT_STATUS_ERROR;
@@ -1761,8 +1795,9 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
vm_object_t *obj)
{
vm_object_t *object;
int ret;
int ret = 0;
struct kfd_ioctl_unmap_memory_from_gpu_new_args args;
HSAuint32 page_offset = (HSAint64)address & (PAGE_SIZE - 1);
if (!obj)
pthread_mutex_lock(&aperture->fmm_mutex);
@@ -1770,13 +1805,19 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
/* Find the object to retrieve the handle */
object = obj;
if (!object) {
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_address(aperture,
VOID_PTR_SUB(address, page_offset), 0);
if (!object) {
ret = -1;
goto err;
goto out;
}
}
if (object->userptr && object->mapping_count > 1) {
--object->mapping_count;
goto out;
}
args.handle = object->handle;
if (device_ids_array && device_ids_array_size > 0) {
args.device_ids_array = device_ids_array;
@@ -1791,14 +1832,14 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
* need to deploy the change on there side before thunk fails on this case.
*/
ret = 0;
goto err;
goto out;
}
print_device_id_array(args.device_ids_array, args.device_ids_array_size);
ret = kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW, &args);
if (ret != 0)
goto err;
goto out;
/* Clearing all mapped nodes list */
if (object->mapped_device_id_array != NULL &&
@@ -1812,12 +1853,9 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
if (object->mapped_node_id_array)
free(object->mapped_node_id_array);
object->mapped_node_id_array = NULL;
object->mapping_count = 0;
if (!obj)
pthread_mutex_unlock(&aperture->fmm_mutex);
return 0;
err:
out:
if (!obj)
pthread_mutex_unlock(&aperture->fmm_mutex);
return ret;
@@ -1894,7 +1932,7 @@ static int _fmm_unmap_from_gpu_userptr(void *addr)
/* Find the start address in SVM space for GPU unmapping */
pthread_mutex_lock(&aperture->fmm_mutex);
obj = vm_find_object_by_userptr(aperture, addr);
obj = vm_find_object_by_userptr(aperture, addr, 0);
if (obj == NULL) {
pthread_mutex_unlock(&aperture->fmm_mutex);
return HSAKMT_STATUS_ERROR;
@@ -2160,10 +2198,14 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj
/* Check if this address was already registered */
pthread_mutex_lock(&aperture->fmm_mutex);
obj = vm_find_object_by_userptr(aperture, addr);
obj = vm_find_object_by_userptr(aperture, addr, size);
if (obj != NULL) {
++obj->registration_count;
pthread_mutex_unlock(&aperture->fmm_mutex);
*obj_ret = obj;
return HSAKMT_STATUS_SUCCESS;
}
pthread_mutex_unlock(&aperture->fmm_mutex);
if (obj != NULL)
return HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED;
/* Allocate BO, userptr address is passed in mmap_offset */
svm_addr = __fmm_allocate_device(gpu_id, aligned_size, aperture, 0,
@@ -2176,6 +2218,7 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj
obj->userptr = addr;
gpuid_to_nodeid(gpu_id, &obj->node_id);
obj->userptr_size = size;
obj->registration_count = 1;
pthread_mutex_unlock(&aperture->fmm_mutex);
}
else
@@ -2225,8 +2268,18 @@ HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes,
if (!object)
return HSAKMT_STATUS_NOT_SUPPORTED;
if (object->registered_device_id_array_size > 0)
return HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED;
if (object->registered_device_id_array_size > 0) {
/* Multiple registration is allowed, but not changing nodes */
if ((gpu_id_array_size != object->registered_device_id_array_size)
|| memcmp(object->registered_device_id_array,
gpu_id_array, gpu_id_array_size)) {
fprintf(stderr,
"Error. Changing nodes in a registered addr.\n");
return HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED;
}
else
return HSAKMT_STATUS_SUCCESS;
}
if (gpu_id_array_size > 0) {
object->registered_device_id_array = gpu_id_array;
@@ -2345,10 +2398,10 @@ static HSAKMT_STATUS fmm_deregister_user_memory(void *addr)
/* Find the size and start address in SVM space */
pthread_mutex_lock(&aperture->fmm_mutex);
obj = vm_find_object_by_userptr(aperture, addr);
if (obj == NULL) {
obj = vm_find_object_by_userptr(aperture, addr, 0);
if ((obj == NULL) || (obj->registration_count > 1)) {
pthread_mutex_unlock(&aperture->fmm_mutex);
return HSAKMT_STATUS_MEMORY_NOT_REGISTERED;
return HSAKMT_STATUS_ERROR;
}
svm_addr = obj->start;
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -2364,6 +2417,7 @@ HSAKMT_STATUS fmm_deregister_memory(void *address)
manageble_aperture_t *aperture = NULL;
vm_object_t *object = NULL;
unsigned i;
HSAuint32 page_offset = (HSAint64)address & (PAGE_SIZE - 1);
if ((address >= svm.dgpu_aperture.base) &&
(address <= svm.dgpu_aperture.limit))
@@ -2399,18 +2453,24 @@ HSAKMT_STATUS fmm_deregister_memory(void *address)
pthread_mutex_lock(&aperture->fmm_mutex);
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_address(aperture,
VOID_PTR_SUB(address, page_offset), 0);
if (!object) {
pthread_mutex_unlock(&aperture->fmm_mutex);
return HSAKMT_STATUS_MEMORY_NOT_REGISTERED;
}
if (object->userptr) {
if (object->registration_count > 1) {
--object->registration_count;
pthread_mutex_unlock(&aperture->fmm_mutex);
return fmm_deregister_user_memory(object->userptr);
} else if (object->metadata) {
return HSAKMT_STATUS_SUCCESS;
}
if (object->metadata || object->userptr) {
/* An object with metadata is an imported graphics
* buffer. Deregistering it means releasing the buffer. */
* buffer. Deregistering imported graphics buffers or
* userptrs means releasing the BO.
*/
pthread_mutex_unlock(&aperture->fmm_mutex);
__fmm_release(address, aperture);
return HSAKMT_STATUS_SUCCESS;
@@ -2427,6 +2487,7 @@ HSAKMT_STATUS fmm_deregister_memory(void *address)
if (object->registered_node_id_array)
free(object->registered_node_id_array);
object->registered_node_id_array = NULL;
object->registration_count = 0;
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -2470,7 +2531,7 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size,
pthread_mutex_lock(&aperture->fmm_mutex);
if (userptr && is_dgpu)
object = vm_find_object_by_userptr(aperture, address);
object = vm_find_object_by_userptr(aperture, address, size);
else
object = vm_find_object_by_address(aperture, address, 0);
@@ -2479,6 +2540,17 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size,
return HSAKMT_STATUS_ERROR;
}
/* For userptr, we ignore the nodes array and map all registered nodes.
* This is to simply the implementation of allowing the same memory
* region to be registered multiple times.
*/
if (userptr && is_dgpu) {
retcode = _fmm_map_to_gpu_userptr(address, size,
gpuvm_address, object);
pthread_mutex_unlock(&aperture->fmm_mutex);
return retcode;
}
/* Verify that all nodes to map are registered already */
registered_node_id_array = all_gpu_id_array;
registered_node_id_array_size = all_gpu_id_array_size;
@@ -2539,16 +2611,8 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size,
object->registered_device_id_array = nodes_to_map;
object->registered_device_id_array_size = nodes_to_map_size;
if (nodes_to_map_size > 0) {
if (userptr && is_dgpu)
retcode = _fmm_map_to_gpu_userptr(address, size,
gpuvm_address,
object);
else
retcode = _fmm_map_to_gpu_gtt(aperture, address,
size,
object);
}
if (nodes_to_map_size > 0)
retcode = _fmm_map_to_gpu_gtt(aperture, address, size, object);
/* Restore old registered device id array */
object->registered_device_id_array = temp_node_id_array;
@@ -2647,7 +2711,7 @@ HSAKMT_STATUS fmm_set_mem_user_data(const void *mem, void *usr_data)
vm_obj = vm_find_object_by_address(aperture, mem, 0);
if (!vm_obj)
vm_obj = vm_find_object_by_userptr(aperture, mem);
vm_obj = vm_find_object_by_userptr(aperture, mem, 0);
if (!vm_obj)
return HSAKMT_STATUS_ERROR;