libhsakmt: fix multiple threads register userptr race
Aperture locking is too fine-grained, it has race between find userptr
and allocate userptr object.
Change _fmm_allocate_device and fmm_allocate_memory_object to not take
the aperture lock, the callers take it, this implements an atomic find
userptr or allocate a new one.
Change-Id: I6773404e22c1f4382a211c5a9817df23c5534a2a
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
[ROCm/ROCR-Runtime commit: c4d5ee28f0]
Dieser Commit ist enthalten in:
@@ -941,12 +941,10 @@ static vm_object_t *fmm_allocate_memory_object(uint32_t gpu_id, void *mem,
|
||||
return NULL;
|
||||
|
||||
/* Allocate object */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
vm_obj = aperture_allocate_object(aperture, mem, args.handle,
|
||||
MemorySizeInBytes, flags);
|
||||
if (!vm_obj)
|
||||
goto err_object_allocation_failed;
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
if (mmap_offset)
|
||||
*mmap_offset = args.mmap_offset;
|
||||
@@ -954,7 +952,6 @@ static vm_object_t *fmm_allocate_memory_object(uint32_t gpu_id, void *mem,
|
||||
return vm_obj;
|
||||
|
||||
err_object_allocation_failed:
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
free_args.handle = args.handle;
|
||||
kmtIoctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args);
|
||||
|
||||
@@ -1278,9 +1275,7 @@ static void *__fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t Memo
|
||||
return NULL;
|
||||
|
||||
/* Allocate address space */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
mem = aperture_allocate_area(aperture, address, MemorySizeInBytes);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
/*
|
||||
* Now that we have the area reserved, allocate memory in the device
|
||||
@@ -1293,9 +1288,7 @@ static void *__fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t Memo
|
||||
* allocation of memory in device failed.
|
||||
* Release region in aperture
|
||||
*/
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
aperture_release_area(aperture, mem, MemorySizeInBytes);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
/* Assign NULL to mem to indicate failure to calling function */
|
||||
mem = NULL;
|
||||
@@ -1341,17 +1334,19 @@ void *fmm_allocate_device(uint32_t gpu_id, void *address, uint64_t MemorySizeInB
|
||||
if (flags.ui32.Uncached || svm.disable_cache)
|
||||
ioc_flags |= KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
mem = __fmm_allocate_device(gpu_id, address, size, aperture, &mmap_offset,
|
||||
ioc_flags, &vm_obj);
|
||||
|
||||
if (mem && vm_obj) {
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
/* Store memory allocation flags, not ioc flags */
|
||||
vm_obj->flags = flags.Value;
|
||||
gpuid_to_nodeid(gpu_id, &vm_obj->node_id);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
if (mem) {
|
||||
int map_fd = gpu_mem[gpu_mem_id].drm_render_fd;
|
||||
int prot = flags.ui32.HostAccess ? PROT_READ | PROT_WRITE :
|
||||
@@ -1397,6 +1392,8 @@ void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
mem = __fmm_allocate_device(gpu_id, NULL, MemorySizeInBytes, aperture, NULL,
|
||||
ioc_flags, &vm_obj);
|
||||
|
||||
@@ -1409,12 +1406,12 @@ void *fmm_allocate_doorbell(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
flags.ui32.HostAccess = 1;
|
||||
flags.ui32.Reserved = 0xBe1;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
vm_obj->flags = flags.Value;
|
||||
gpuid_to_nodeid(gpu_id, &vm_obj->node_id);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
if (mem) {
|
||||
void *ret = mmap(mem, MemorySizeInBytes,
|
||||
PROT_READ | PROT_WRITE,
|
||||
@@ -1564,13 +1561,13 @@ static void *fmm_allocate_host_gpu(uint32_t node_id, void *address,
|
||||
/* Paged memory is allocated as a userptr mapping, non-paged
|
||||
* memory is allocated from KFD
|
||||
*/
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
if (!flags.ui32.NonPaged && svm.userptr_for_paged_mem) {
|
||||
/* Allocate address space */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
mem = aperture_allocate_area(aperture, address, size);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
if (!mem)
|
||||
return NULL;
|
||||
goto out_unlock;
|
||||
|
||||
/* Map anonymous pages */
|
||||
if (mmap(mem, MemorySizeInBytes, PROT_READ | PROT_WRITE,
|
||||
@@ -1608,7 +1605,7 @@ static void *fmm_allocate_host_gpu(uint32_t node_id, void *address,
|
||||
MAP_SHARED | MAP_FIXED, map_fd, mmap_offset);
|
||||
if (ret == MAP_FAILED) {
|
||||
__fmm_release(vm_obj, aperture);
|
||||
return NULL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (flags.ui32.AQLQueueMemory) {
|
||||
@@ -1624,18 +1621,17 @@ static void *fmm_allocate_host_gpu(uint32_t node_id, void *address,
|
||||
|
||||
if (mem && vm_obj) {
|
||||
/* Store memory allocation flags, not ioc flags */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
vm_obj->flags = flags.Value;
|
||||
vm_obj->node_id = node_id;
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return mem;
|
||||
|
||||
out_release_area:
|
||||
/* Release address space */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
aperture_release_area(aperture, mem, size);
|
||||
out_unlock:
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
return NULL;
|
||||
@@ -2075,6 +2071,9 @@ static void *map_mmio(uint32_t node_id, uint32_t gpu_id, int mmap_fd)
|
||||
ioc_flags = KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP |
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
mem = __fmm_allocate_device(gpu_id, NULL, PAGE_SIZE, aperture,
|
||||
&mmap_offset, ioc_flags, &vm_obj);
|
||||
|
||||
@@ -2085,7 +2084,6 @@ static void *map_mmio(uint32_t node_id, uint32_t gpu_id, int mmap_fd)
|
||||
flags.ui32.NonPaged = 1;
|
||||
flags.ui32.HostAccess = 1;
|
||||
flags.ui32.Reserved = 0;
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
vm_obj->flags = flags.Value;
|
||||
vm_obj->node_id = node_id;
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
@@ -2550,9 +2548,6 @@ static int _fmm_map_to_gpu(manageable_aperture_t *aperture,
|
||||
vm_object_t *object;
|
||||
int ret = 0;
|
||||
|
||||
if (!obj)
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
object = obj;
|
||||
if (!object) {
|
||||
/* Find the object to retrieve the handle */
|
||||
@@ -2615,9 +2610,6 @@ static int _fmm_map_to_gpu(manageable_aperture_t *aperture,
|
||||
exit_ok:
|
||||
err_object_not_found:
|
||||
err_map_failed:
|
||||
if (!obj)
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2646,22 +2638,23 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *apert
|
||||
return -1;
|
||||
|
||||
ret = debug_get_reg_status(gpu_mem[gpu_mem_id].node_id, &is_debugger);
|
||||
|
||||
/* allocate object within the scratch backing aperture */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
if (!ret && !is_debugger) {
|
||||
obj = fmm_allocate_memory_object(
|
||||
gpu_id, address, size, aperture, &mmap_offset,
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE);
|
||||
if (!obj)
|
||||
return -1;
|
||||
goto out_unlock;
|
||||
/* Create a CPU mapping for the debugger */
|
||||
map_fd = gpu_mem[gpu_mem_id].drm_render_fd;
|
||||
mmap_ret = mmap(address, size, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_FIXED, map_fd, mmap_offset);
|
||||
if (mmap_ret == MAP_FAILED) {
|
||||
__fmm_release(obj, aperture);
|
||||
return -1;
|
||||
}
|
||||
if (mmap_ret == MAP_FAILED)
|
||||
goto out_release;
|
||||
} else {
|
||||
obj = fmm_allocate_memory_object(
|
||||
gpu_id, address, size, aperture, &mmap_offset,
|
||||
@@ -2671,10 +2664,8 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *apert
|
||||
mmap_ret = mmap(address, size,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, map_fd, mmap_offset);
|
||||
if (mmap_ret == MAP_FAILED) {
|
||||
__fmm_release(obj, aperture);
|
||||
return -1;
|
||||
}
|
||||
if (mmap_ret == MAP_FAILED)
|
||||
goto out_release;
|
||||
}
|
||||
|
||||
|
||||
@@ -2683,7 +2674,14 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageable_aperture_t *apert
|
||||
if (ret != 0)
|
||||
__fmm_release(obj, aperture);
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return ret;
|
||||
|
||||
out_release:
|
||||
__fmm_release(obj, aperture);
|
||||
out_unlock:
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int _fmm_map_to_gpu_userptr(void *addr, uint64_t size,
|
||||
@@ -3003,29 +3001,45 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size,
|
||||
if (svm.check_userptr)
|
||||
fmm_check_user_memory(addr, size);
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
/* catch the race condition where some other thread added the userptr
|
||||
* object already after the vm_find_object.
|
||||
*/
|
||||
obj = vm_find_object_by_userptr(aperture, addr, size);
|
||||
if (obj) {
|
||||
++obj->registration_count;
|
||||
goto out_found;
|
||||
}
|
||||
|
||||
/* Allocate BO, userptr address is passed in mmap_offset */
|
||||
svm_addr = __fmm_allocate_device(gpu_id, NULL, aligned_size, aperture,
|
||||
&aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR |
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
|
||||
KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE |
|
||||
(coarse_grain ? 0 : KFD_IOC_ALLOC_MEM_FLAGS_COHERENT), &obj);
|
||||
if (!svm_addr)
|
||||
if (!svm_addr) {
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (obj) {
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
obj->userptr = addr;
|
||||
gpuid_to_nodeid(gpu_id, &obj->node_id);
|
||||
obj->userptr_size = size;
|
||||
obj->registration_count = 1;
|
||||
obj->user_node.key = rbtree_key((unsigned long)addr, size);
|
||||
rbtree_insert(&aperture->user_tree, &obj->user_node);
|
||||
} else {
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
} else
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
out_found:
|
||||
if (obj_ret)
|
||||
*obj_ret = obj;
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren