Add two pointer-attribute APIs:
hsaKmtQueryPointerInfo - allow the user to query the memory information
    using a pointer. This pointer can point to any address inside the
    range known to HSA.
hsaKmtSetMemoryUserData - allow the user to attach data to a pointer to
    add memory tracking information. This pointer must match the start
    address of a memory allocation or registration.
TODO: This patch implements support on dGPU only; APU support still needs to be added.

Change-Id: I4711809274248434901f0794f50ebfa13a7371a8


[ROCm/ROCR-Runtime commit: 51e4d27c37]
Этот коммит содержится в:
Amber Lin
2016-09-01 23:25:42 -04:00
родитель cba37c251c
Коммит 8a1cef5fbb
6 изменённых файлов: 294 добавлений и 40 удалений
+20
Просмотреть файл
@@ -661,6 +661,26 @@ hsaKmtGetTileConfig(
HsaGpuTileConfig* config // IN & OUT
);
/**
  Returns information about the memory a pointer refers to.

  Pointer     - address to look up. It may point to any address inside a
                memory range known to HSA, not only to the start of it.
  PointerInfo - receives the description of the memory containing Pointer.

  Returns HSAKMT_STATUS_INVALID_PARAMETER if PointerInfo is NULL and
  HSAKMT_STATUS_ERROR if no memory object is found for Pointer (in that
  case PointerInfo->Type is HSA_POINTER_UNKNOWN).
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueryPointerInfo(
const void * Pointer, //IN: address to query
HsaPointerInfo * PointerInfo //OUT: filled-in pointer description
);
/**
  Associates user data with a memory allocation or registration so that
  callers can attach their own tracking information to it.

  Pointer  - must match the start address of a memory allocation or of a
             registered memory range.
  UserData - opaque value stored with the memory; it is reported back in
             HsaPointerInfo.UserData by hsaKmtQueryPointerInfo.

  Returns HSAKMT_STATUS_ERROR if Pointer does not match a known allocation
  or registration.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetMemoryUserData(
const void * Pointer, //IN: start address of the memory
void * UserData //IN: opaque user data to attach
);
#ifdef __cplusplus
} //extern "C"
#endif
+22
Просмотреть файл
@@ -982,6 +982,28 @@ typedef struct _HsaGpuTileConfig
HSAuint32 Reserved[7]; /* Round up to 16 dwords for future extension */
} HsaGpuTileConfig;
/* Classification of a pointer, reported in HsaPointerInfo.Type. */
typedef enum _HSA_POINTER_TYPE {
HSA_POINTER_UNKNOWN = 0, // No memory object was found for the pointer
HSA_POINTER_ALLOCATED = 1, // Allocated with hsaKmtAllocMemory (except scratch)
HSA_POINTER_REGISTERED_USER = 2, // Registered user pointer
HSA_POINTER_REGISTERED_GRAPHICS = 3 // Registered graphics buffer
// (hsaKmtRegisterGraphicsToNodes)
} HSA_POINTER_TYPE;
/*
 * Description of the memory a pointer refers to, as filled in by
 * hsaKmtQueryPointerInfo. The structure is zeroed before it is filled, so
 * fields that do not apply to the reported Type read as 0/NULL.
 */
typedef struct _HsaPointerInfo {
HSA_POINTER_TYPE Type; // Pointer type
HSAuint32 Node; // Node where the memory is located
HsaMemFlags MemFlags; // Only valid for HSA_POINTER_ALLOCATED
// For HSA_POINTER_REGISTERED_USER this is the registered user pointer, for
// HSA_POINTER_ALLOCATED the start of the allocation; it is left NULL for
// HSA_POINTER_REGISTERED_GRAPHICS.
void *CPUAddress; // Start address for CPU access
// For HSA_POINTER_REGISTERED_USER the in-page offset of the user pointer
// is added to the start address of the GPU mapping.
HSAuint64 GPUAddress; // Start address for GPU access
// For HSA_POINTER_REGISTERED_USER this is the size given at registration;
// otherwise it is the size tracked for the memory object (the size
// allocated on the GPU).
HSAuint64 SizeInBytes; // Size in bytes
HSAuint32 NRegisteredNodes; // Number of nodes the memory is registered to
HSAuint32 NMappedNodes; // Number of nodes the memory is mapped to
// The two arrays below point at storage kept with the memory object by the
// library; that storage is released when the memory is deregistered,
// unmapped or freed.
const HSAuint32 *RegisteredNodes; // Array of registered nodes
const HSAuint32 *MappedNodes; // Array of mapped nodes
void *UserData; // User data associated with the memory (hsaKmtSetMemoryUserData)
} HsaPointerInfo;
#pragma pack(pop, hsakmttypes_h)
+220 -37
Просмотреть файл
@@ -49,8 +49,13 @@
struct vm_object {
void *start;
void *userptr;
uint64_t size;
uint64_t userptr_size;
uint64_t size; /* size allocated on GPU. When the user requests a random
* size, Thunk aligns it to page size and allocates this
* aligned size on GPU
*/
uint64_t handle; /* opaque */
uint32_t node_id;
struct vm_object *next;
struct vm_object *prev;
uint32_t flags; /* memory allocation flags */
@@ -59,13 +64,17 @@ struct vm_object {
*/
uint32_t *registered_device_id_array;
uint32_t registered_device_id_array_size;
uint32_t *registered_node_id_array;
/*
* Nodes that mapped already
*/
uint32_t *mapped_device_id_array;
uint32_t mapped_device_id_array_size;
uint32_t *mapped_node_id_array;
/* Metadata of imported graphics buffers */
void *metadata;
/* User data associated with the memory */
void *user_data;
};
typedef struct vm_object vm_object_t;
@@ -181,11 +190,14 @@ static vm_object_t *vm_create_and_init_object(void *start, uint64_t size,
if (object) {
object->start = start;
object->userptr = NULL;
object->userptr_size = 0;
object->size = size;
object->handle = handle;
object->next = object->prev = NULL;
object->registered_device_id_array_size = 0;
object->mapped_device_id_array_size = 0;
object->registered_node_id_array = NULL;
object->mapped_node_id_array = NULL;
object->flags = flags;
object->metadata = NULL;
}
@@ -276,7 +288,22 @@ static void vm_split_area(manageble_aperture_t *app, vm_area_t *area,
}
/*
 * Find the object in the aperture's object list whose address range
 * [start, start + size) contains the given address. Unlike a start-address
 * lookup, the address may point anywhere inside the object.
 *
 * The list is walked linearly from app->vm_objects. No locking is done
 * here. Returns the matching object, or NULL if none contains the address.
 */
static vm_object_t *vm_find_object_by_address(manageble_aperture_t *app,
void *address, uint64_t size)
const void *address)
{
vm_object_t *cur = app->vm_objects;
while (cur) {
/* Hit when start <= address < start + size */
if (address >= cur->start &&
(uint64_t)address < ((uint64_t)cur->start + cur->size))
break;
cur = cur->next;
}
return cur; /* NULL if not found */
}
static vm_object_t *vm_find_object_by_start_address(manageble_aperture_t *app,
const void *address, uint64_t size)
{
vm_object_t *cur = app->vm_objects;
@@ -287,13 +314,13 @@ static vm_object_t *vm_find_object_by_address(manageble_aperture_t *app,
if (cur->start == address && (cur->size == size || size == 0))
break;
cur = cur->next;
};
}
return cur; /* NULL if not found */
}
static vm_object_t *vm_find_object_by_userptr(manageble_aperture_t *app,
void *address)
const void *address)
{
vm_object_t *cur = app->vm_objects;
@@ -466,7 +493,10 @@ static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id)
return -1;
}
static int fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
/* After allocating the memory, return the vm_object created for this memory.
* Return NULL if any failure.
*/
static vm_object_t *fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
uint64_t MemorySizeInBytes,
manageble_aperture_t *aperture,
uint64_t *mmap_offset,
@@ -474,9 +504,10 @@ static int fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
{
struct kfd_ioctl_alloc_memory_of_gpu_new_args args;
struct kfd_ioctl_free_memory_of_gpu_args free_args;
vm_object_t *vm_obj = NULL;
if (!mem)
return -1;
return NULL;
/* Allocate memory from amdkfd */
args.gpu_id = gpu_id;
@@ -490,26 +521,26 @@ static int fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
args.mmap_offset = *mmap_offset;
if (kmtIoctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_NEW, &args))
return -1;
return NULL;
/* Allocate object */
pthread_mutex_lock(&aperture->fmm_mutex);
if (!aperture_allocate_object(aperture, mem, args.handle,
MemorySizeInBytes, flags))
if (!(vm_obj = aperture_allocate_object(aperture, mem, args.handle,
MemorySizeInBytes, flags)))
goto err_object_allocation_failed;
pthread_mutex_unlock(&aperture->fmm_mutex);
if (mmap_offset)
*mmap_offset = args.mmap_offset;
return 0;
return vm_obj;
err_object_allocation_failed:
pthread_mutex_unlock(&aperture->fmm_mutex);
free_args.handle = args.handle;
kmtIoctl(kfd_fd, AMDKFD_IOC_FREE_MEMORY_OF_GPU, &free_args);
return -1;
return NULL;
}
bool fmm_is_inside_some_aperture(void *address)
@@ -696,9 +727,11 @@ void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes)
static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
manageble_aperture_t *aperture, uint64_t offset, uint64_t *mmap_offset,
uint32_t flags)
uint32_t flags, vm_object_t **vm_obj)
{
void *mem = NULL;
vm_object_t *obj;
/* Check that aperture is properly initialized/supported */
if (!aperture_is_valid(aperture->base, aperture->limit))
return NULL;
@@ -713,8 +746,9 @@ static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
* Now that we have the area reserved, allocate memory in the device
* itself
*/
if (fmm_allocate_memory_in_device(gpu_id, mem,
MemorySizeInBytes, aperture, mmap_offset, flags)) {
obj = fmm_allocate_memory_in_device(gpu_id, mem,
MemorySizeInBytes, aperture, mmap_offset, flags);
if (obj == NULL) {
/*
* allocation of memory in device failed.
* Release region in aperture
@@ -726,6 +760,8 @@ static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
/* Assign NULL to mem to indicate failure to calling function */
mem = NULL;
}
if (vm_obj)
*vm_obj = obj;
return mem;
}
@@ -742,6 +778,7 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla
uint32_t ioc_flags, offset;
uint64_t size, mmap_offset;
void *mem;
vm_object_t *vm_obj = NULL;
/* Retrieve gpu_mem id according to gpu_id */
gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
@@ -766,7 +803,15 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFla
mem = __fmm_allocate_device(gpu_id, size,
aperture, offset, &mmap_offset,
ioc_flags);
ioc_flags, &vm_obj);
if (mem && vm_obj) {
pthread_mutex_lock(&aperture->fmm_mutex);
/* Store memory allocation flags, not ioc flags */
vm_obj->flags = flags.Value;
gpuid_to_nodeid(gpu_id, &vm_obj->node_id);
pthread_mutex_unlock(&aperture->fmm_mutex);
}
if (mem && flags.ui32.HostAccess) {
void *ret = mmap(mem, MemorySizeInBytes,
@@ -805,7 +850,7 @@ static void* fmm_allocate_host_cpu(uint64_t MemorySizeInBytes,
return mem;
}
static void* fmm_allocate_host_gpu(uint64_t MemorySizeInBytes,
static void* fmm_allocate_host_gpu(uint32_t node_id, uint64_t MemorySizeInBytes,
HsaMemFlags flags)
{
void *mem;
@@ -815,6 +860,7 @@ static void* fmm_allocate_host_gpu(uint64_t MemorySizeInBytes,
uint64_t size;
int32_t i;
uint32_t gpu_id;
vm_object_t *vm_obj = NULL;
i = find_first_dgpu(&gpu_id);
if (i < 0)
@@ -833,7 +879,15 @@ static void* fmm_allocate_host_gpu(uint64_t MemorySizeInBytes,
mem = __fmm_allocate_device(gpu_id, size,
aperture, 0, &mmap_offset,
ioc_flags);
ioc_flags, &vm_obj);
if (mem && vm_obj) {
/* Store memory allocation flags, not ioc flags */
pthread_mutex_lock(&aperture->fmm_mutex);
vm_obj->flags = flags.Value;
vm_obj->node_id = node_id;
pthread_mutex_unlock(&aperture->fmm_mutex);
}
if (flags.ui32.HostAccess) {
void *ret = mmap(mem, MemorySizeInBytes,
@@ -856,10 +910,11 @@ static void* fmm_allocate_host_gpu(uint64_t MemorySizeInBytes,
return mem;
}
void* fmm_allocate_host(uint64_t MemorySizeInBytes, HsaMemFlags flags)
void* fmm_allocate_host(uint32_t node_id, uint64_t MemorySizeInBytes,
HsaMemFlags flags)
{
if (is_dgpu)
return fmm_allocate_host_gpu(MemorySizeInBytes, flags);
return fmm_allocate_host_gpu(node_id, MemorySizeInBytes, flags);
return fmm_allocate_host_cpu(MemorySizeInBytes, flags);
}
@@ -933,7 +988,7 @@ static void __fmm_release(void *address, manageble_aperture_t *aperture)
pthread_mutex_lock(&aperture->fmm_mutex);
/* Find the object to retrieve the handle */
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object) {
pthread_mutex_unlock(&aperture->fmm_mutex);
return;
@@ -944,6 +999,7 @@ static void __fmm_release(void *address, manageble_aperture_t *aperture)
object->registered_device_id_array) {
object->mapped_device_id_array_size = 0;
object->mapped_device_id_array = NULL;
object->mapped_node_id_array = NULL;
}
free(object->registered_device_id_array);
object->registered_device_id_array_size = 0;
@@ -959,6 +1015,13 @@ static void __fmm_release(void *address, manageble_aperture_t *aperture)
if (object->metadata)
free(object->metadata);
if (object->registered_node_id_array)
free(object->registered_node_id_array);
object->registered_node_id_array = NULL;
if (object->mapped_node_id_array)
free(object->mapped_node_id_array);
object->mapped_node_id_array = NULL;
if (address >= dgpu_shared_aperture_base &&
address <= dgpu_shared_aperture_limit) {
/* Remove any CPU mapping, but keep the address range reserved */
@@ -1289,7 +1352,7 @@ static int _fmm_map_to_gpu_gtt(manageble_aperture_t *aperture,
object = obj;
if (!object) {
/* Find the object to retrieve the handle */
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object)
goto err_object_not_found;
}
@@ -1345,6 +1408,7 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu
bool is_debugger = 0;
void *mmap_ret = NULL;
uint64_t mmap_offset = 0;
/* Retrieve gpu_mem id according to gpu_id */
gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
if (gpu_mem_id < 0)
@@ -1363,7 +1427,7 @@ static int _fmm_map_to_gpu_scratch(uint32_t gpu_id, manageble_aperture_t *apertu
if (!ret && !is_debugger) {
offset = VOID_PTRS_SUB(address, aperture->base);
mem = __fmm_allocate_device(gpu_id, size, aperture, offset,
NULL, KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE);
NULL, KFD_IOC_ALLOC_MEM_FLAGS_DGPU_DEVICE, NULL);
if (mem == NULL)
return -1;
@@ -1416,7 +1480,7 @@ static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
pthread_mutex_lock(&aperture->fmm_mutex);
/* Find the object to retrieve the handle */
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object)
goto err_object_not_found;
@@ -1583,7 +1647,7 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
/* Find the object to retrieve the handle */
object = obj;
if (!object) {
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object) {
ret = -1;
goto err;
@@ -1622,6 +1686,9 @@ static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address,
object->mapped_device_id_array = NULL;
object->mapped_device_id_array_size = 0;
if (object->mapped_node_id_array)
free(object->mapped_node_id_array);
object->mapped_node_id_array = NULL;
if (!obj)
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -1652,7 +1719,7 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id,
pthread_mutex_lock(&aperture->fmm_mutex);
/* Find the object to retrieve the handle and size */
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object)
goto err;
@@ -1678,6 +1745,9 @@ static int _fmm_unmap_from_gpu_scratch(uint32_t gpu_id,
object->mapped_device_id_array = NULL;
object->mapped_device_id_array_size = 0;
if (object->mapped_node_id_array)
free(object->mapped_node_id_array);
object->mapped_node_id_array = NULL;
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -1937,7 +2007,7 @@ bool fmm_get_handle(void *address, uint64_t *handle)
pthread_mutex_lock(&aperture->fmm_mutex);
/* Find the object to retrieve the handle */
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (object && handle) {
*handle = object->handle;
found = true;
@@ -1974,19 +2044,19 @@ static HSAKMT_STATUS fmm_register_user_memory(void *addr, HSAuint64 size, vm_obj
/* Allocate BO, userptr address is passed in mmap_offset */
svm_addr = __fmm_allocate_device(gpu_id, aligned_size, aperture, 0,
&aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR);
&aligned_addr, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, &obj);
if (svm_addr == NULL)
return HSAKMT_STATUS_ERROR;
/* Find the object and set its userptr address */
pthread_mutex_lock(&aperture->fmm_mutex);
obj = vm_find_object_by_address(aperture, svm_addr, aligned_size);
if (obj == NULL) {
if (obj) {
pthread_mutex_lock(&aperture->fmm_mutex);
obj->userptr = addr;
gpuid_to_nodeid(gpu_id, &obj->node_id);
obj->userptr_size = size;
pthread_mutex_unlock(&aperture->fmm_mutex);
return HSAKMT_STATUS_ERROR;
}
obj->userptr = addr;
pthread_mutex_unlock(&aperture->fmm_mutex);
else
return HSAKMT_STATUS_ERROR;
if (obj_ret)
*obj_ret = obj;
@@ -2026,7 +2096,7 @@ HSAKMT_STATUS fmm_register_memory(void *address, uint64_t size_in_bytes,
if (!object) {
pthread_mutex_lock(&aperture->fmm_mutex);
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
pthread_mutex_unlock(&aperture->fmm_mutex);
}
@@ -2117,6 +2187,7 @@ HSAKMT_STATUS fmm_register_graphics_handle(HSAuint64 GraphicsResourceHandle,
obj->metadata = metadata;
obj->registered_device_id_array = gpu_id_array;
obj->registered_device_id_array_size = gpu_id_array_size;
gpuid_to_nodeid(infoArgs.gpu_id, &obj->node_id);
}
pthread_mutex_unlock(&aperture->fmm_mutex);
if (!obj)
@@ -2205,7 +2276,7 @@ HSAKMT_STATUS fmm_deregister_memory(void *address)
pthread_mutex_lock(&aperture->fmm_mutex);
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object) {
pthread_mutex_unlock(&aperture->fmm_mutex);
return HSAKMT_STATUS_MEMORY_NOT_REGISTERED;
@@ -2230,6 +2301,9 @@ HSAKMT_STATUS fmm_deregister_memory(void *address)
free(object->registered_device_id_array);
object->registered_device_id_array = NULL;
object->registered_device_id_array_size = 0;
if (object->registered_node_id_array)
free(object->registered_node_id_array);
object->registered_node_id_array = NULL;
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -2275,7 +2349,7 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size,
if (userptr && is_dgpu)
object = vm_find_object_by_userptr(aperture, address);
else
object = vm_find_object_by_address(aperture, address, 0);
object = vm_find_object_by_start_address(aperture, address, 0);
if (!object) {
pthread_mutex_unlock(&aperture->fmm_mutex);
@@ -2364,3 +2438,112 @@ HSAKMT_STATUS fmm_map_to_gpu_nodes(void *address, uint64_t size,
return 0;
}
/*
 * Build a newly allocated array of node IDs from an array of GPU IDs.
 *
 * The array is zero-initialized before translation so that an entry whose
 * gpuid_to_nodeid() call does not write a result never holds indeterminate
 * data. Returns NULL if the allocation fails; ownership of the returned
 * array passes to the caller.
 */
static uint32_t *fmm_gpu_ids_to_node_ids(const uint32_t *gpu_ids,
					 uint32_t count)
{
	uint32_t *node_ids;
	uint32_t i;

	node_ids = calloc(count, sizeof(*node_ids));
	if (!node_ids)
		return NULL;

	for (i = 0; i < count; i++)
		gpuid_to_nodeid(gpu_ids[i], &node_ids[i]);

	return node_ids;
}

/*
 * Fill *info with the description of the memory object that contains
 * address.
 *
 * The address is looked up first as an address inside an object's GPU
 * virtual range and, failing that, as a registered user pointer.
 *
 * The node-ID arrays reported in info->RegisteredNodes and
 * info->MappedNodes are created lazily here and cached in the vm_object;
 * they are freed when the memory is deregistered, unmapped or freed.
 *
 * The aperture's fmm_mutex is held for the lookup and for the updates to
 * the cached arrays, as for the other users of the aperture's object list.
 *
 * Returns HSAKMT_STATUS_SUCCESS on success,
 * HSAKMT_STATUS_INVALID_PARAMETER if info is NULL, and HSAKMT_STATUS_ERROR
 * if no object is found (info->Type is HSA_POINTER_UNKNOWN) or if a
 * node-ID array cannot be allocated (info is left zeroed).
 */
HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info)
{
	HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
	manageble_aperture_t *aperture;
	vm_object_t *vm_obj;

	if (!info)
		return HSAKMT_STATUS_INVALID_PARAMETER;

	memset(info, 0, sizeof(*info));

	/* TODO: APU */
	if (address >= svm.dgpu_aperture.base &&
	    address <= svm.dgpu_aperture.limit)
		aperture = &svm.dgpu_aperture;
	else if (address >= svm.dgpu_alt_aperture.base &&
		 address <= svm.dgpu_alt_aperture.limit)
		aperture = &svm.dgpu_alt_aperture;
	else
		/* Not in SVM, it can be system memory registered by userptr */
		aperture = &svm.dgpu_aperture;

	pthread_mutex_lock(&aperture->fmm_mutex);

	vm_obj = vm_find_object_by_address(aperture, address);
	if (!vm_obj)
		vm_obj = vm_find_object_by_userptr(aperture, address);
	if (!vm_obj) {
		info->Type = HSA_POINTER_UNKNOWN;
		ret = HSAKMT_STATUS_ERROR;
		goto exit;
	}

	if (vm_obj->metadata)
		info->Type = HSA_POINTER_REGISTERED_GRAPHICS;
	else if (vm_obj->userptr)
		info->Type = HSA_POINTER_REGISTERED_USER;
	else
		info->Type = HSA_POINTER_ALLOCATED;

	info->Node = vm_obj->node_id;
	info->GPUAddress = (HSAuint64)vm_obj->start;
	info->SizeInBytes = vm_obj->size;

	/* registered nodes; the *_array_size fields are byte counts */
	info->NRegisteredNodes =
		vm_obj->registered_device_id_array_size / sizeof(uint32_t);
	if (info->NRegisteredNodes && !vm_obj->registered_node_id_array) {
		/* Cached in the object; freed whenever the registration is
		 * deregistered or the memory is freed.
		 */
		vm_obj->registered_node_id_array = fmm_gpu_ids_to_node_ids(
				vm_obj->registered_device_id_array,
				info->NRegisteredNodes);
		if (!vm_obj->registered_node_id_array)
			goto err_no_memory;
	}
	info->RegisteredNodes = vm_obj->registered_node_id_array;

	/* mapped nodes */
	info->NMappedNodes =
		vm_obj->mapped_device_id_array_size / sizeof(uint32_t);
	if (info->NMappedNodes && !vm_obj->mapped_node_id_array) {
		/* Cached in the object; freed whenever the mapping is
		 * unmapped or the memory is freed.
		 */
		vm_obj->mapped_node_id_array = fmm_gpu_ids_to_node_ids(
				vm_obj->mapped_device_id_array,
				info->NMappedNodes);
		if (!vm_obj->mapped_node_id_array)
			goto err_no_memory;
	}
	info->MappedNodes = vm_obj->mapped_node_id_array;

	info->UserData = vm_obj->user_data;

	if (info->Type == HSA_POINTER_REGISTERED_USER) {
		info->CPUAddress = vm_obj->userptr;
		info->SizeInBytes = vm_obj->userptr_size;
		/* The GPU mapping is page aligned; add the user pointer's
		 * offset within its page.
		 */
		info->GPUAddress += ((HSAuint64)info->CPUAddress & (PAGE_SIZE-1));
	} else if (info->Type == HSA_POINTER_ALLOCATED) {
		info->MemFlags.Value = vm_obj->flags;
		info->CPUAddress = vm_obj->start;
	}

exit:
	pthread_mutex_unlock(&aperture->fmm_mutex);
	return ret;

err_no_memory:
	/* Do not hand back a non-zero node count with a NULL array */
	memset(info, 0, sizeof(*info));
	ret = HSAKMT_STATUS_ERROR;
	goto exit;
}
HSAKMT_STATUS fmm_set_mem_user_data(const void *mem, void *usr_data)
{
manageble_aperture_t *aperture;
vm_object_t *vm_obj;
/* TODO: APU */
if (mem >= svm.dgpu_alt_aperture.base &&
mem <= svm.dgpu_alt_aperture.limit)
aperture = &svm.dgpu_alt_aperture;
else
aperture = &svm.dgpu_aperture;
vm_obj = vm_find_object_by_start_address(aperture, mem, 0);
if (!vm_obj)
vm_obj = vm_find_object_by_userptr(aperture, mem);
if (!vm_obj)
return HSAKMT_STATUS_ERROR;
vm_obj->user_data = usr_data;
return HSAKMT_STATUS_SUCCESS;
}
+4 -1
Просмотреть файл
@@ -52,7 +52,8 @@ void fmm_destroy_process_apertures(void);
*/
void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes);
void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags flags);
void* fmm_allocate_host(uint64_t MemorySizeInBytes, HsaMemFlags flags);
void* fmm_allocate_host(uint32_t node_id, uint64_t MemorySizeInBytes,
HsaMemFlags flags);
void* fmm_open_graphic_handle(uint32_t gpu_id,
int32_t graphic_device_handle,
uint32_t graphic_handle,
@@ -63,6 +64,8 @@ void fmm_release(void* address);
int fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
int fmm_unmap_from_gpu(void *address);
bool fmm_get_handle(void *address, uint64_t *handle);
HSAKMT_STATUS fmm_get_mem_info(const void *address, HsaPointerInfo *info);
HSAKMT_STATUS fmm_set_mem_user_data(const void *mem, void *usr_data);
/* Topology interface*/
HSAKMT_STATUS fmm_node_added(HSAuint32 gpu_id);
+2
Просмотреть файл
@@ -48,6 +48,8 @@ hsaKmtMapGraphicHandle;
hsaKmtUnmapGraphicHandle;
hsaKmtSetTrapHandler;
hsaKmtGetTileConfig;
hsaKmtQueryPointerInfo;
hsaKmtSetMemoryUserData;
local: *;
};
+26 -2
Просмотреть файл
@@ -133,7 +133,8 @@ hsaKmtAllocMemory(
}
if (gpu_id == 0 && !MemFlags.ui32.Scratch) {
*MemoryAddress = fmm_allocate_host(SizeInBytes, MemFlags);
*MemoryAddress = fmm_allocate_host(PreferredNode, SizeInBytes,
MemFlags);
if (*MemoryAddress == NULL)
return HSAKMT_STATUS_ERROR;
@@ -161,7 +162,8 @@ hsaKmtAllocMemory(
/* Backwards compatibility hack: Allocate system memory if app
* asks for paged memory from a GPU node. */
if (gpu_id && !MemFlags.ui32.NonPaged && !MemFlags.ui32.Scratch) {
*MemoryAddress = fmm_allocate_host(SizeInBytes, MemFlags);
*MemoryAddress = fmm_allocate_host(PreferredNode, SizeInBytes,
MemFlags);
if (*MemoryAddress == NULL)
return HSAKMT_STATUS_ERROR;
@@ -422,3 +424,25 @@ hsaKmtGetTileConfig(
return HSAKMT_STATUS_SUCCESS;
}
/*
 * Public entry point: look up the memory that Pointer refers to and
 * describe it in *PointerInfo. A NULL PointerInfo is rejected with
 * HSAKMT_STATUS_INVALID_PARAMETER; otherwise the result of
 * fmm_get_mem_info() is returned unchanged.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueryPointerInfo(
	const void *Pointer,		/* IN */
	HsaPointerInfo *PointerInfo	/* OUT */
)
{
	HSAKMT_STATUS status = HSAKMT_STATUS_INVALID_PARAMETER;

	if (PointerInfo)
		status = fmm_get_mem_info(Pointer, PointerInfo);

	return status;
}
/*
 * Public entry point: attach UserData to the memory that starts at
 * Pointer. The work is delegated to fmm_set_mem_user_data() and its status
 * is returned unchanged.
 */
HSAKMT_STATUS
HSAKMTAPI
hsaKmtSetMemoryUserData(
	const void *Pointer,	/* IN */
	void *UserData		/* IN */
)
{
	HSAKMT_STATUS status;

	status = fmm_set_mem_user_data(Pointer, UserData);

	return status;
}