reformat memory and fmm functions according to kernel coding style
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
[ROCm/ROCR-Runtime commit: 027ca02f2e]
This commit is contained in:
+371
-258
@@ -33,58 +33,73 @@
|
||||
|
||||
#define NON_VALID_GPU_ID 0
|
||||
#define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))
|
||||
#define INIT_APERTURE(base_value, limit_value) {.base = (void*)base_value, .limit = (void*)limit_value }
|
||||
#define INIT_MANAGEBLE_APERTURE(base_value, limit_value) {.base = (void*)base_value,.limit = (void*)limit_value, .vm_ranges = NULL, .vm_objects = NULL, .fmm_mutex = PTHREAD_MUTEX_INITIALIZER}
|
||||
#define INIT_GPU_MEM \
|
||||
{ .gpu_id = NON_VALID_GPU_ID,\
|
||||
.lds_aperture = INIT_APERTURE(0, 0), \
|
||||
.scratch_aperture = INIT_MANAGEBLE_APERTURE(0, 0),\
|
||||
.gpuvm_aperture = INIT_MANAGEBLE_APERTURE(0, 0)\
|
||||
|
||||
#define INIT_APERTURE(base_value, limit_value) { \
|
||||
.base = (void *) base_value, \
|
||||
.limit = (void *) limit_value \
|
||||
}
|
||||
|
||||
#define INIT_MANAGEBLE_APERTURE(base_value, limit_value) { \
|
||||
.base = (void *) base_value, \
|
||||
.limit = (void *) limit_value, \
|
||||
.vm_ranges = NULL, \
|
||||
.vm_objects = NULL, \
|
||||
.fmm_mutex = PTHREAD_MUTEX_INITIALIZER \
|
||||
}
|
||||
|
||||
#define INIT_GPU_MEM { \
|
||||
.gpu_id = NON_VALID_GPU_ID, \
|
||||
.lds_aperture = INIT_APERTURE(0, 0), \
|
||||
.scratch_aperture = INIT_MANAGEBLE_APERTURE(0, 0), \
|
||||
.gpuvm_aperture = INIT_MANAGEBLE_APERTURE(0, 0) \
|
||||
}
|
||||
|
||||
#define INIT_GPUs_MEM {[0 ... (NUM_OF_SUPPORTED_GPUS-1)] = INIT_GPU_MEM}
|
||||
struct vm_object{
|
||||
void* start;
|
||||
|
||||
struct vm_object {
|
||||
void *start;
|
||||
uint64_t size;
|
||||
uint64_t handle; // opaque
|
||||
struct vm_object* next;
|
||||
struct vm_object* prev;
|
||||
uint64_t handle; /* opaque */
|
||||
struct vm_object *next;
|
||||
struct vm_object *prev;
|
||||
};
|
||||
typedef struct vm_object vm_object_t;
|
||||
|
||||
struct vm_area{
|
||||
void* start;
|
||||
void* end;
|
||||
struct vm_area* next;
|
||||
struct vm_area* prev;
|
||||
struct vm_area {
|
||||
void *start;
|
||||
void *end;
|
||||
struct vm_area *next;
|
||||
struct vm_area *prev;
|
||||
};
|
||||
typedef struct vm_area vm_area_t;
|
||||
|
||||
typedef struct {
|
||||
void* base;
|
||||
void* limit;
|
||||
vm_area_t* vm_ranges;
|
||||
vm_object_t* vm_objects;
|
||||
void *base;
|
||||
void *limit;
|
||||
vm_area_t *vm_ranges;
|
||||
vm_object_t *vm_objects;
|
||||
pthread_mutex_t fmm_mutex;
|
||||
} manageble_aperture_t;
|
||||
|
||||
typedef struct {
|
||||
void* base;
|
||||
void* limit;
|
||||
void *base;
|
||||
void *limit;
|
||||
} aperture_t;
|
||||
|
||||
typedef struct{
|
||||
typedef struct {
|
||||
uint32_t gpu_id;
|
||||
aperture_t lds_aperture;
|
||||
manageble_aperture_t scratch_aperture;
|
||||
manageble_aperture_t gpuvm_aperture;
|
||||
}gpu_mem_t;
|
||||
} gpu_mem_t;
|
||||
|
||||
static gpu_mem_t gpu_mem[] = INIT_GPUs_MEM;
|
||||
|
||||
static vm_area_t* vm_create_and_init_area(void* start, void* end){
|
||||
vm_area_t* area = (vm_area_t*)malloc(sizeof(vm_area_t));// TODO: Memory pool ???
|
||||
if (area){
|
||||
static vm_area_t *vm_create_and_init_area(void *start, void *end)
|
||||
{
|
||||
vm_area_t *area = (vm_area_t *) malloc(sizeof(vm_area_t));
|
||||
|
||||
if (area) {
|
||||
area->start = start;
|
||||
area->end = end;
|
||||
area->next = area->prev = NULL;
|
||||
@@ -93,9 +108,12 @@ static vm_area_t* vm_create_and_init_area(void* start, void* end){
|
||||
return area;
|
||||
}
|
||||
|
||||
static vm_object_t* vm_create_and_init_object(void* start, uint64_t size, uint64_t handle){
|
||||
vm_object_t* object = (vm_object_t*)malloc(sizeof(vm_object_t)); // TODO: Memory pool ???
|
||||
if (object){
|
||||
static vm_object_t *vm_create_and_init_object(void *start, uint64_t size,
|
||||
uint64_t handle)
|
||||
{
|
||||
vm_object_t *object = (vm_object_t *) malloc(sizeof(vm_object_t));
|
||||
|
||||
if (object) {
|
||||
object->start = start;
|
||||
object->size = size;
|
||||
object->handle = handle;
|
||||
@@ -106,48 +124,49 @@ static vm_object_t* vm_create_and_init_object(void* start, uint64_t size, uint64
|
||||
}
|
||||
|
||||
|
||||
static void vm_remove_area(manageble_aperture_t* app, vm_area_t* area){
|
||||
vm_area_t* next;
|
||||
vm_area_t* prev;
|
||||
static void vm_remove_area(manageble_aperture_t *app, vm_area_t *area)
|
||||
{
|
||||
vm_area_t *next;
|
||||
vm_area_t *prev;
|
||||
|
||||
next = area->next;
|
||||
prev = area->prev;
|
||||
|
||||
if (prev == NULL )// The first element
|
||||
if (prev == NULL) /* The first element */
|
||||
app->vm_ranges = next;
|
||||
else
|
||||
prev->next = next;
|
||||
|
||||
if(next) // If not the last element
|
||||
if (next) /* If not the last element */
|
||||
next->prev = prev;
|
||||
|
||||
free(area);
|
||||
|
||||
}
|
||||
|
||||
static void vm_remove_object(manageble_aperture_t* app, vm_object_t* object){
|
||||
vm_object_t* next;
|
||||
vm_object_t* prev;
|
||||
static void vm_remove_object(manageble_aperture_t *app, vm_object_t *object)
|
||||
{
|
||||
vm_object_t *next;
|
||||
vm_object_t *prev;
|
||||
|
||||
next = object->next;
|
||||
prev = object->prev;
|
||||
|
||||
if (prev == NULL )// The first element
|
||||
if (prev == NULL) /* The first element */
|
||||
app->vm_objects = next;
|
||||
else
|
||||
prev->next = next;
|
||||
|
||||
if(next) // If not the last element
|
||||
if (next) /* If not the last element */
|
||||
next->prev = prev;
|
||||
|
||||
free(object);
|
||||
|
||||
}
|
||||
|
||||
static void vm_add_area_after(vm_area_t *after_this, vm_area_t *new_area)
|
||||
{
|
||||
vm_area_t *next = after_this->next;
|
||||
|
||||
|
||||
static void vm_add_area_after(vm_area_t* after_this, vm_area_t* new_area){
|
||||
vm_area_t* next = after_this->next;
|
||||
after_this->next = new_area;
|
||||
new_area->next = next;
|
||||
|
||||
@@ -156,8 +175,11 @@ static void vm_add_area_after(vm_area_t* after_this, vm_area_t* new_area){
|
||||
next->prev = new_area;
|
||||
}
|
||||
|
||||
static void vm_add_object_before(vm_object_t* before_this, vm_object_t* new_object){
|
||||
vm_object_t* prev = before_this->prev;
|
||||
static void vm_add_object_before(vm_object_t *before_this,
|
||||
vm_object_t *new_object)
|
||||
{
|
||||
vm_object_t *prev = before_this->prev;
|
||||
|
||||
before_this->prev = new_object;
|
||||
new_object->next = before_this;
|
||||
|
||||
@@ -166,45 +188,54 @@ static void vm_add_object_before(vm_object_t* before_this, vm_object_t* new_obje
|
||||
prev->next = new_object;
|
||||
}
|
||||
|
||||
static void vm_split_area(manageble_aperture_t* app, vm_area_t* area, void* address, uint64_t MemorySizeInBytes){
|
||||
static void vm_split_area(manageble_aperture_t *app, vm_area_t *area,
|
||||
void *address, uint64_t MemorySizeInBytes)
|
||||
{
|
||||
/*
|
||||
* The existing area is split to: [area->start, address - 1]
|
||||
* and [address + MemorySizeInBytes, area->end]
|
||||
*/
|
||||
vm_area_t *new_area = vm_create_and_init_area(
|
||||
VOID_PTR_ADD(address, MemorySizeInBytes),
|
||||
area->end);
|
||||
|
||||
// The existing area is split to: [area->start, address - 1] and [address + MemorySizeInBytes, area->end]
|
||||
vm_area_t* new_area = vm_create_and_init_area(VOID_PTR_ADD(address,MemorySizeInBytes), area->end);
|
||||
|
||||
// Shrink the existing area
|
||||
area->end = VOID_PTR_SUB(address,1);
|
||||
/* Shrink the existing area */
|
||||
area->end = VOID_PTR_SUB(address, 1);
|
||||
|
||||
vm_add_area_after(area, new_area);
|
||||
|
||||
}
|
||||
|
||||
static vm_object_t* vm_find_object_by_address(manageble_aperture_t* app, void* address, uint64_t size){
|
||||
vm_object_t* cur = app->vm_objects;
|
||||
static vm_object_t *vm_find_object_by_address(manageble_aperture_t *app,
|
||||
void *address, uint64_t size)
|
||||
{
|
||||
vm_object_t *cur = app->vm_objects;
|
||||
|
||||
// Look up the appropriate address range containing the given address
|
||||
while(cur){
|
||||
if(cur->start == address && (cur->size == size || size == 0))
|
||||
/* Look up the appropriate address range containing the given address */
|
||||
while (cur) {
|
||||
if (cur->start == address && (cur->size == size || size == 0))
|
||||
break;
|
||||
cur = cur->next;
|
||||
};
|
||||
|
||||
return cur; // NULL if not found
|
||||
return cur; /* NULL if not found */
|
||||
}
|
||||
|
||||
static vm_area_t* vm_find(manageble_aperture_t* app, void* address){
|
||||
vm_area_t* cur = app->vm_ranges;
|
||||
static vm_area_t *vm_find(manageble_aperture_t *app, void *address)
|
||||
{
|
||||
vm_area_t *cur = app->vm_ranges;
|
||||
|
||||
// Look up the appropriate address range containing the given address
|
||||
while(cur){
|
||||
if(cur->start <= address && cur->end >= address)
|
||||
/* Look up the appropriate address range containing the given address */
|
||||
while (cur) {
|
||||
if (cur->start <= address && cur->end >= address)
|
||||
break;
|
||||
cur = cur->next;
|
||||
};
|
||||
|
||||
return cur; // NULL if not found
|
||||
return cur; /* NULL if not found */
|
||||
}
|
||||
|
||||
static bool aperture_is_valid(void* app_base, void* app_limit){
|
||||
static bool aperture_is_valid(void *app_base, void *app_limit)
|
||||
{
|
||||
if (app_base && app_limit && app_base < app_limit)
|
||||
return true;
|
||||
return false;
|
||||
@@ -213,329 +244,405 @@ static bool aperture_is_valid(void* app_base, void* app_limit){
|
||||
/*
|
||||
* Assumes that fmm_mutex is locked on entry.
|
||||
*/
|
||||
static void aperture_release_area(manageble_aperture_t* app, void* address, uint64_t MemorySizeInBytes){
|
||||
vm_area_t* area;
|
||||
static void aperture_release_area(manageble_aperture_t *app, void *address,
|
||||
uint64_t MemorySizeInBytes)
|
||||
{
|
||||
vm_area_t *area;
|
||||
uint64_t SizeOfRegion;
|
||||
|
||||
area = vm_find(app, address);
|
||||
if(area) {
|
||||
if(VOID_PTRS_SUB(area->end, area->start) + 1 > MemorySizeInBytes) { // the size of the released block is less than the size of area
|
||||
if(area->start == address) { // shrink from the start
|
||||
area->start = VOID_PTR_ADD(area->start,MemorySizeInBytes);
|
||||
} else if(VOID_PTRS_SUB(area->end, address) + 1 == MemorySizeInBytes) { // shrink from the end
|
||||
area->end = VOID_PTR_SUB(area->end, MemorySizeInBytes);
|
||||
} else { // split the area
|
||||
vm_split_area(app, area, address, MemorySizeInBytes);
|
||||
}
|
||||
} else if(VOID_PTRS_SUB(area->end, area->start) + 1 == MemorySizeInBytes) { // the size of the released block is exactly the same as the size of area
|
||||
vm_remove_area(app, area);
|
||||
}
|
||||
if (!area)
|
||||
return;
|
||||
|
||||
SizeOfRegion = VOID_PTRS_SUB(area->end, area->start) + 1;
|
||||
|
||||
/* check if block is whole region or part of it */
|
||||
if (SizeOfRegion == MemorySizeInBytes) {
|
||||
vm_remove_area(app, area);
|
||||
} else if (SizeOfRegion > MemorySizeInBytes) {
|
||||
/* shrink from the start */
|
||||
if (area->start == address)
|
||||
area->start =
|
||||
VOID_PTR_ADD(area->start, MemorySizeInBytes);
|
||||
/* shrink from the end */
|
||||
else if (VOID_PTRS_SUB(area->end, address) + 1 ==
|
||||
MemorySizeInBytes)
|
||||
area->end = VOID_PTR_SUB(area->end, MemorySizeInBytes);
|
||||
/* split the area */
|
||||
else
|
||||
vm_split_area(app, area, address, MemorySizeInBytes);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* returns allocated address or NULL. Assumes, that fmm_mutex is locked on entry.
|
||||
* returns allocated address or NULL. Assumes, that fmm_mutex is locked
|
||||
* on entry.
|
||||
*/
|
||||
static void* aperture_allocate_area(manageble_aperture_t* app, uint64_t MemorySizeInBytes, uint64_t offset){
|
||||
vm_area_t* cur, *next, *new_area, *start;
|
||||
void* new_address = NULL;
|
||||
static void *aperture_allocate_area(manageble_aperture_t *app,
|
||||
uint64_t MemorySizeInBytes,
|
||||
uint64_t offset)
|
||||
{
|
||||
vm_area_t *cur, *next, *new_area, *start;
|
||||
void *new_address = NULL;
|
||||
|
||||
next = NULL;
|
||||
new_area = NULL;
|
||||
|
||||
cur = app->vm_ranges;
|
||||
if(cur) { // not empty
|
||||
|
||||
// Look up the appropriate address space "hole" or end of the list
|
||||
if (cur) { /* not empty */
|
||||
/*
|
||||
* Look up the appropriate address space "hole" or end of
|
||||
* the list
|
||||
*/
|
||||
while (cur) {
|
||||
next = cur->next;
|
||||
|
||||
// End of the list reached
|
||||
if(!next)
|
||||
/* End of the list reached */
|
||||
if (!next)
|
||||
break;
|
||||
|
||||
// address space "hole"
|
||||
if((VOID_PTRS_SUB(next->start,cur->end) >= MemorySizeInBytes))
|
||||
/* address space "hole" */
|
||||
if ((VOID_PTRS_SUB(next->start, cur->end) >=
|
||||
MemorySizeInBytes))
|
||||
break;
|
||||
|
||||
cur = next;
|
||||
};
|
||||
|
||||
// If the new range is inside the reserved aperture
|
||||
if(VOID_PTRS_SUB(app->limit, cur->end) + 1 >= MemorySizeInBytes) {
|
||||
// cur points to the last inspected element: the tail of the list or the found "hole"
|
||||
// Just extend the existing region
|
||||
/* If the new range is inside the reserved aperture */
|
||||
if (VOID_PTRS_SUB(app->limit, cur->end) + 1 >=
|
||||
MemorySizeInBytes) {
|
||||
/*
|
||||
* cur points to the last inspected element: the tail
|
||||
* of the list or the found "hole".
|
||||
* Just extend the existing region
|
||||
*/
|
||||
new_address = VOID_PTR_ADD(cur->end, 1);
|
||||
cur->end = VOID_PTR_ADD(cur->end, MemorySizeInBytes);
|
||||
} else new_address = NULL;
|
||||
|
||||
} else { // empty - create the first area
|
||||
start = VOID_PTR_ADD(app->base, offset); // Some offset from the base
|
||||
new_area = vm_create_and_init_area(start, VOID_PTR_ADD(start, (MemorySizeInBytes - 1)));
|
||||
if(new_area) {
|
||||
} else {
|
||||
new_address = NULL;
|
||||
}
|
||||
} else { /* empty - create the first area */
|
||||
/* Some offset from the base */
|
||||
start = VOID_PTR_ADD(app->base, offset);
|
||||
new_area = vm_create_and_init_area(start,
|
||||
VOID_PTR_ADD(start, (MemorySizeInBytes - 1)));
|
||||
if (new_area) {
|
||||
app->vm_ranges = new_area;
|
||||
new_address = new_area->start;
|
||||
}
|
||||
}
|
||||
|
||||
return new_address;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* returns 0 on success. Assumes, that fmm_mutex is locked on entry.
|
||||
*/
|
||||
static int aperture_allocate_object(manageble_aperture_t* app, void* new_address, uint64_t handle, uint64_t MemorySizeInBytes){
|
||||
vm_object_t* new_object;
|
||||
/* returns 0 on success. Assumes, that fmm_mutex is locked on entry */
|
||||
static int aperture_allocate_object(manageble_aperture_t *app,
|
||||
void *new_address,
|
||||
uint64_t handle,
|
||||
uint64_t MemorySizeInBytes)
|
||||
{
|
||||
vm_object_t *new_object;
|
||||
|
||||
// Allocate new object
|
||||
new_object = vm_create_and_init_object(new_address, MemorySizeInBytes, handle);
|
||||
if(!new_object)
|
||||
/* Allocate new object */
|
||||
new_object = vm_create_and_init_object(new_address,
|
||||
MemorySizeInBytes,
|
||||
handle);
|
||||
if (!new_object)
|
||||
return -1;
|
||||
|
||||
if(app->vm_objects == NULL ) { // empty list
|
||||
// Update head
|
||||
app->vm_objects = new_object;
|
||||
} else {
|
||||
// Add it before the first element
|
||||
/* check for non-empty list */
|
||||
if (app->vm_objects != NULL)
|
||||
/* Add it before the first element */
|
||||
vm_add_object_before(app->vm_objects, new_object);
|
||||
// Update head
|
||||
app->vm_objects = new_object;
|
||||
}
|
||||
|
||||
app->vm_objects = new_object; /* Update head */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id){
|
||||
static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
for(i = 0; i < NUM_OF_SUPPORTED_GPUS; i++){
|
||||
if(gpu_mem[i].gpu_id == gpu_id)
|
||||
for (i = 0 ; i < NUM_OF_SUPPORTED_GPUS ; i++)
|
||||
if (gpu_mem[i].gpu_id == gpu_id)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool fmm_is_inside_some_aperture(void* address){
|
||||
|
||||
bool fmm_is_inside_some_aperture(void *address)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
for(i = 0; i < NUM_OF_SUPPORTED_GPUS; i++){
|
||||
if(gpu_mem[i].gpu_id != NON_VALID_GPU_ID){
|
||||
if ((address>= gpu_mem[i].lds_aperture.base) && (address<= gpu_mem[i].lds_aperture.limit))
|
||||
return true;
|
||||
if ((address>= gpu_mem[i].gpuvm_aperture.base) && (address<= gpu_mem[i].gpuvm_aperture.limit))
|
||||
return true;
|
||||
if ((address>= gpu_mem[i].scratch_aperture.base) && (address<= gpu_mem[i].scratch_aperture.limit))
|
||||
return true;
|
||||
}
|
||||
for (i = 0 ; i < NUM_OF_SUPPORTED_GPUS ; i++) {
|
||||
if (gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
|
||||
continue;
|
||||
if ((address >= gpu_mem[i].lds_aperture.base) &&
|
||||
(address <= gpu_mem[i].lds_aperture.limit))
|
||||
return true;
|
||||
if ((address >= gpu_mem[i].gpuvm_aperture.base) &&
|
||||
(address <= gpu_mem[i].gpuvm_aperture.limit))
|
||||
return true;
|
||||
if ((address >= gpu_mem[i].scratch_aperture.base) &&
|
||||
(address <= gpu_mem[i].scratch_aperture.limit))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_PRINT_APERTURE
|
||||
static void aperture_print(aperture_t* app){
|
||||
static void aperture_print(aperture_t *app)
|
||||
{
|
||||
printf("\t Base: %p\n", app->base);
|
||||
printf("\t Limit: %p\n", app->limit);
|
||||
}
|
||||
|
||||
static void manageble_aperture_print(manageble_aperture_t* app){
|
||||
vm_area_t* cur = app->vm_ranges;
|
||||
static void manageble_aperture_print(manageble_aperture_t *app)
|
||||
{
|
||||
vm_area_t *cur = app->vm_ranges;
|
||||
vm_object_t *object = app->vm_objects;
|
||||
|
||||
printf("\t Base: %p\n", app->base);
|
||||
printf("\t Limit: %p\n", app->limit);
|
||||
printf("\t Ranges: \n");
|
||||
while(cur){
|
||||
printf("\t\t Range [%p - %p] \n", cur->start, cur->end);
|
||||
printf("\t Ranges:\n");
|
||||
while (cur) {
|
||||
printf("\t\t Range [%p - %p]\n", cur->start, cur->end);
|
||||
cur = cur->next;
|
||||
};
|
||||
printf("\t Objects: \n");
|
||||
while(object){
|
||||
printf("\t\t Object [%p - %" PRIu64 "] \n", object->start, object->size);
|
||||
printf("\t Objects:\n");
|
||||
while (object) {
|
||||
printf("\t\t Object [%p - %" PRIu64 "]\n",
|
||||
object->start, object->size);
|
||||
object = object->next;
|
||||
};
|
||||
}
|
||||
|
||||
void fmm_print(uint32_t gpu_id){
|
||||
void fmm_print(uint32_t gpu_id)
|
||||
{
|
||||
int32_t i = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
if(i >= 0){ // Found
|
||||
printf("LDS aperture: \n");
|
||||
aperture_print(&gpu_mem[i].lds_aperture);
|
||||
printf("GPUVM aperture: \n");
|
||||
manageble_aperture_print(&gpu_mem[i].gpuvm_aperture);
|
||||
printf("Scratch aperture: \n");
|
||||
manageble_aperture_print(&gpu_mem[i].scratch_aperture);
|
||||
|
||||
if (i >= 0) { /* Found */
|
||||
printf("LDS aperture:\n");
|
||||
aperture_print(&gpu_mem[i].lds_aperture);
|
||||
printf("GPUVM aperture:\n");
|
||||
manageble_aperture_print(&gpu_mem[i].gpuvm_aperture);
|
||||
printf("Scratch aperture:\n");
|
||||
manageble_aperture_print(&gpu_mem[i].scratch_aperture);
|
||||
}
|
||||
}
|
||||
#else
|
||||
void fmm_print(uint32_t gpu_id){
|
||||
|
||||
void fmm_print(uint32_t gpu_id)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes){
|
||||
// Not supported yet
|
||||
void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
{
|
||||
/* Not supported yet */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// The offset from GPUVM aperture base address to ensure that address 0 (after base subtraction) won't be used
|
||||
/*
|
||||
* The offset from GPUVM aperture base address to ensure that address 0
|
||||
* (after base subtraction) won't be used
|
||||
*/
|
||||
#define GPUVM_APP_OFFSET 0x10000
|
||||
void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes){
|
||||
|
||||
void* mem = NULL;
|
||||
void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
{
|
||||
void *mem = NULL;
|
||||
int32_t i = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
|
||||
// If not found or aperture isn't properly initialized/supported
|
||||
if(i < 0 || !aperture_is_valid(gpu_mem[i].gpuvm_aperture.base, gpu_mem[i].gpuvm_aperture.limit))
|
||||
return NULL ;
|
||||
/* If not found or aperture isn't properly initialized/supported */
|
||||
if (i < 0 ||
|
||||
!aperture_is_valid(gpu_mem[i].gpuvm_aperture.base,
|
||||
gpu_mem[i].gpuvm_aperture.limit))
|
||||
return NULL;
|
||||
|
||||
// Allocate address space
|
||||
/* Allocate address space */
|
||||
pthread_mutex_lock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
mem = aperture_allocate_area(&gpu_mem[i].gpuvm_aperture, MemorySizeInBytes, GPUVM_APP_OFFSET);
|
||||
mem = aperture_allocate_area(&gpu_mem[i].gpuvm_aperture,
|
||||
MemorySizeInBytes, GPUVM_APP_OFFSET);
|
||||
pthread_mutex_unlock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
void* fmm_open_graphic_handle(uint32_t gpu_id,
|
||||
void *fmm_open_graphic_handle(uint32_t gpu_id,
|
||||
int32_t graphic_device_handle,
|
||||
uint32_t graphic_handle,
|
||||
uint64_t MemorySizeInBytes){
|
||||
uint64_t MemorySizeInBytes)
|
||||
{
|
||||
|
||||
void* mem = NULL;
|
||||
void *mem = NULL;
|
||||
int32_t i = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
struct kfd_ioctl_open_graphic_handle_args open_graphic_handle_args;
|
||||
struct kfd_ioctl_unmap_memory_from_gpu_args unmap_args;
|
||||
|
||||
// If not found or aperture isn't properly initialized/supported
|
||||
if (i < 0 || !aperture_is_valid(gpu_mem[i].gpuvm_aperture.base, gpu_mem[i].gpuvm_aperture.limit))
|
||||
/* If not found or aperture isn't properly initialized/supported */
|
||||
if (i < 0 || !aperture_is_valid(gpu_mem[i].gpuvm_aperture.base,
|
||||
gpu_mem[i].gpuvm_aperture.limit))
|
||||
return NULL;
|
||||
|
||||
pthread_mutex_lock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
// Allocate address space
|
||||
mem = aperture_allocate_area(&gpu_mem[i].gpuvm_aperture, MemorySizeInBytes, GPUVM_APP_OFFSET);
|
||||
/* Allocate address space */
|
||||
mem = aperture_allocate_area(&gpu_mem[i].gpuvm_aperture,
|
||||
MemorySizeInBytes, GPUVM_APP_OFFSET);
|
||||
if (!mem)
|
||||
goto out;
|
||||
|
||||
// Allocate local memory
|
||||
/* Allocate local memory */
|
||||
open_graphic_handle_args.gpu_id = gpu_id;
|
||||
open_graphic_handle_args.graphic_device_fd = graphic_device_handle;
|
||||
open_graphic_handle_args.graphic_handle = graphic_handle;
|
||||
open_graphic_handle_args.va_addr = VOID_PTRS_SUB(mem, gpu_mem[i].gpuvm_aperture.base);
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_OPEN_GRAPHIC_HANDLE, &open_graphic_handle_args))
|
||||
open_graphic_handle_args.va_addr =
|
||||
VOID_PTRS_SUB(mem, gpu_mem[i].gpuvm_aperture.base);
|
||||
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_OPEN_GRAPHIC_HANDLE,
|
||||
&open_graphic_handle_args))
|
||||
goto release_area;
|
||||
|
||||
// Allocate object
|
||||
if (aperture_allocate_object(&gpu_mem[i].gpuvm_aperture, mem, open_graphic_handle_args.handle, MemorySizeInBytes))
|
||||
/* Allocate object */
|
||||
if (aperture_allocate_object(&gpu_mem[i].gpuvm_aperture, mem,
|
||||
open_graphic_handle_args.handle,
|
||||
MemorySizeInBytes))
|
||||
goto release_mem;
|
||||
|
||||
pthread_mutex_unlock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
|
||||
// That's all. Just return the new address
|
||||
/* That's all. Just return the new address */
|
||||
return mem;
|
||||
|
||||
release_mem:
|
||||
unmap_args.handle = open_graphic_handle_args.handle;
|
||||
kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &unmap_args);
|
||||
release_area:
|
||||
aperture_release_area(&gpu_mem[i].gpuvm_aperture, mem, MemorySizeInBytes);
|
||||
aperture_release_area(&gpu_mem[i].gpuvm_aperture, mem,
|
||||
MemorySizeInBytes);
|
||||
out:
|
||||
pthread_mutex_unlock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
|
||||
return NULL ;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
void fmm_release(void* address, uint64_t MemorySizeInBytes){
|
||||
|
||||
void fmm_release(void *address, uint64_t MemorySizeInBytes)
|
||||
{
|
||||
uint32_t i;
|
||||
bool found = false;
|
||||
|
||||
for (i = 0; i < NUM_OF_SUPPORTED_GPUS && !found; i++) {
|
||||
if(gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
|
||||
for (i = 0 ; i < NUM_OF_SUPPORTED_GPUS && !found ; i++) {
|
||||
if (gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
|
||||
continue;
|
||||
|
||||
if(address >= gpu_mem[i].gpuvm_aperture.base && address <= gpu_mem[i].gpuvm_aperture.limit) {
|
||||
if (address >= gpu_mem[i].gpuvm_aperture.base &&
|
||||
address <= gpu_mem[i].gpuvm_aperture.limit) {
|
||||
found = true;
|
||||
pthread_mutex_lock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
aperture_release_area(&gpu_mem[i].gpuvm_aperture, address, MemorySizeInBytes);
|
||||
aperture_release_area(&gpu_mem[i].gpuvm_aperture, address,
|
||||
MemorySizeInBytes);
|
||||
pthread_mutex_unlock(&gpu_mem[i].gpuvm_aperture.fmm_mutex);
|
||||
fmm_print(gpu_mem[i].gpu_id);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// If memory address isn't inside of any defined aperture - it refers to the system memory
|
||||
if (!found) {
|
||||
/*
|
||||
* If memory address isn't inside of any defined aperture - it refers
|
||||
* to the system memory
|
||||
*/
|
||||
if (!found)
|
||||
free(address);
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS fmm_init_process_apertures(){
|
||||
HSAKMT_STATUS fmm_init_process_apertures(void)
|
||||
{
|
||||
struct kfd_ioctl_get_process_apertures_args args;
|
||||
uint8_t node_id;
|
||||
|
||||
if (0 == kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, (void*)&args)){
|
||||
for(node_id = 0; node_id < args.num_of_nodes; node_id++){
|
||||
gpu_mem[node_id].gpu_id = args.process_apertures[node_id].gpu_id;
|
||||
gpu_mem[node_id].lds_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_base);
|
||||
gpu_mem[node_id].lds_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_limit);
|
||||
gpu_mem[node_id].gpuvm_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_base);
|
||||
gpu_mem[node_id].gpuvm_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_limit);
|
||||
gpu_mem[node_id].scratch_aperture.base = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_base);
|
||||
gpu_mem[node_id].scratch_aperture.limit = PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_limit);
|
||||
}
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, (void *) &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
for (node_id = 0 ; node_id < args.num_of_nodes ; node_id++) {
|
||||
gpu_mem[node_id].gpu_id =
|
||||
args.process_apertures[node_id].gpu_id;
|
||||
|
||||
gpu_mem[node_id].lds_aperture.base =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_base);
|
||||
|
||||
gpu_mem[node_id].lds_aperture.limit =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_limit);
|
||||
|
||||
gpu_mem[node_id].gpuvm_aperture.base =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_base);
|
||||
|
||||
gpu_mem[node_id].gpuvm_aperture.limit =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].gpuvm_limit);
|
||||
|
||||
gpu_mem[node_id].scratch_aperture.base =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_base);
|
||||
|
||||
gpu_mem[node_id].scratch_aperture.limit =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].scratch_limit);
|
||||
}
|
||||
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id){
|
||||
HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id)
|
||||
{
|
||||
int32_t slot = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
if (slot<0)
|
||||
|
||||
if (slot < 0)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
switch(aperture_type){
|
||||
switch (aperture_type) {
|
||||
case FMM_GPUVM:
|
||||
return aperture_is_valid(gpu_mem[slot].gpuvm_aperture.base, gpu_mem[slot].gpuvm_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].gpuvm_aperture.base) : 0;
|
||||
return aperture_is_valid(gpu_mem[slot].gpuvm_aperture.base,
|
||||
gpu_mem[slot].gpuvm_aperture.limit) ?
|
||||
PORT_VPTR_TO_UINT64(gpu_mem[slot].gpuvm_aperture.base) : 0;
|
||||
break;
|
||||
|
||||
case FMM_SCRATCH:
|
||||
return aperture_is_valid(gpu_mem[slot].scratch_aperture.base, gpu_mem[slot].scratch_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].scratch_aperture.base) : 0;
|
||||
return aperture_is_valid(gpu_mem[slot].scratch_aperture.base,
|
||||
gpu_mem[slot].scratch_aperture.limit) ?
|
||||
PORT_VPTR_TO_UINT64(gpu_mem[slot].scratch_aperture.base) : 0;
|
||||
break;
|
||||
|
||||
case FMM_LDS:
|
||||
return aperture_is_valid(gpu_mem[slot].lds_aperture.base, gpu_mem[slot].lds_aperture.limit) ? PORT_VPTR_TO_UINT64(gpu_mem[slot].lds_aperture.base) : 0;
|
||||
return aperture_is_valid(gpu_mem[slot].lds_aperture.base,
|
||||
gpu_mem[slot].lds_aperture.limit) ?
|
||||
PORT_VPTR_TO_UINT64(gpu_mem[slot].lds_aperture.base) : 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
static bool _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t* aperture, void* address, uint64_t size, uint64_t* gpuvm_address) {
|
||||
|
||||
static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
|
||||
void *address, uint64_t size,
|
||||
uint64_t *gpuvm_address)
|
||||
{
|
||||
struct kfd_ioctl_map_memory_to_gpu_args args;
|
||||
struct kfd_ioctl_unmap_memory_from_gpu_args unmap_args;
|
||||
|
||||
// Check that address space was previously reserved
|
||||
/* Check that address space was previously reserved */
|
||||
if (vm_find(aperture, address) == NULL)
|
||||
return false;
|
||||
return -1;
|
||||
|
||||
// Allocate local memory
|
||||
/* Allocate local memory */
|
||||
args.gpu_id = gpu_id;
|
||||
args.size = size;
|
||||
args.va_addr = VOID_PTRS_SUB(address, aperture->base); //va_addr is 40 bit GPUVM address
|
||||
if(kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args))
|
||||
return false;
|
||||
|
||||
// Allocate object
|
||||
/* va_addr is 40 bit GPUVM address */
|
||||
args.va_addr = VOID_PTRS_SUB(address, aperture->base);
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args))
|
||||
goto err_map_ioctl_failed;
|
||||
|
||||
/* Allocate object */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
if (aperture_allocate_object(aperture, address, args.handle, size))
|
||||
goto err_object_allocation_failed;
|
||||
@@ -543,49 +650,54 @@ static bool _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t* aperture, voi
|
||||
|
||||
*gpuvm_address = args.va_addr;
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
|
||||
err_object_allocation_failed:
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
unmap_args.handle = args.handle;
|
||||
kmtIoctl(kfd_fd, AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, &unmap_args);
|
||||
|
||||
err_map_ioctl_failed:
|
||||
*gpuvm_address = 0;
|
||||
return false;
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool fmm_map_to_gpu(void* address, uint64_t size, uint64_t* gpuvm_address) {
|
||||
|
||||
int fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address)
|
||||
{
|
||||
int32_t i;
|
||||
uint64_t pi;
|
||||
|
||||
// Find an aperture the requested address belongs to
|
||||
for(i = 0; i < NUM_OF_SUPPORTED_GPUS; i++){
|
||||
if(gpu_mem[i].gpu_id != NON_VALID_GPU_ID){
|
||||
if ((address>= gpu_mem[i].gpuvm_aperture.base) && (address<= gpu_mem[i].gpuvm_aperture.limit)) {
|
||||
// map it
|
||||
return _fmm_map_to_gpu(gpu_mem[i].gpu_id, &gpu_mem[i].gpuvm_aperture, address, size, gpuvm_address);
|
||||
}
|
||||
}
|
||||
/* Find an aperture the requested address belongs to */
|
||||
for (i = 0; i < NUM_OF_SUPPORTED_GPUS; i++) {
|
||||
if (gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
|
||||
continue;
|
||||
|
||||
if ((address >= gpu_mem[i].gpuvm_aperture.base) &&
|
||||
(address <= gpu_mem[i].gpuvm_aperture.limit))
|
||||
/* map it */
|
||||
return _fmm_map_to_gpu(gpu_mem[i].gpu_id,
|
||||
&gpu_mem[i].gpuvm_aperture,
|
||||
address, size, gpuvm_address);
|
||||
}
|
||||
|
||||
// If address isn't Local memory address, we assume that this is
|
||||
// system memory address accessed through IOMMU.
|
||||
// Thus we "prefetch" it
|
||||
for(pi = 0; pi < size / PAGE_SIZE; pi++) {
|
||||
((char*)address)[pi*PAGE_SIZE] = 0;
|
||||
}
|
||||
return true;
|
||||
/*
|
||||
* If address isn't Local memory address, we assume that this is
|
||||
* system memory address accessed through IOMMU. Thus we "prefetch" it
|
||||
*/
|
||||
for (pi = 0; pi < size / PAGE_SIZE; pi++)
|
||||
((char *) address)[pi * PAGE_SIZE] = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool _fmm_unmap_from_gpu(manageble_aperture_t* aperture, void* address) {
|
||||
|
||||
vm_object_t* object;
|
||||
static int _fmm_unmap_from_gpu(manageble_aperture_t *aperture, void *address)
|
||||
{
|
||||
vm_object_t *object;
|
||||
struct kfd_ioctl_unmap_memory_from_gpu_args args;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
// Find the object to retrieve the handle
|
||||
/* Find the object to retrieve the handle */
|
||||
object = vm_find_object_by_address(aperture, address, 0);
|
||||
if (!object)
|
||||
goto err;
|
||||
@@ -596,27 +708,28 @@ static bool _fmm_unmap_from_gpu(manageble_aperture_t* aperture, void* address) {
|
||||
vm_remove_object(aperture, object);
|
||||
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return true;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
return false;
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool fmm_unmap_from_gpu(void* address) {
|
||||
|
||||
int fmm_unmap_from_gpu(void *address)
|
||||
{
|
||||
int32_t i;
|
||||
|
||||
// Find the aperture the requested address belongs to
|
||||
for(i = 0; i < NUM_OF_SUPPORTED_GPUS; i++){
|
||||
if(gpu_mem[i].gpu_id != NON_VALID_GPU_ID){
|
||||
if ((address>= gpu_mem[i].gpuvm_aperture.base) && (address<= gpu_mem[i].gpuvm_aperture.limit)) {
|
||||
// unmap it
|
||||
return _fmm_unmap_from_gpu(&gpu_mem[i].gpuvm_aperture, address);
|
||||
}
|
||||
}
|
||||
/* Find the aperture the requested address belongs to */
|
||||
for (i = 0; i < NUM_OF_SUPPORTED_GPUS; i++) {
|
||||
if (gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
|
||||
continue;
|
||||
|
||||
if ((address >= gpu_mem[i].gpuvm_aperture.base) &&
|
||||
(address <= gpu_mem[i].gpuvm_aperture.limit))
|
||||
/* unmap it */
|
||||
return _fmm_unmap_from_gpu(&gpu_mem[i].gpuvm_aperture,
|
||||
address);
|
||||
}
|
||||
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -56,8 +56,8 @@ void* fmm_open_graphic_handle(uint32_t gpu_id,
|
||||
void fmm_print(uint32_t node);
|
||||
bool fmm_is_inside_some_aperture(void* address);
|
||||
void fmm_release(void* address, HSAuint64 MemorySizeInBytes);
|
||||
bool fmm_map_to_gpu(void* address, uint64_t size, uint64_t* gpuvm_address);
|
||||
bool fmm_unmap_from_gpu(void* address);
|
||||
int fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
|
||||
int fmm_unmap_from_gpu(void *address);
|
||||
|
||||
/* Topology interface*/
|
||||
HSAKMT_STATUS fmm_node_added(HSAuint32 gpu_id);
|
||||
|
||||
@@ -37,13 +37,14 @@
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtSetMemoryPolicy(
|
||||
HSAuint32 Node,
|
||||
HSAuint32 DefaultPolicy,
|
||||
HSAuint32 AlternatePolicy,
|
||||
void* MemoryAddressAlternate,
|
||||
HSAuint64 MemorySizeInBytes
|
||||
)
|
||||
HSAuint32 Node,
|
||||
HSAuint32 DefaultPolicy,
|
||||
HSAuint32 AlternatePolicy,
|
||||
void *MemoryAddressAlternate,
|
||||
HSAuint64 MemorySizeInBytes
|
||||
)
|
||||
{
|
||||
struct kfd_ioctl_set_memory_policy_args args;
|
||||
HSAKMT_STATUS result;
|
||||
uint32_t gpu_id;
|
||||
|
||||
@@ -53,23 +54,31 @@ hsaKmtSetMemoryPolicy(
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
// We accept any legal policy and alternate address location. You get CC everywhere anyway.
|
||||
if ((DefaultPolicy != HSA_CACHING_CACHED && DefaultPolicy != HSA_CACHING_NONCACHED)
|
||||
|| (AlternatePolicy != HSA_CACHING_CACHED && AlternatePolicy != HSA_CACHING_NONCACHED))
|
||||
{
|
||||
/*
|
||||
* We accept any legal policy and alternate address location.
|
||||
* You get CC everywhere anyway.
|
||||
*/
|
||||
if ((DefaultPolicy != HSA_CACHING_CACHED &&
|
||||
DefaultPolicy != HSA_CACHING_NONCACHED) ||
|
||||
(AlternatePolicy != HSA_CACHING_CACHED &&
|
||||
AlternatePolicy != HSA_CACHING_NONCACHED))
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
CHECK_PAGE_MULTIPLE(MemoryAddressAlternate);
|
||||
CHECK_PAGE_MULTIPLE(MemorySizeInBytes);
|
||||
|
||||
struct kfd_ioctl_set_memory_policy_args args;
|
||||
memset(&args, 0, sizeof(args));
|
||||
|
||||
args.gpu_id = gpu_id;
|
||||
args.default_policy = (DefaultPolicy == HSA_CACHING_CACHED) ? KFD_IOC_CACHE_POLICY_COHERENT : KFD_IOC_CACHE_POLICY_NONCOHERENT;
|
||||
args.alternate_policy = (AlternatePolicy == HSA_CACHING_CACHED) ? KFD_IOC_CACHE_POLICY_COHERENT : KFD_IOC_CACHE_POLICY_NONCOHERENT;
|
||||
args.alternate_aperture_base = (uintptr_t)MemoryAddressAlternate;
|
||||
args.default_policy = (DefaultPolicy == HSA_CACHING_CACHED) ?
|
||||
KFD_IOC_CACHE_POLICY_COHERENT :
|
||||
KFD_IOC_CACHE_POLICY_NONCOHERENT;
|
||||
|
||||
args.alternate_policy = (AlternatePolicy == HSA_CACHING_CACHED) ?
|
||||
KFD_IOC_CACHE_POLICY_COHERENT :
|
||||
KFD_IOC_CACHE_POLICY_NONCOHERENT;
|
||||
|
||||
args.alternate_aperture_base = (uintptr_t) MemoryAddressAlternate;
|
||||
args.alternate_aperture_size = MemorySizeInBytes;
|
||||
|
||||
int err = kmtIoctl(kfd_fd, AMDKFD_IOC_SET_MEMORY_POLICY, &args);
|
||||
@@ -79,46 +88,51 @@ hsaKmtSetMemoryPolicy(
|
||||
|
||||
static HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags)
|
||||
{
|
||||
switch (pageSizeFlags)
|
||||
{
|
||||
switch (pageSizeFlags) {
|
||||
case HSA_PAGE_SIZE_4KB: return 4*1024;
|
||||
case HSA_PAGE_SIZE_64KB: return 64*1024;
|
||||
case HSA_PAGE_SIZE_2MB: return 2*1024*1024;
|
||||
case HSA_PAGE_SIZE_1GB: return 1024*1024*1024;
|
||||
default: assert(false); return 4*1024;
|
||||
default:
|
||||
assert(false);
|
||||
return 4*1024;
|
||||
}
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtAllocMemory(
|
||||
HSAuint32 PreferredNode, //IN
|
||||
HSAuint64 SizeInBytes, //IN (multiple of page size)
|
||||
HsaMemFlags MemFlags, //IN
|
||||
void** MemoryAddress //OUT (page-aligned)
|
||||
)
|
||||
HSAuint32 PreferredNode, /* IN */
|
||||
HSAuint64 SizeInBytes, /* IN (multiple of page size) */
|
||||
HsaMemFlags MemFlags, /* IN */
|
||||
void **MemoryAddress /* OUT (page-aligned) */
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
HSAKMT_STATUS result;
|
||||
HSAuint64 page_size;
|
||||
uint32_t gpu_id;
|
||||
int err;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
result = validate_nodeid(PreferredNode, &gpu_id);
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
// The required size should be page aligned (GDS?)
|
||||
HSAuint64 page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
|
||||
if ((SizeInBytes & (page_size-1)) && !MemFlags.ui32.GDSMemory){
|
||||
/* The required size should be page aligned (GDS?) */
|
||||
page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
|
||||
if ((SizeInBytes & (page_size-1)) && !MemFlags.ui32.GDSMemory)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged) {
|
||||
err = posix_memalign(MemoryAddress, page_size, SizeInBytes);
|
||||
if (err != 0)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
if (MemFlags.ui32.ExecuteAccess) {
|
||||
err = mprotect(*MemoryAddress, SizeInBytes, PROT_READ | PROT_WRITE | PROT_EXEC);
|
||||
err = mprotect(*MemoryAddress, SizeInBytes,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC);
|
||||
|
||||
if (err != 0) {
|
||||
free(*MemoryAddress);
|
||||
return err;
|
||||
@@ -127,10 +141,12 @@ hsaKmtAllocMemory(
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
if(!MemFlags.ui32.HostAccess && MemFlags.ui32.NonPaged){
|
||||
*MemoryAddress = fmm_allocate_device(gpu_id, SizeInBytes);
|
||||
if (!MemFlags.ui32.HostAccess && MemFlags.ui32.NonPaged) {
|
||||
*MemoryAddress = fmm_allocate_device(gpu_id, SizeInBytes);
|
||||
|
||||
if (*MemoryAddress == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -140,22 +156,22 @@ hsaKmtAllocMemory(
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtFreeMemory(
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 SizeInBytes //IN
|
||||
)
|
||||
void *MemoryAddress, /* IN (page-aligned) */
|
||||
HSAuint64 SizeInBytes /* IN */
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
fmm_release( MemoryAddress, SizeInBytes);
|
||||
fmm_release(MemoryAddress, SizeInBytes);
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtRegisterMemory(
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes //IN (page-aligned)
|
||||
)
|
||||
void *MemoryAddress, /* IN (page-aligned) */
|
||||
HSAuint64 MemorySizeInBytes /* IN (page-aligned) */
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -165,8 +181,8 @@ hsaKmtRegisterMemory(
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtDeregisterMemory(
|
||||
void* MemoryAddress //IN
|
||||
)
|
||||
void *MemoryAddress /* IN */
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -176,50 +192,47 @@ hsaKmtDeregisterMemory(
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtMapMemoryToGPU(
|
||||
void* MemoryAddress, //IN (page-aligned)
|
||||
HSAuint64 MemorySizeInBytes, //IN (page-aligned)
|
||||
HSAuint64* AlternateVAGPU //OUT (page-aligned)
|
||||
)
|
||||
void *MemoryAddress, /* IN (page-aligned) */
|
||||
HSAuint64 MemorySizeInBytes, /* IN (page-aligned) */
|
||||
HSAuint64 *AlternateVAGPU /* OUT (page-aligned) */
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
if (AlternateVAGPU)
|
||||
*AlternateVAGPU = 0;
|
||||
|
||||
if (fmm_map_to_gpu(MemoryAddress, MemorySizeInBytes, AlternateVAGPU)){
|
||||
if (!fmm_map_to_gpu(MemoryAddress, MemorySizeInBytes, AlternateVAGPU))
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
else {
|
||||
else
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUnmapMemoryToGPU(
|
||||
void* MemoryAddress //IN (page-aligned)
|
||||
)
|
||||
void *MemoryAddress /* IN (page-aligned) */
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
if (fmm_unmap_from_gpu(MemoryAddress))
|
||||
if (!fmm_unmap_from_gpu(MemoryAddress))
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
else
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtMapGraphicHandle(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint64 GraphicDeviceHandle, //IN
|
||||
HSAuint64 GraphicResourceHandle, //IN
|
||||
HSAuint64 GraphicResourceOffset, //IN
|
||||
HSAuint64 GraphicResourceSize, //IN
|
||||
HSAuint64* FlatMemoryAddress //OUT
|
||||
)
|
||||
HSAuint32 NodeId, /* IN */
|
||||
HSAuint64 GraphicDeviceHandle, /* IN */
|
||||
HSAuint64 GraphicResourceHandle, /* IN */
|
||||
HSAuint64 GraphicResourceOffset, /* IN */
|
||||
HSAuint64 GraphicResourceSize, /* IN */
|
||||
HSAuint64 *FlatMemoryAddress /* OUT */
|
||||
)
|
||||
{
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
HSAKMT_STATUS result;
|
||||
uint32_t gpu_id;
|
||||
@@ -248,10 +261,11 @@ hsaKmtMapGraphicHandle(
|
||||
HSAKMT_STATUS
|
||||
HSAKMTAPI
|
||||
hsaKmtUnmapGraphicHandle(
|
||||
HSAuint32 NodeId, //IN
|
||||
HSAuint64 FlatMemoryAddress, //IN
|
||||
HSAuint64 SizeInBytes //IN
|
||||
)
|
||||
HSAuint32 NodeId, /* IN */
|
||||
HSAuint64 FlatMemoryAddress, /* IN */
|
||||
HSAuint64 SizeInBytes /* IN */
|
||||
)
|
||||
{
|
||||
|
||||
return hsaKmtUnmapMemoryToGPU(PORT_UINT64_TO_VPTR(FlatMemoryAddress));
|
||||
}
|
||||
|
||||
Referencia en una nueva incidencia
Block a user