Support gfx802 dGPU
Signed-off-by: Ben Goz <ben.goz@amd.com>
[ROCm/ROCR-Runtime commit: fb8378a18b]
This commit is contained in:
@@ -249,7 +249,7 @@ struct kfd_ioctl_alloc_memory_of_gpu_args {
|
||||
uint64_t size; /* to KFD */
|
||||
uint64_t handle; /* from KFD */
|
||||
uint32_t gpu_id; /* to KFD */
|
||||
uint32_t pad;
|
||||
uint64_t mmap_offset; /* from KFD */
|
||||
};
|
||||
|
||||
struct kfd_ioctl_free_memory_of_gpu_args {
|
||||
@@ -273,6 +273,12 @@ struct kfd_ioctl_open_graphic_handle_args {
|
||||
uint32_t pad;
|
||||
};
|
||||
|
||||
/*
 * Argument block for AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE: tells KFD which
 * address range the process reserved as dGPU aperture for a node.
 * All fields are filled in by user space before the ioctl is issued.
 */
struct kfd_ioctl_set_process_dgpu_aperture_args {
	uint32_t node_id;	/* to KFD */
	uint64_t dgpu_base;	/* to KFD */
	uint64_t dgpu_limit;	/* to KFD */
};
|
||||
|
||||
#define AMDKFD_IOCTL_BASE 'K'
|
||||
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
|
||||
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
|
||||
@@ -342,13 +348,17 @@ struct kfd_ioctl_open_graphic_handle_args {
|
||||
#define AMDKFD_IOC_OPEN_GRAPHIC_HANDLE \
|
||||
AMDKFD_IOWR(0x15, struct kfd_ioctl_open_graphic_handle_args)
|
||||
|
||||
#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH \
|
||||
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
|
||||
#define AMDKFD_IOC_ALLOC_MEMORY_OF_SCRATCH \
|
||||
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
|
||||
|
||||
#define AMDKFD_IOC_SET_CU_MASK \
|
||||
AMDKFD_IOW(0x17, struct kfd_ioctl_set_cu_mask_args)
|
||||
|
||||
#define AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE \
|
||||
AMDKFD_IOW(0x18, struct kfd_ioctl_set_process_dgpu_aperture_args)
|
||||
|
||||
|
||||
#define AMDKFD_COMMAND_START 0x01
|
||||
#define AMDKFD_COMMAND_END 0x18
|
||||
#define AMDKFD_COMMAND_END 0x19
|
||||
|
||||
#endif
|
||||
|
||||
@@ -30,7 +30,9 @@
|
||||
#include <errno.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <stdio.h>
|
||||
#include "linux/kfd_ioctl.h"
|
||||
#include "fmm.h"
|
||||
|
||||
static HSAuint64 *events_page = NULL;
|
||||
|
||||
@@ -70,6 +72,15 @@ hsaKmtCreateEvent(
|
||||
args.event_type = EventDesc->EventType;
|
||||
args.auto_reset = !ManualReset;
|
||||
|
||||
/* dGPU code */
|
||||
if (is_dgpu && events_page == NULL) {
|
||||
events_page = allocate_exec_aligned_memory_gpu(KFD_SIGNAL_EVENT_LIMIT * 8, 0x9000);
|
||||
if (!events_page) {
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
fmm_get_handle(events_page, &args.event_page_offset);
|
||||
}
|
||||
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &args) != 0) {
|
||||
free(e);
|
||||
*Event = NULL;
|
||||
|
||||
@@ -92,10 +92,16 @@ typedef struct {
|
||||
manageble_aperture_t scratch_aperture;
|
||||
manageble_aperture_t scratch_physical;
|
||||
manageble_aperture_t gpuvm_aperture;
|
||||
manageble_aperture_t dgpu_aperture;
|
||||
} gpu_mem_t;
|
||||
|
||||
static gpu_mem_t gpu_mem[] = INIT_GPUs_MEM;
|
||||
|
||||
static HSAKMT_STATUS dgpu_mem_init(uint8_t node_id, void **base, void **limit);
|
||||
static int set_dgpu_aperture(uint32_t node_id, uint64_t base, uint64_t limit);
|
||||
static void __fmm_release(uint32_t gpu_id, void *address,
|
||||
uint64_t MemorySizeInBytes, manageble_aperture_t *aperture);
|
||||
|
||||
static vm_area_t *vm_create_and_init_area(void *start, void *end)
|
||||
{
|
||||
vm_area_t *area = (vm_area_t *) malloc(sizeof(vm_area_t));
|
||||
@@ -373,45 +379,24 @@ static int32_t gpu_mem_find_by_gpu_id(uint32_t gpu_id)
|
||||
return -1;
|
||||
}
|
||||
|
||||
static manageble_aperture_t *find_valid_gpuvm_apperture_of_gpu(uint32_t gpu_id)
|
||||
{
|
||||
manageble_aperture_t *aperture;
|
||||
int32_t gpu_mem_id;
|
||||
|
||||
/* Retrieve gpu_mem id according to gpu_id */
|
||||
gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
if (gpu_mem_id < 0)
|
||||
return NULL;
|
||||
|
||||
aperture = &gpu_mem[gpu_mem_id].gpuvm_aperture;
|
||||
|
||||
/* Check that aperture is properly initialized/supported */
|
||||
if (!aperture_is_valid(aperture->base, aperture->limit))
|
||||
return NULL;
|
||||
|
||||
return aperture;
|
||||
}
|
||||
|
||||
static int fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
|
||||
uint64_t MemorySizeInBytes)
|
||||
uint64_t MemorySizeInBytes,
|
||||
manageble_aperture_t *aperture,
|
||||
uint64_t *mmap_offset)
|
||||
{
|
||||
struct kfd_ioctl_alloc_memory_of_gpu_args args;
|
||||
struct kfd_ioctl_free_memory_of_gpu_args free_args;
|
||||
manageble_aperture_t *aperture;
|
||||
|
||||
if (!mem)
|
||||
return -1;
|
||||
|
||||
/* Retrieve gpuvm aperture according to gpu_id */
|
||||
aperture = find_valid_gpuvm_apperture_of_gpu(gpu_id);
|
||||
if (!aperture)
|
||||
return -1;
|
||||
|
||||
/* Allocate memory from amdkfd */
|
||||
args.gpu_id = gpu_id;
|
||||
args.size = MemorySizeInBytes;
|
||||
|
||||
args.va_addr = VOID_PTRS_SUB(mem, aperture->base);
|
||||
args.va_addr = (uint64_t)mem;
|
||||
if (!mmap_offset)
|
||||
args.va_addr = VOID_PTRS_SUB(mem, aperture->base);
|
||||
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &args))
|
||||
return -1;
|
||||
@@ -423,6 +408,9 @@ static int fmm_allocate_memory_in_device(uint32_t gpu_id, void *mem,
|
||||
goto err_object_allocation_failed;
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
if (mmap_offset)
|
||||
*mmap_offset = args.mmap_offset;
|
||||
|
||||
return 0;
|
||||
|
||||
err_object_allocation_failed:
|
||||
@@ -541,24 +529,10 @@ void *fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
return (void*)(((((uint64_t)mem) >> 16) + 1) << 16);
|
||||
}
|
||||
|
||||
/*
|
||||
* The offset from GPUVM aperture base address to ensure that address 0
|
||||
* (after base subtraction) won't be used
|
||||
*/
|
||||
#define GPUVM_APP_OFFSET 0x10000
|
||||
void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
static void* __fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
manageble_aperture_t *aperture, uint64_t offset, uint64_t *mmap_offset)
|
||||
{
|
||||
manageble_aperture_t *aperture;
|
||||
int32_t gpu_mem_id;
|
||||
void *mem = NULL;
|
||||
|
||||
/* Retrieve gpu_mem id according to gpu_id */
|
||||
gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
if (gpu_mem_id < 0)
|
||||
return NULL;
|
||||
|
||||
aperture = &gpu_mem[gpu_mem_id].gpuvm_aperture;
|
||||
|
||||
/* Check that aperture is properly initialized/supported */
|
||||
if (!aperture_is_valid(aperture->base, aperture->limit))
|
||||
return NULL;
|
||||
@@ -566,14 +540,15 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
/* Allocate address space */
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
mem = aperture_allocate_area(aperture,
|
||||
MemorySizeInBytes, GPUVM_APP_OFFSET);
|
||||
MemorySizeInBytes, offset);
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
/*
|
||||
* Now that we have the area reserved, allocate memory in the device
|
||||
* itself
|
||||
*/
|
||||
if (fmm_allocate_memory_in_device(gpu_id, mem, MemorySizeInBytes)) {
|
||||
if (fmm_allocate_memory_in_device(gpu_id, mem,
|
||||
MemorySizeInBytes, aperture, mmap_offset)) {
|
||||
/*
|
||||
* allocation of memory in device failed.
|
||||
* Release region in aperture
|
||||
@@ -589,6 +564,89 @@ void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
return mem;
|
||||
}
|
||||
|
||||
/*
|
||||
* The offset from GPUVM aperture base address to ensure that address 0
|
||||
* (after base subtraction) won't be used
|
||||
*/
|
||||
#define GPUVM_APP_OFFSET 0x10000
|
||||
void *fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes)
|
||||
{
|
||||
manageble_aperture_t *aperture;
|
||||
int32_t gpu_mem_id;
|
||||
|
||||
/* Retrieve gpu_mem id according to gpu_id */
|
||||
gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
|
||||
if (gpu_mem_id < 0)
|
||||
return NULL;
|
||||
|
||||
aperture = &gpu_mem[gpu_mem_id].gpuvm_aperture;
|
||||
|
||||
return __fmm_allocate_device(gpu_id, MemorySizeInBytes,
|
||||
aperture, GPUVM_APP_OFFSET, NULL);
|
||||
}
|
||||
|
||||
/*
 * Allocate host memory from the C heap.
 *
 * The buffer is aligned to the page size selected by flags.ui32.PageSize.
 * When flags.ui32.ExecuteAccess is set the range is additionally made
 * readable, writable and executable. gpu_id is not used on this path.
 *
 * Returns the buffer, or NULL if the allocation or the mprotect() fails.
 */
static void* fmm_allocate_host_cpu(uint32_t gpu_id,
		uint64_t MemorySizeInBytes, HsaMemFlags flags)
{
	void *buffer = NULL;
	const HSAuint64 alignment = PageSizeFromFlags(flags.ui32.PageSize);

	if (posix_memalign(&buffer, alignment, MemorySizeInBytes) != 0)
		return NULL;

	/* Nothing more to do unless the caller wants executable memory */
	if (!flags.ui32.ExecuteAccess)
		return buffer;

	if (mprotect(buffer, MemorySizeInBytes,
		     PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
		free(buffer);
		return NULL;
	}

	return buffer;
}
|
||||
|
||||
/*
 * Allocate host-accessible memory for a dGPU.
 *
 * An address range is taken from the GPU's dGPU aperture and backed by a
 * KFD allocation; the resulting mmap offset is then mapped at exactly that
 * address through the KFD file descriptor. The flags argument is not
 * consulted on this path.
 *
 * Returns the mapped address, or NULL when gpu_id is unknown, the aperture
 * allocation fails, or the mmap() fails (in which case the aperture
 * allocation is released again).
 */
static void* fmm_allocate_host_gpu(uint32_t gpu_id,
		uint64_t MemorySizeInBytes, HsaMemFlags flags)
{
	void *mem;
	void *ret;
	manageble_aperture_t *aperture;
	int32_t gpu_mem_id;
	uint64_t mmap_offset = 0;

	/* Retrieve gpu_mem id according to gpu_id */
	gpu_mem_id = gpu_mem_find_by_gpu_id(gpu_id);
	if (gpu_mem_id < 0)
		return NULL;

	aperture = &gpu_mem[gpu_mem_id].dgpu_aperture;

	/*
	 * Pad the size up to the next 0x8000 boundary. Note that a size that
	 * is already a multiple of 0x8000 still grows by a full 0x8000; this
	 * is kept as-is because release paths may depend on the same sizes.
	 */
	MemorySizeInBytes += 0x8000 - (MemorySizeInBytes % 0x8000);

	mem = __fmm_allocate_device(gpu_id, MemorySizeInBytes,
				    aperture, 0, &mmap_offset);
	/*
	 * Bail out before mmap(): with MAP_FIXED a NULL hint would try to map
	 * page zero, and mmap_offset is meaningless when the allocation failed.
	 */
	if (!mem)
		return NULL;

	ret = mmap(mem, MemorySizeInBytes,
		   PROT_READ | PROT_WRITE | PROT_EXEC,
		   MAP_SHARED | MAP_FIXED, kfd_fd, mmap_offset);
	if (ret == MAP_FAILED) {
		__fmm_release(gpu_id, mem, MemorySizeInBytes, aperture);
		return NULL;
	}

	return ret;
}
|
||||
|
||||
/*
 * Allocate host memory, choosing the backing by device type: devices that
 * topology_is_dgpu() reports as dGPU get an aperture-backed KFD mapping,
 * everything else gets ordinary heap memory.
 *
 * Returns the allocated address or NULL on failure.
 */
void* fmm_allocate_host(uint32_t gpu_id, uint64_t MemorySizeInBytes, HsaMemFlags flags, uint16_t dev_id)
{
	return topology_is_dgpu(dev_id)
		? fmm_allocate_host_gpu(gpu_id, MemorySizeInBytes, flags)
		: fmm_allocate_host_cpu(gpu_id, MemorySizeInBytes, flags);
}
|
||||
|
||||
void *fmm_open_graphic_handle(uint32_t gpu_id,
|
||||
int32_t graphic_device_handle,
|
||||
uint32_t graphic_handle,
|
||||
@@ -647,20 +705,14 @@ out:
|
||||
}
|
||||
|
||||
static void __fmm_release(uint32_t gpu_id, void *address,
|
||||
uint64_t MemorySizeInBytes)
|
||||
uint64_t MemorySizeInBytes, manageble_aperture_t *aperture)
|
||||
{
|
||||
struct kfd_ioctl_free_memory_of_gpu_args args;
|
||||
manageble_aperture_t *aperture;
|
||||
vm_object_t *object;
|
||||
|
||||
if (!address)
|
||||
return;
|
||||
|
||||
/* Retrieve gpuvm aperture according to gpu_id */
|
||||
aperture = find_valid_gpuvm_apperture_of_gpu(gpu_id);
|
||||
if (!aperture)
|
||||
return;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
|
||||
/* Find the object to retrieve the handle */
|
||||
@@ -696,7 +748,16 @@ void fmm_release(void *address, uint64_t MemorySizeInBytes)
|
||||
if (address >= gpu_mem[i].gpuvm_aperture.base &&
|
||||
address <= gpu_mem[i].gpuvm_aperture.limit) {
|
||||
found = true;
|
||||
__fmm_release(gpu_mem[i].gpu_id, address, MemorySizeInBytes);
|
||||
__fmm_release(gpu_mem[i].gpu_id, address,
|
||||
MemorySizeInBytes, &gpu_mem[i].gpuvm_aperture);
|
||||
fmm_print(gpu_mem[i].gpu_id);
|
||||
}
|
||||
|
||||
if (address >= gpu_mem[i].dgpu_aperture.base &&
|
||||
address <= gpu_mem[i].dgpu_aperture.limit) {
|
||||
found = true;
|
||||
__fmm_release(gpu_mem[i].gpu_id, address,
|
||||
MemorySizeInBytes, &gpu_mem[i].dgpu_aperture);
|
||||
fmm_print(gpu_mem[i].gpu_id);
|
||||
}
|
||||
}
|
||||
@@ -713,6 +774,8 @@ HSAKMT_STATUS fmm_init_process_apertures(void)
|
||||
{
|
||||
struct kfd_ioctl_get_process_apertures_args args;
|
||||
uint8_t node_id;
|
||||
uint32_t gpu_id;
|
||||
HsaNodeProperties props;
|
||||
|
||||
if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, (void *) &args))
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
@@ -721,6 +784,17 @@ HSAKMT_STATUS fmm_init_process_apertures(void)
|
||||
gpu_mem[node_id].gpu_id =
|
||||
args.process_apertures[node_id].gpu_id;
|
||||
|
||||
|
||||
if (topology_sysfs_get_node_props(node_id, &props, &gpu_id) ==
|
||||
HSAKMT_STATUS_SUCCESS) {
|
||||
if (topology_is_dgpu(props.DeviceId)) {
|
||||
dgpu_mem_init(node_id, &gpu_mem[node_id].dgpu_aperture.base,
|
||||
&gpu_mem[node_id].dgpu_aperture.limit);
|
||||
set_dgpu_aperture(node_id, (uint64_t)gpu_mem[node_id].dgpu_aperture.base,
|
||||
(uint64_t)gpu_mem[node_id].dgpu_aperture.limit);
|
||||
}
|
||||
}
|
||||
|
||||
gpu_mem[node_id].lds_aperture.base =
|
||||
PORT_UINT64_TO_VPTR(args.process_apertures[node_id].lds_base);
|
||||
|
||||
@@ -804,6 +878,34 @@ HSAuint64 fmm_get_aperture_base(aperture_type_e aperture_type, HSAuint32 gpu_id)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Map an object that lives in the given aperture to the GPU.
 *
 * The object is looked up by address under the aperture mutex and its KFD
 * handle is passed to AMDKFD_IOC_MAP_MEMORY_TO_GPU. gpu_id and size are
 * accepted for signature symmetry but not used here.
 *
 * Returns 0 on success, -1 if no object is found or the ioctl fails.
 */
static int _fmm_map_to_gpu_gtt(uint32_t gpu_id, manageble_aperture_t *aperture,
				void *address, uint64_t size)
{
	struct kfd_ioctl_map_memory_to_gpu_args args;
	vm_object_t *obj;
	int status = -1;

	pthread_mutex_lock(&aperture->fmm_mutex);

	/* The handle stored with the object identifies the buffer to KFD */
	obj = vm_find_object_by_address(aperture, address, 0);
	if (obj) {
		args.handle = obj->handle;
		if (kmtIoctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args) == 0)
			status = 0;
	}

	pthread_mutex_unlock(&aperture->fmm_mutex);

	return status;
}
|
||||
|
||||
static int _fmm_map_to_gpu(uint32_t gpu_id, manageble_aperture_t *aperture,
|
||||
void *address, uint64_t size,
|
||||
uint64_t *gpuvm_address)
|
||||
@@ -855,6 +957,12 @@ int fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address)
|
||||
return _fmm_map_to_gpu(gpu_mem[i].gpu_id,
|
||||
&gpu_mem[i].gpuvm_aperture,
|
||||
address, size, gpuvm_address);
|
||||
if ((address >= gpu_mem[i].dgpu_aperture.base) &&
|
||||
(address <= gpu_mem[i].dgpu_aperture.limit))
|
||||
/* map it */
|
||||
return _fmm_map_to_gpu_gtt(gpu_mem[i].gpu_id,
|
||||
&gpu_mem[i].dgpu_aperture,
|
||||
address, size);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -904,7 +1012,144 @@ int fmm_unmap_from_gpu(void *address)
|
||||
/* unmap it */
|
||||
return _fmm_unmap_from_gpu(&gpu_mem[i].gpuvm_aperture,
|
||||
address);
|
||||
else if ((address >= gpu_mem[i].dgpu_aperture.base) &&
|
||||
(address <= gpu_mem[i].dgpu_aperture.limit))
|
||||
/* unmap it */
|
||||
return _fmm_unmap_from_gpu(&gpu_mem[i].dgpu_aperture,
|
||||
address);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Tonga dGPU specific functions */
|
||||
static bool is_dgpu_mem_init = false;
|
||||
static void *dgpu_shared_aperture_base = NULL;
|
||||
static void *dgpu_shared_aperture_limit = NULL;
|
||||
|
||||
static int set_dgpu_aperture(uint32_t node_id, uint64_t base, uint64_t limit)
|
||||
{
|
||||
struct kfd_ioctl_set_process_dgpu_aperture_args args;
|
||||
|
||||
args.node_id = node_id;
|
||||
args.dgpu_base = base;
|
||||
args.dgpu_limit = limit;
|
||||
|
||||
return kmtIoctl(kfd_fd, AMDKFD_IOC_SET_PROCESS_DGPU_APERTURE, &args);
|
||||
}
|
||||
|
||||
static void *reserve_address(void *addr, long long unsigned int len)
|
||||
{
|
||||
void *ret_addr;
|
||||
|
||||
if (len <= 0)
|
||||
return NULL;
|
||||
|
||||
ret_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
|
||||
MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
|
||||
if (addr == MAP_FAILED)
|
||||
return NULL;
|
||||
|
||||
return ret_addr;
|
||||
}
|
||||
|
||||
#define ADDRESS_RANGE_LIMIT_MASK 0xFFFFFFFFFF
|
||||
|
||||
static HSAKMT_STATUS dgpu_mem_init(uint8_t node_id, void **base, void **limit)
|
||||
{
|
||||
bool found;
|
||||
HSAKMT_STATUS ret;
|
||||
void *addr, *ret_addr;
|
||||
uint32_t max_len;
|
||||
long long unsigned int temp;
|
||||
uint32_t gpu_id;
|
||||
HsaNodeProperties props;
|
||||
|
||||
if (is_dgpu_mem_init) {
|
||||
if (base)
|
||||
base = dgpu_shared_aperture_base;
|
||||
if (limit)
|
||||
limit = dgpu_shared_aperture_limit;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ret = topology_sysfs_get_node_props(node_id, &props, &gpu_id);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS)
|
||||
return ret;
|
||||
|
||||
max_len = (uint32_t)props.LocalMemSize;
|
||||
found = false;
|
||||
|
||||
for (addr = (void *)PAGE_SIZE, ret_addr = NULL;
|
||||
ret_addr != addr;
|
||||
addr = (void *)((unsigned long)addr + 0x8000))
|
||||
{
|
||||
ret_addr = reserve_address(addr, max_len);
|
||||
if (!ret_addr)
|
||||
continue;
|
||||
temp = (long long unsigned int)ret_addr + max_len;
|
||||
if (temp < ADDRESS_RANGE_LIMIT_MASK) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
munmap(ret_addr, max_len);
|
||||
}
|
||||
|
||||
if (found) {
|
||||
if (base)
|
||||
*base = ret_addr;
|
||||
dgpu_shared_aperture_base = ret_addr;
|
||||
if (limit)
|
||||
*limit = (void *)((long long unsigned int)ret_addr + max_len);
|
||||
dgpu_shared_aperture_limit = (void *)((long long unsigned int)ret_addr + max_len);
|
||||
is_dgpu_mem_init = true;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
bool fmm_get_handle(void *address, uint64_t *handle)
|
||||
{
|
||||
int32_t i;
|
||||
manageble_aperture_t *aperture;
|
||||
vm_object_t *object;
|
||||
bool found;
|
||||
|
||||
found = false;
|
||||
aperture = NULL;
|
||||
|
||||
/* Find the aperture the requested address belongs to */
|
||||
for (i = 0; i < NUM_OF_SUPPORTED_GPUS; i++) {
|
||||
if (gpu_mem[i].gpu_id == NON_VALID_GPU_ID)
|
||||
continue;
|
||||
|
||||
if ((address >= gpu_mem[i].gpuvm_aperture.base) &&
|
||||
(address <= gpu_mem[i].gpuvm_aperture.limit)) {
|
||||
aperture = &gpu_mem[i].gpuvm_aperture;
|
||||
break;
|
||||
}
|
||||
|
||||
else if ((address >= gpu_mem[i].dgpu_aperture.base) &&
|
||||
(address <= gpu_mem[i].dgpu_aperture.limit)) {
|
||||
aperture = &gpu_mem[i].dgpu_aperture;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!aperture)
|
||||
return false;
|
||||
|
||||
pthread_mutex_lock(&aperture->fmm_mutex);
|
||||
/* Find the object to retrieve the handle */
|
||||
object = vm_find_object_by_address(aperture, address, 0);
|
||||
if (object && handle) {
|
||||
*handle = object->handle;
|
||||
found = true;
|
||||
}
|
||||
pthread_mutex_unlock(&aperture->fmm_mutex);
|
||||
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
@@ -49,6 +49,8 @@ HSAKMT_STATUS fmm_init_process_apertures(void);
|
||||
*/
|
||||
void* fmm_allocate_scratch(uint32_t gpu_id, uint64_t MemorySizeInBytes);
|
||||
void* fmm_allocate_device(uint32_t gpu_id, uint64_t MemorySizeInBytes);
|
||||
void* fmm_allocate_host(uint32_t gpu_id, uint64_t MemorySizeInBytes,
|
||||
HsaMemFlags flags, uint16_t dev_id);
|
||||
void* fmm_open_graphic_handle(uint32_t gpu_id,
|
||||
int32_t graphic_device_handle,
|
||||
uint32_t graphic_handle,
|
||||
@@ -58,6 +60,7 @@ bool fmm_is_inside_some_aperture(void* address);
|
||||
void fmm_release(void* address, HSAuint64 MemorySizeInBytes);
|
||||
int fmm_map_to_gpu(void *address, uint64_t size, uint64_t *gpuvm_address);
|
||||
int fmm_unmap_from_gpu(void *address);
|
||||
bool fmm_get_handle(void *address, uint64_t *handle);
|
||||
|
||||
/* Topology interface*/
|
||||
HSAKMT_STATUS fmm_node_added(HSAuint32 gpu_id);
|
||||
|
||||
@@ -31,3 +31,4 @@ int kfd_fd;
|
||||
unsigned long kfd_open_count;
|
||||
unsigned long system_properties_count;
|
||||
pthread_mutex_t hsakmt_mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
bool is_dgpu = false;
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
extern int kfd_fd;
|
||||
extern unsigned long kfd_open_count;
|
||||
extern pthread_mutex_t hsakmt_mutex;
|
||||
extern bool is_dgpu;
|
||||
|
||||
#undef HSAKMTAPI
|
||||
#define HSAKMTAPI __attribute__((visibility ("default")))
|
||||
@@ -65,6 +66,15 @@ HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id);
|
||||
HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t* node_id);
|
||||
uint16_t get_device_id_by_node(HSAuint32 node_id);
|
||||
|
||||
HSAKMT_STATUS topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id);
|
||||
HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32_t *gpu_id);
|
||||
bool topology_is_dgpu(uint16_t gpu_id);
|
||||
|
||||
HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags);
|
||||
|
||||
void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align);
|
||||
void free_exec_aligned_memory_gpu(void *addr, uint32_t size);
|
||||
|
||||
extern int kmtIoctl(int fd, unsigned long request, void *arg);
|
||||
|
||||
/* Void pointer arithmetic (or remove -Wpointer-arith to allow void pointers arithmetic) */
|
||||
|
||||
@@ -86,7 +86,7 @@ hsaKmtSetMemoryPolicy(
|
||||
return (err == -1) ? HSAKMT_STATUS_ERROR : HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags)
|
||||
HSAuint32 PageSizeFromFlags(unsigned int pageSizeFlags)
|
||||
{
|
||||
switch (pageSizeFlags) {
|
||||
case HSA_PAGE_SIZE_4KB: return 4*1024;
|
||||
@@ -109,9 +109,8 @@ hsaKmtAllocMemory(
|
||||
)
|
||||
{
|
||||
HSAKMT_STATUS result;
|
||||
HSAuint64 page_size;
|
||||
uint32_t gpu_id;
|
||||
int err;
|
||||
HSAuint64 page_size;
|
||||
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
@@ -119,26 +118,18 @@ hsaKmtAllocMemory(
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
/* The required size should be page aligned (GDS?) */
|
||||
page_size = PageSizeFromFlags(MemFlags.ui32.PageSize);
|
||||
|
||||
if ((!MemoryAddress) || (!SizeInBytes) ||
|
||||
(SizeInBytes & (page_size-1)))
|
||||
(SizeInBytes & (page_size-1))) {
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
if (MemFlags.ui32.HostAccess && !MemFlags.ui32.NonPaged && !MemFlags.ui32.Scratch) {
|
||||
err = posix_memalign(MemoryAddress, page_size, SizeInBytes);
|
||||
if (err != 0)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
if (MemFlags.ui32.ExecuteAccess) {
|
||||
err = mprotect(*MemoryAddress, SizeInBytes,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC);
|
||||
|
||||
if (err != 0) {
|
||||
free(*MemoryAddress);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
*MemoryAddress = fmm_allocate_host(gpu_id, SizeInBytes, MemFlags,
|
||||
get_device_id_by_node(PreferredNode));
|
||||
if (*MemoryAddress == NULL)
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -224,6 +215,7 @@ hsaKmtUnmapMemoryToGPU(
|
||||
)
|
||||
{
|
||||
CHECK_KFD_OPEN();
|
||||
|
||||
if (!fmm_unmap_from_gpu(MemoryAddress))
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
else
|
||||
|
||||
@@ -34,25 +34,42 @@
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#define TONGA_PAGE_SIZE 0x9000
|
||||
|
||||
/* 1024 doorbells, 4 bytes each doorbell */
|
||||
#define DOORBELLS_PAGE_SIZE 1024 * 4
|
||||
|
||||
/* ASIC families that need distinct queue buffer handling */
enum asic_family_type {
	CHIP_KAVERI  = 0,
	CHIP_CARRIZO = 1,
	CHIP_TONGA   = 2
};
|
||||
|
||||
struct device_info
|
||||
{
|
||||
enum asic_family_type asic_family;
|
||||
uint32_t ctx_save_restore_size;
|
||||
uint32_t eop_buffer_size;
|
||||
};
|
||||
|
||||
struct device_info kaveri_device_info = {
|
||||
.asic_family = CHIP_KAVERI,
|
||||
.ctx_save_restore_size = 0,
|
||||
.eop_buffer_size = 0,
|
||||
};
|
||||
|
||||
struct device_info carrizo_device_info = {
|
||||
.asic_family = CHIP_CARRIZO,
|
||||
.ctx_save_restore_size = 2756608,
|
||||
.eop_buffer_size = 4096,
|
||||
};
|
||||
|
||||
struct device_info tonga_device_info = {
|
||||
.asic_family = CHIP_TONGA,
|
||||
.ctx_save_restore_size = TONGA_PAGE_SIZE,
|
||||
.eop_buffer_size = TONGA_PAGE_SIZE,
|
||||
};
|
||||
|
||||
struct device_id
|
||||
{
|
||||
uint16_t dev_id;
|
||||
@@ -87,6 +104,8 @@ struct device_id supported_devices[] = {
|
||||
{ 0x9875, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9876, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x9877, &carrizo_device_info }, /* Carrizo */
|
||||
{ 0x6939, &tonga_device_info },
|
||||
{ 0x692b, &tonga_device_info },
|
||||
{ 0, NULL }
|
||||
};
|
||||
|
||||
@@ -97,6 +116,7 @@ struct queue
|
||||
uint32_t rptr;
|
||||
void *eop_buffer;
|
||||
void *ctx_save_restore;
|
||||
enum asic_family_type type;
|
||||
};
|
||||
|
||||
struct process_doorbells
|
||||
@@ -121,7 +141,7 @@ static struct device_info *get_device_info_by_dev_id(uint16_t dev_id)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void free_queue(struct queue *q)
|
||||
static void free_queue_cpu(struct queue *q)
|
||||
{
|
||||
if (q->eop_buffer)
|
||||
free(q->eop_buffer);
|
||||
@@ -130,7 +150,7 @@ static void free_queue(struct queue *q)
|
||||
free(q);
|
||||
}
|
||||
|
||||
static void* allocate_exec_aligned_memory(uint32_t size, uint32_t align)
|
||||
static void* allocate_exec_aligned_memory_cpu(uint32_t size, uint32_t align)
|
||||
{
|
||||
void *ptr;
|
||||
int retval;
|
||||
@@ -149,13 +169,89 @@ static void* allocate_exec_aligned_memory(uint32_t size, uint32_t align)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void* allocate_exec_aligned_memory_gpu(uint32_t size, uint32_t align)
|
||||
{
|
||||
void *mem;
|
||||
HSAuint64 gpu_va;
|
||||
HsaMemFlags flags;
|
||||
HSAKMT_STATUS ret;
|
||||
|
||||
flags.Value = 0;
|
||||
flags.ui32.HostAccess = 1;
|
||||
flags.ui32.ExecuteAccess = 1;
|
||||
flags.ui32.PageSize = HSA_PAGE_SIZE_4KB;
|
||||
|
||||
size += align - (size % align);
|
||||
|
||||
ret = hsaKmtAllocMemory(0, size, flags, &mem);
|
||||
if (ret != HSAKMT_STATUS_SUCCESS) {
|
||||
return NULL;
|
||||
}
|
||||
if (hsaKmtMapMemoryToGPU(mem, size, &gpu_va) != HSAKMT_STATUS_SUCCESS) {
|
||||
hsaKmtFreeMemory(mem, size);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
void free_exec_aligned_memory_gpu(void *addr, uint32_t size)
|
||||
{
|
||||
size += TONGA_PAGE_SIZE - (size % TONGA_PAGE_SIZE);
|
||||
|
||||
if (hsaKmtUnmapMemoryToGPU(addr) == HSAKMT_STATUS_SUCCESS) {
|
||||
hsaKmtFreeMemory(addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
static void* allocate_exec_aligned_memory(uint32_t size, uint32_t align, enum asic_family_type type)
|
||||
{
|
||||
if (type == CHIP_TONGA)
|
||||
return allocate_exec_aligned_memory_gpu(size, TONGA_PAGE_SIZE);
|
||||
return allocate_exec_aligned_memory_cpu(size, align);
|
||||
}
|
||||
|
||||
static void release_exec_aligned_memory_gpu(void *addr, uint32_t size)
|
||||
{
|
||||
if (hsaKmtUnmapMemoryToGPU(addr) == HSAKMT_STATUS_SUCCESS)
|
||||
hsaKmtFreeMemory(addr, (HSAuint64)size);
|
||||
}
|
||||
|
||||
static void release_exec_aligned_memory(void *addr, uint32_t size, enum asic_family_type type)
|
||||
{
|
||||
if (type == CHIP_TONGA)
|
||||
release_exec_aligned_memory_gpu(addr, TONGA_PAGE_SIZE);
|
||||
else
|
||||
free(addr);
|
||||
}
|
||||
|
||||
static void free_queue_gpu(struct queue *q)
|
||||
{
|
||||
if (q->eop_buffer) {
|
||||
hsaKmtUnmapMemoryToGPU(q->eop_buffer);
|
||||
hsaKmtFreeMemory(q->eop_buffer, TONGA_PAGE_SIZE);
|
||||
}
|
||||
if (q->ctx_save_restore) {
|
||||
hsaKmtUnmapMemoryToGPU(q->ctx_save_restore);
|
||||
hsaKmtFreeMemory(q->ctx_save_restore, TONGA_PAGE_SIZE);
|
||||
}
|
||||
release_exec_aligned_memory((void *)q, sizeof(*q), q->type);
|
||||
}
|
||||
|
||||
static void free_queue(struct queue *q, enum asic_family_type type)
|
||||
{
|
||||
if (type == CHIP_TONGA)
|
||||
return free_queue_gpu(q);
|
||||
return free_queue_cpu(q);
|
||||
}
|
||||
|
||||
static int handle_concrete_asic(struct device_info *dev_info, struct queue *q,
|
||||
struct kfd_ioctl_create_queue_args *args)
|
||||
{
|
||||
if (dev_info) {
|
||||
if (dev_info->eop_buffer_size > 0) {
|
||||
q->eop_buffer =
|
||||
allocate_exec_aligned_memory(dev_info->eop_buffer_size, PAGE_SIZE);
|
||||
allocate_exec_aligned_memory(dev_info->eop_buffer_size, PAGE_SIZE, dev_info->asic_family);
|
||||
if (q->eop_buffer == NULL) {
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
}
|
||||
@@ -165,7 +261,7 @@ static int handle_concrete_asic(struct device_info *dev_info, struct queue *q,
|
||||
if (dev_info->ctx_save_restore_size > 0) {
|
||||
args->ctx_save_restore_size = dev_info->ctx_save_restore_size;
|
||||
q->ctx_save_restore =
|
||||
allocate_exec_aligned_memory(dev_info->ctx_save_restore_size, PAGE_SIZE);
|
||||
allocate_exec_aligned_memory(dev_info->ctx_save_restore_size, PAGE_SIZE, dev_info->asic_family);
|
||||
if (q->ctx_save_restore == NULL) {;
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
}
|
||||
@@ -201,30 +297,35 @@ hsaKmtCreateQueue(
|
||||
if (result != HSAKMT_STATUS_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct queue *q = malloc(sizeof(*q));
|
||||
dev_id = get_device_id_by_node(NodeId);
|
||||
dev_info = get_device_info_by_dev_id(dev_id);
|
||||
|
||||
struct queue *q = allocate_exec_aligned_memory(sizeof (*q),
|
||||
PAGE_SIZE, dev_info->asic_family);
|
||||
if (q == NULL)
|
||||
return HSAKMT_STATUS_NO_MEMORY;
|
||||
|
||||
memset(q, 0, sizeof(*q));
|
||||
|
||||
struct kfd_ioctl_create_queue_args args;
|
||||
memset(&args, 0, sizeof(args));
|
||||
|
||||
dev_id = get_device_id_by_node(NodeId);
|
||||
dev_info = get_device_info_by_dev_id(dev_id);
|
||||
args.gpu_id = gpu_id;
|
||||
|
||||
q->type = dev_info->asic_family;
|
||||
|
||||
err = handle_concrete_asic(dev_info, q, &args);
|
||||
if (err != HSAKMT_STATUS_SUCCESS) {
|
||||
free_queue(q);
|
||||
free_queue(q, dev_info->asic_family);
|
||||
return err;
|
||||
}
|
||||
|
||||
switch (Type)
|
||||
{
|
||||
case HSA_QUEUE_COMPUTE: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE; break;
|
||||
case HSA_QUEUE_SDMA: free_queue(q); return HSAKMT_STATUS_UNAVAILABLE;
|
||||
case HSA_QUEUE_SDMA: free_queue(q, dev_info->asic_family); return HSAKMT_STATUS_UNAVAILABLE;
|
||||
case HSA_QUEUE_COMPUTE_AQL: args.queue_type = KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; break;
|
||||
default: free_queue(q); return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
default: free_queue(q, dev_info->asic_family); return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
}
|
||||
|
||||
if (Type != HSA_QUEUE_COMPUTE_AQL)
|
||||
@@ -244,7 +345,7 @@ hsaKmtCreateQueue(
|
||||
|
||||
if (err == -1)
|
||||
{
|
||||
free_queue(q);
|
||||
free_queue(q, dev_info->asic_family);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -259,7 +360,7 @@ hsaKmtCreateQueue(
|
||||
if (ptr == MAP_FAILED) {
|
||||
pthread_mutex_unlock(&doorbells[NodeId].doorbells_mutex);
|
||||
hsaKmtDestroyQueue(q->queue_id);
|
||||
free_queue(q);
|
||||
free_queue(q, dev_info->asic_family);
|
||||
return HSAKMT_STATUS_ERROR;
|
||||
}
|
||||
|
||||
@@ -321,7 +422,7 @@ hsaKmtDestroyQueue(
|
||||
struct kfd_ioctl_destroy_queue_args args;
|
||||
|
||||
if (q == NULL)
|
||||
return (HSAKMT_STATUS_INVALID_PARAMETER);
|
||||
return (HSAKMT_STATUS_INVALID_PARAMETER);
|
||||
|
||||
memset(&args, 0, sizeof(args));
|
||||
|
||||
@@ -335,7 +436,7 @@ hsaKmtDestroyQueue(
|
||||
}
|
||||
else
|
||||
{
|
||||
free_queue(q);
|
||||
free_queue(q, q->type);
|
||||
return HSAKMT_STATUS_SUCCESS;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,36 +59,40 @@ static struct hsa_gfxip_table {
|
||||
unsigned char major; // GFXIP Major engine version
|
||||
unsigned char minor; // GFXIP Minor engine version
|
||||
unsigned char stepping; // GFXIP Stepping info
|
||||
unsigned char is_dgpu; // Predicat for dGPU devices
|
||||
} gfxip_lookup_table[] = {
|
||||
/* Kaveri Family */
|
||||
{ 0x1304, 7, 0, 0 },
|
||||
{ 0x1305, 7, 0, 0 },
|
||||
{ 0x1306, 7, 0, 0 },
|
||||
{ 0x1307, 7, 0, 0 },
|
||||
{ 0x1309, 7, 0, 0 },
|
||||
{ 0x130A, 7, 0, 0 },
|
||||
{ 0x130B, 7, 0, 0 },
|
||||
{ 0x130C, 7, 0, 0 },
|
||||
{ 0x130D, 7, 0, 0 },
|
||||
{ 0x130E, 7, 0, 0 },
|
||||
{ 0x130F, 7, 0, 0 },
|
||||
{ 0x1310, 7, 0, 0 },
|
||||
{ 0x1311, 7, 0, 0 },
|
||||
{ 0x1312, 7, 0, 0 },
|
||||
{ 0x1313, 7, 0, 0 },
|
||||
{ 0x1315, 7, 0, 0 },
|
||||
{ 0x1316, 7, 0, 0 },
|
||||
{ 0x1317, 7, 0, 0 },
|
||||
{ 0x1318, 7, 0, 0 },
|
||||
{ 0x131B, 7, 0, 0 },
|
||||
{ 0x131C, 7, 0, 0 },
|
||||
{ 0x131D, 7, 0, 0 },
|
||||
{ 0x1304, 7, 0, 0, 0 },
|
||||
{ 0x1305, 7, 0, 0, 0 },
|
||||
{ 0x1306, 7, 0, 0, 0 },
|
||||
{ 0x1307, 7, 0, 0, 0 },
|
||||
{ 0x1309, 7, 0, 0, 0 },
|
||||
{ 0x130A, 7, 0, 0, 0 },
|
||||
{ 0x130B, 7, 0, 0, 0 },
|
||||
{ 0x130C, 7, 0, 0, 0 },
|
||||
{ 0x130D, 7, 0, 0, 0 },
|
||||
{ 0x130E, 7, 0, 0, 0 },
|
||||
{ 0x130F, 7, 0, 0, 0 },
|
||||
{ 0x1310, 7, 0, 0, 0 },
|
||||
{ 0x1311, 7, 0, 0, 0 },
|
||||
{ 0x1312, 7, 0, 0, 0 },
|
||||
{ 0x1313, 7, 0, 0, 0 },
|
||||
{ 0x1315, 7, 0, 0, 0 },
|
||||
{ 0x1316, 7, 0, 0, 0 },
|
||||
{ 0x1317, 7, 0, 0, 0 },
|
||||
{ 0x1318, 7, 0, 0, 0 },
|
||||
{ 0x131B, 7, 0, 0, 0 },
|
||||
{ 0x131C, 7, 0, 0, 0 },
|
||||
{ 0x131D, 7, 0, 0, 0 },
|
||||
/* Carrizo Family */
|
||||
{ 0x9870, 8, 0, 1 },
|
||||
{ 0x9874, 8, 0, 1 },
|
||||
{ 0x9875, 8, 0, 1 },
|
||||
{ 0x9876, 8, 0, 1 },
|
||||
{ 0x9877, 8, 0, 1 }
|
||||
{ 0x9870, 8, 0, 1, 0 },
|
||||
{ 0x9874, 8, 0, 1, 0 },
|
||||
{ 0x9875, 8, 0, 1, 0 },
|
||||
{ 0x9876, 8, 0, 1, 0 },
|
||||
{ 0x9877, 8, 0, 1, 0 },
|
||||
/* Tonga Family */
|
||||
{ 0x6939, 8, 0, 0, 1 },
|
||||
{ 0x692b, 8, 0, 0, 1 }
|
||||
};
|
||||
|
||||
static void
|
||||
@@ -203,7 +207,7 @@ err1:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS
|
||||
HSAKMT_STATUS
|
||||
topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) {
|
||||
FILE *fd;
|
||||
char path[256];
|
||||
@@ -222,7 +226,25 @@ topology_sysfs_get_gpu_id(uint32_t node_id, uint32_t *gpu_id) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
static HSAKMT_STATUS
|
||||
bool topology_is_dgpu(uint16_t gpu_id)
|
||||
{
|
||||
uint32_t i, table_size;
|
||||
|
||||
if (is_dgpu)
|
||||
return is_dgpu;
|
||||
|
||||
table_size = sizeof(gfxip_lookup_table)/sizeof(struct hsa_gfxip_table);
|
||||
for (i=0; i<table_size; i++) {
|
||||
if(gfxip_lookup_table[i].device_id == gpu_id && gfxip_lookup_table[i].is_dgpu == 1) {
|
||||
is_dgpu = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
HSAKMT_STATUS
|
||||
topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props, uint32_t *gpu_id) {
|
||||
FILE *fd;
|
||||
char *read_buf, *p;
|
||||
|
||||
Ссылка в новой задаче
Block a user