Use new ioctl for getting process apertures

Change-Id: I73678744ad73942edec442ad9c6d38637f7e1235


[ROCm/ROCR-Runtime commit: e7e1361c3d]
このコミットが含まれているのは:
Harish Kasiviswanathan
2015-12-21 15:42:52 -05:00
コミット add443f1ef
2個のファイルの変更48行の追加17行の削除
+12 -2
ファイルの表示
@@ -321,6 +321,14 @@ struct kfd_ioctl_alloc_memory_of_gpu_new_args {
uint32_t flags;
};
/*
 * Argument block passed with AMDKFD_IOC_GET_PROCESS_APERTURES_NEW.
 *
 * The caller owns the aperture array; it is referenced here as a 64-bit
 * integer rather than a native pointer.
 */
struct kfd_ioctl_get_process_apertures_new_args {
	/* Address of a user-allocated array of
	 * struct kfd_process_device_apertures. */
	uint64_t kfd_process_device_apertures_ptr;

	/* Number of entries in kfd_process_device_apertures_ptr. */
	uint32_t num_of_nodes;

	/* Explicit padding; with it the members total 16 bytes.
	 * NOTE(review): presumably reserved and expected to be zero - confirm. */
	uint32_t pad;
};
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -402,7 +410,6 @@ struct kfd_ioctl_alloc_memory_of_gpu_new_args {
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU_NEW \
AMDKFD_IOWR(0x19, struct kfd_ioctl_alloc_memory_of_gpu_new_args)
#define AMDKFD_IOC_SET_TRAP_HANDLER \
AMDKFD_IOW(0x1a, struct kfd_ioctl_set_trap_handler_args)
@@ -411,7 +418,10 @@ struct kfd_ioctl_alloc_memory_of_gpu_new_args {
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU_NEW \
AMDKFD_IOWR(0x1c, struct kfd_ioctl_unmap_memory_from_gpu_new_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
AMDKFD_IOWR(0x1d, struct kfd_ioctl_get_process_apertures_new_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x1d
#define AMDKFD_COMMAND_END 0x1e
#endif
+36 -15
ファイルの表示
@@ -980,12 +980,13 @@ static int fmm_set_memory_policy(uint32_t gpu_id, int default_policy, int alt_po
HSAKMT_STATUS fmm_init_process_apertures(void)
{
struct kfd_ioctl_get_process_apertures_args args;
struct kfd_ioctl_get_process_apertures_new_args args;
uint32_t i = 0;
int32_t gpu_mem_id =0;
uint32_t gpu_id;
HsaSystemProperties sys_props;
HsaNodeProperties props;
struct kfd_process_device_apertures * process_apertures;
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
ret = topology_sysfs_get_system_props(&sys_props);
@@ -1010,43 +1011,60 @@ HSAKMT_STATUS fmm_init_process_apertures(void)
i++;
}
if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES, (void *) &args))
return HSAKMT_STATUS_ERROR;
/* The ioctl also returns the number of nodes when args.kfd_process_device_apertures_ptr
 * is set to NULL. That query is not needed here because the number of nodes is already
 * known, so the kernel fills in the apertures at kfd_process_device_apertures_ptr. */
process_apertures = malloc(gpu_mem_id * sizeof(struct kfd_process_device_apertures));
if (process_apertures == NULL)
return HSAKMT_STATUS_NO_MEMORY;
args.kfd_process_device_apertures_ptr = (uintptr_t)process_apertures;
args.num_of_nodes = gpu_mem_id;
if (kmtIoctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, (void *)&args)) {
ret = HSAKMT_STATUS_ERROR;
goto get_aperture_ioctl_failed;
}
all_gpu_id_array_size = 0;
all_gpu_id_array = NULL;
if (args.num_of_nodes > 0) {
all_gpu_id_array = malloc(sizeof(uint32_t) * args.num_of_nodes);
if (all_gpu_id_array == NULL)
return HSAKMT_STATUS_NO_MEMORY;
if (all_gpu_id_array == NULL) {
ret = HSAKMT_STATUS_NO_MEMORY;
goto get_aperture_ioctl_failed;
}
}
for (i = 0 ; i < args.num_of_nodes ; i++) {
/* Map Kernel process device data node i <--> gpu_mem_id which indexes into gpu_mem[]
* based on gpu_id */
gpu_mem_id = gpu_mem_find_by_gpu_id(args.process_apertures[i].gpu_id);
if (gpu_mem_id < 0)
return HSAKMT_STATUS_ERROR;
gpu_mem_id = gpu_mem_find_by_gpu_id(process_apertures[i].gpu_id);
if (gpu_mem_id < 0) {
ret = HSAKMT_STATUS_ERROR;
goto invalid_gpu_id;
}
all_gpu_id_array[i] = args.process_apertures[i].gpu_id;
all_gpu_id_array[i] = process_apertures[i].gpu_id;
all_gpu_id_array_size += sizeof(uint32_t);
gpu_mem[gpu_mem_id].lds_aperture.base =
PORT_UINT64_TO_VPTR(args.process_apertures[i].lds_base);
PORT_UINT64_TO_VPTR(process_apertures[i].lds_base);
gpu_mem[gpu_mem_id].lds_aperture.limit =
PORT_UINT64_TO_VPTR(args.process_apertures[i].lds_limit);
PORT_UINT64_TO_VPTR(process_apertures[i].lds_limit);
gpu_mem[gpu_mem_id].gpuvm_aperture.base =
PORT_UINT64_TO_VPTR(args.process_apertures[i].gpuvm_base);
PORT_UINT64_TO_VPTR(process_apertures[i].gpuvm_base);
gpu_mem[gpu_mem_id].gpuvm_aperture.limit =
PORT_UINT64_TO_VPTR(args.process_apertures[i].gpuvm_limit);
PORT_UINT64_TO_VPTR(process_apertures[i].gpuvm_limit);
gpu_mem[gpu_mem_id].scratch_aperture.base =
PORT_UINT64_TO_VPTR(args.process_apertures[i].scratch_base);
PORT_UINT64_TO_VPTR(process_apertures[i].scratch_base);
gpu_mem[gpu_mem_id].scratch_aperture.limit =
PORT_UINT64_TO_VPTR(args.process_apertures[i].scratch_limit);
PORT_UINT64_TO_VPTR(process_apertures[i].scratch_limit);
if (topology_is_dgpu(gpu_mem[gpu_mem_id].device_id)) {
uintptr_t alt_base;
@@ -1096,6 +1114,9 @@ HSAKMT_STATUS fmm_init_process_apertures(void)
}
}
get_aperture_ioctl_failed:
invalid_gpu_id :
free(process_apertures);
return ret;
}