From dcacb71f688a08dcb806db2c5f0d3f6025b7903b Mon Sep 17 00:00:00 2001 From: Chris Freehill Date: Thu, 7 May 2020 17:06:49 -0500 Subject: [PATCH] Use user-mode version of kfd_ioctl.h file Previously using kernel mode version. Change-Id: I82bfff9c019a9059b4d0d198c6cf06dc515cc528 [ROCm/amdsmi commit: e1f0d7e85ad2b1fea1047d9b08edc1fcb2473b00] --- projects/amdsmi/include/rocm_smi/kfd_ioctl.h | 756 +++++++++++-------- projects/amdsmi/include/rocm_smi/rocm_smi.h | 4 - 2 files changed, 459 insertions(+), 301 deletions(-) diff --git a/projects/amdsmi/include/rocm_smi/kfd_ioctl.h b/projects/amdsmi/include/rocm_smi/kfd_ioctl.h index 8bf56c926f..7eb582694c 100755 --- a/projects/amdsmi/include/rocm_smi/kfd_ioctl.h +++ b/projects/amdsmi/include/rocm_smi/kfd_ioctl.h @@ -23,76 +23,87 @@ #ifndef INCLUDE_ROCM_SMI_KFD_IOCTL_H_ #define INCLUDE_ROCM_SMI_KFD_IOCTL_H_ -#include +#include #include -/* - * - 1.1 - initial version - * - 1.3 - Add SMI events support - */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 3 +#define KFD_IOCTL_MINOR_VERSION 2 +#define KFD_IOCTL_DBG_MAJOR_VERSION 1 +#define KFD_IOCTL_DBG_MINOR_VERSION 0 struct kfd_ioctl_get_version_args { - __u32 major_version; /* from KFD */ - __u32 minor_version; /* from KFD */ + __u32 major_version; /* from KFD */ + __u32 minor_version; /* from KFD */ }; /* For kfd_ioctl_create_queue_args.queue_type. 
*/ -#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 -#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 -#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 -#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 +#define KFD_IOC_QUEUE_TYPE_SDMA 0x1 +#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 +#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 -#define KFD_MAX_QUEUE_PERCENTAGE 100 -#define KFD_MAX_QUEUE_PRIORITY 15 +#define KFD_MAX_QUEUE_PERCENTAGE 100 +#define KFD_MAX_QUEUE_PRIORITY 15 struct kfd_ioctl_create_queue_args { - __u64 ring_base_address; /* to KFD */ - __u64 write_pointer_address; /* from KFD */ - __u64 read_pointer_address; /* from KFD */ - __u64 doorbell_offset; /* from KFD */ + __u64 ring_base_address; /* to KFD */ + __u64 write_pointer_address; /* from KFD */ + __u64 read_pointer_address; /* from KFD */ + __u64 doorbell_offset; /* from KFD */ - __u32 ring_size; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 queue_type; /* to KFD */ - __u32 queue_percentage; /* to KFD */ - __u32 queue_priority; /* to KFD */ - __u32 queue_id; /* from KFD */ + __u32 ring_size; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 queue_type; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ + __u32 queue_id; /* from KFD */ - __u64 eop_buffer_address; /* to KFD */ - __u64 eop_buffer_size; /* to KFD */ - __u64 ctx_save_restore_address; /* to KFD */ - __u32 ctx_save_restore_size; /* to KFD */ - __u32 ctl_stack_size; /* to KFD */ + __u64 eop_buffer_address; /* to KFD */ + __u64 eop_buffer_size; /* to KFD */ + __u64 ctx_save_restore_address; /* to KFD */ + __u32 ctx_save_restore_size; /* to KFD */ + __u32 ctl_stack_size; /* to KFD */ }; struct kfd_ioctl_destroy_queue_args { - __u32 queue_id; /* to KFD */ - __u32 pad; + __u32 queue_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_update_queue_args { - __u64 ring_base_address; /* to KFD */ + __u64 ring_base_address; /* to KFD */ - __u32 queue_id; /* to KFD */ - __u32 ring_size; /* to KFD */ - __u32 
queue_percentage; /* to KFD */ - __u32 queue_priority; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 ring_size; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ }; struct kfd_ioctl_set_cu_mask_args { - __u32 queue_id; /* to KFD */ - __u32 num_cu_mask; /* to KFD */ - __u64 cu_mask_ptr; /* to KFD */ + __u32 queue_id; /* to KFD */ + __u32 num_cu_mask; /* to KFD */ + __u64 cu_mask_ptr; /* to KFD */ }; struct kfd_ioctl_get_queue_wave_state_args { - __u64 ctl_stack_address; /* to KFD */ - __u32 ctl_stack_used_size; /* from KFD */ - __u32 save_area_used_size; /* from KFD */ - __u32 queue_id; /* to KFD */ - __u32 pad; + __u64 ctl_stack_address; /* to KFD */ + __u32 ctl_stack_used_size; /* from KFD */ + __u32 save_area_used_size; /* from KFD */ + __u32 queue_id; /* to KFD */ + __u32 pad; +}; + +struct kfd_queue_snapshot_entry { + __u64 ring_base_address; + __u64 write_pointer_address; + __u64 read_pointer_address; + __u64 ctx_save_restore_address; + __u32 queue_id; + __u32 gpu_id; + __u32 ring_size; + __u32 queue_type; + __u32 queue_status; + __u32 reserved[19]; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ @@ -100,13 +111,13 @@ struct kfd_ioctl_get_queue_wave_state_args { #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 struct kfd_ioctl_set_memory_policy_args { - __u64 alternate_aperture_base; /* to KFD */ - __u64 alternate_aperture_size; /* to KFD */ + __u64 alternate_aperture_base; /* to KFD */ + __u64 alternate_aperture_size; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 default_policy; /* to KFD */ - __u32 alternate_policy; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 default_policy; /* to KFD */ + __u32 alternate_policy; /* to KFD */ + __u32 pad; }; /* @@ -117,24 +128,24 @@ struct kfd_ioctl_set_memory_policy_args { */ struct kfd_ioctl_get_clock_counters_args { - __u64 gpu_clock_counter; /* from KFD */ - __u64 cpu_clock_counter; /* from KFD */ - __u64 system_clock_counter; 
/* from KFD */ - __u64 system_clock_freq; /* from KFD */ + __u64 gpu_clock_counter; /* from KFD */ + __u64 cpu_clock_counter; /* from KFD */ + __u64 system_clock_counter; /* from KFD */ + __u64 system_clock_freq; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_process_device_apertures { - __u64 lds_base; /* from KFD */ - __u64 lds_limit; /* from KFD */ - __u64 scratch_base; /* from KFD */ - __u64 scratch_limit; /* from KFD */ - __u64 gpuvm_base; /* from KFD */ - __u64 gpuvm_limit; /* from KFD */ - __u32 gpu_id; /* from KFD */ - __u32 pad; + __u64 lds_base; /* from KFD */ + __u64 lds_limit; /* from KFD */ + __u64 scratch_base; /* from KFD */ + __u64 scratch_limit; /* from KFD */ + __u64 gpuvm_base; /* from KFD */ + __u64 gpuvm_limit; /* from KFD */ + __u32 gpu_id; /* from KFD */ + __u32 pad; }; /* @@ -144,25 +155,25 @@ struct kfd_process_device_apertures { */ #define NUM_OF_SUPPORTED_GPUS 7 struct kfd_ioctl_get_process_apertures_args { - struct kfd_process_device_apertures - process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ + struct kfd_process_device_apertures + process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */ - /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ - __u32 num_of_nodes; - __u32 pad; + /* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */ + __u32 num_of_nodes; + __u32 pad; }; struct kfd_ioctl_get_process_apertures_new_args { - /* User allocated. Pointer to struct kfd_process_device_apertures - * filled in by Kernel - */ - __u64 kfd_process_device_apertures_ptr; - /* to KFD - indicates amount of memory present in - * kfd_process_device_apertures_ptr - * from KFD - Number of entries filled by KFD. - */ - __u32 num_of_nodes; - __u32 pad; + /* User allocated. 
Pointer to struct kfd_process_device_apertures + * filled in by Kernel + */ + __u64 kfd_process_device_apertures_ptr; + /* to KFD - indicates amount of memory present in + * kfd_process_device_apertures_ptr + * from KFD - Number of entries filled by KFD. + */ + __u32 num_of_nodes; + __u32 pad; }; #define MAX_ALLOWED_NUM_POINTS 100 @@ -170,185 +181,271 @@ struct kfd_ioctl_get_process_apertures_new_args { #define MAX_ALLOWED_WAC_BUFF_SIZE 128 struct kfd_ioctl_dbg_register_args { - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_unregister_args { - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_dbg_address_watch_args { - __u64 content_ptr; /* a pointer to the actual content */ - __u32 gpu_id; /* to KFD */ - __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; struct kfd_ioctl_dbg_wave_control_args { - __u64 content_ptr; /* a pointer to the actual content */ - __u32 gpu_id; /* to KFD */ - __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ + __u64 content_ptr; /* a pointer to the actual content */ + __u32 gpu_id; /* to KFD */ + __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ +}; + +/* mapping event types to API spec */ +#define KFD_DBG_EV_STATUS_TRAP 1 +#define KFD_DBG_EV_STATUS_VMFAULT 2 +#define KFD_DBG_EV_STATUS_SUSPENDED 4 +#define KFD_DBG_EV_STATUS_NEW_QUEUE 8 +#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1 + +#define KFD_INVALID_QUEUEID 0xffffffff + +/* KFD_IOC_DBG_TRAP_ENABLE: + * ptr: unused + * data1: 0=disable, 1=enable + * data2: queue ID (for future use) + * data3: return value for fd + */ +#define KFD_IOC_DBG_TRAP_ENABLE 0 + +/* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: + * ptr: unused + * data1: override mode: 0=OR, 1=REPLACE + * data2: mask + * data3: unused + */ +#define 
KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE 1 + +/* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: + * ptr: unused + * data1: 0=normal, 1=halt, 2=kill, 3=singlestep, 4=disable + * data2: unused + * data3: unused + */ +#define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE 2 + +/* KFD_IOC_DBG_TRAP_NODE_SUSPEND: + * ptr: pointer to an array of Queues IDs + * data1: flags + * data2: number of queues + * data3: grace period + */ +#define KFD_IOC_DBG_TRAP_NODE_SUSPEND 3 + +/* KFD_IOC_DBG_TRAP_NODE_RESUME: + * ptr: pointer to an array of Queues IDs + * data1: flags + * data2: number of queues + * data3: unused + */ +#define KFD_IOC_DBG_TRAP_NODE_RESUME 4 + +/* KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: + * ptr: unused + * data1: queue id (IN/OUT) + * data2: flags (IN) + * data3: suspend[2:2], event type [1:0] (OUT) + */ +#define KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT 5 + +/* KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: + * ptr: user buffer (IN) + * data1: flags (IN) + * data2: number of queue snapshot entries (IN/OUT) + * data3: unused + */ +#define KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT 6 + +/* KFD_IOC_DBG_TRAP_GET_VERSION: + * ptr: unused + * data1: major version (OUT) + * data2: minor version (OUT) + * data3: unused + */ +#define KFD_IOC_DBG_TRAP_GET_VERSION 7 + + +struct kfd_ioctl_dbg_trap_args { + __u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */ + __u32 pid; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 op; /* to KFD */ + __u32 data1; /* to KFD */ + __u32 data2; /* to KFD */ + __u32 data3; /* to KFD */ }; /* Matching HSA_EVENTTYPE */ -#define KFD_IOC_EVENT_SIGNAL 0 -#define KFD_IOC_EVENT_NODECHANGE 1 -#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 -#define KFD_IOC_EVENT_HW_EXCEPTION 3 -#define KFD_IOC_EVENT_SYSTEM_EVENT 4 -#define KFD_IOC_EVENT_DEBUG_EVENT 5 -#define KFD_IOC_EVENT_PROFILE_EVENT 6 -#define KFD_IOC_EVENT_QUEUE_EVENT 7 -#define KFD_IOC_EVENT_MEMORY 8 +#define KFD_IOC_EVENT_SIGNAL 0 +#define KFD_IOC_EVENT_NODECHANGE 1 +#define KFD_IOC_EVENT_DEVICESTATECHANGE 2 +#define
KFD_IOC_EVENT_HW_EXCEPTION 3 +#define KFD_IOC_EVENT_SYSTEM_EVENT 4 +#define KFD_IOC_EVENT_DEBUG_EVENT 5 +#define KFD_IOC_EVENT_PROFILE_EVENT 6 +#define KFD_IOC_EVENT_QUEUE_EVENT 7 +#define KFD_IOC_EVENT_MEMORY 8 -#define KFD_IOC_WAIT_RESULT_COMPLETE 0 -#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 -#define KFD_IOC_WAIT_RESULT_FAIL 2 +#define KFD_IOC_WAIT_RESULT_COMPLETE 0 +#define KFD_IOC_WAIT_RESULT_TIMEOUT 1 +#define KFD_IOC_WAIT_RESULT_FAIL 2 -#define KFD_SIGNAL_EVENT_LIMIT 4096 +#define KFD_SIGNAL_EVENT_LIMIT 4096 /* For kfd_event_data.hw_exception_data.reset_type. */ -#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0 -#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 +#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0 +#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1 /* For kfd_event_data.hw_exception_data.reset_cause. */ -#define KFD_HW_EXCEPTION_GPU_HANG 0 -#define KFD_HW_EXCEPTION_ECC 1 +#define KFD_HW_EXCEPTION_GPU_HANG 0 +#define KFD_HW_EXCEPTION_ECC 1 /* For kfd_hsa_memory_exception_data.ErrorType */ -#define KFD_MEM_ERR_NO_RAS 0 -#define KFD_MEM_ERR_SRAM_ECC 1 -#define KFD_MEM_ERR_POISON_CONSUMED 2 -#define KFD_MEM_ERR_GPU_HANG 3 +#define KFD_MEM_ERR_NO_RAS 0 +#define KFD_MEM_ERR_SRAM_ECC 1 +#define KFD_MEM_ERR_POISON_CONSUMED 2 +#define KFD_MEM_ERR_GPU_HANG 3 struct kfd_ioctl_create_event_args { - __u64 event_page_offset; /* from KFD */ - __u32 event_trigger_data; /* from KFD - signal events only */ - __u32 event_type; /* to KFD */ - __u32 auto_reset; /* to KFD */ - __u32 node_id; /* to KFD - only valid for certain event types */ - __u32 event_id; /* from KFD */ - __u32 event_slot_index; /* from KFD */ + __u64 event_page_offset; /* from KFD */ + __u32 event_trigger_data; /* from KFD - signal events only */ + __u32 event_type; /* to KFD */ + __u32 auto_reset; /* to KFD */ + __u32 node_id; /* to KFD - only valid for certain + event types */ + __u32 event_id; /* from KFD */ + __u32 event_slot_index; /* from KFD */ }; struct kfd_ioctl_destroy_event_args { - __u32 event_id; /* to KFD */ - 
__u32 pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_set_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_reset_event_args { - __u32 event_id; /* to KFD */ - __u32 pad; + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_memory_exception_failure { - __u32 NotPresent; /* Page not present or supervisor privilege */ - __u32 ReadOnly; /* Write access to a read-only page */ - __u32 NoExecute; /* Execute access to a page marked NX */ - __u32 imprecise; /* Can't determine the exact fault address */ + __u32 NotPresent; /* Page not present or supervisor privilege */ + __u32 ReadOnly; /* Write access to a read-only page */ + __u32 NoExecute; /* Execute access to a page marked NX */ + __u32 imprecise; /* Can't determine the exact fault address */ }; -/* memory exception data*/ +/* memory exception data */ struct kfd_hsa_memory_exception_data { - struct kfd_memory_exception_failure failure; - __u64 va; - __u32 gpu_id; - __u32 ErrorType; // 0 = no RAS error, - // 1 = ECC_SRAM, - // 2 = Link_SYNFLOOD (poison), - // 3 = GPU hang (not attributable to a specific cause), - // other values reserved + struct kfd_memory_exception_failure failure; + __u64 va; + __u32 gpu_id; + __u32 ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang (not attributable to a specific cause), + * other values reserved + */ }; /* hw exception data */ struct kfd_hsa_hw_exception_data { - __u32 reset_type; - __u32 reset_cause; - __u32 memory_lost; - __u32 gpu_id; + __u32 reset_type; + __u32 reset_cause; + __u32 memory_lost; + __u32 gpu_id; }; /* Event data */ struct kfd_event_data { - union { - struct kfd_hsa_memory_exception_data memory_exception_data; - struct kfd_hsa_hw_exception_data hw_exception_data; - }; /* From KFD */ - __u64 kfd_event_data_ext; // pointer to an extension structure - // for future exception types - __u32 event_id; /* to KFD */ - __u32 pad; 
+ union { + struct kfd_hsa_memory_exception_data memory_exception_data; + struct kfd_hsa_hw_exception_data hw_exception_data; + }; /* From KFD */ + __u64 kfd_event_data_ext; /* pointer to an extension structure + for future exception types */ + __u32 event_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_wait_events_args { - __u64 events_ptr; // pointed to struct - // kfd_event_data array, to KFD - __u32 num_events; /* to KFD */ - __u32 wait_for_all; /* to KFD */ - __u32 timeout; /* to KFD */ - __u32 wait_result; /* from KFD */ + __u64 events_ptr; /* pointed to struct + kfd_event_data array, to KFD */ + __u32 num_events; /* to KFD */ + __u32 wait_for_all; /* to KFD */ + __u32 timeout; /* to KFD */ + __u32 wait_result; /* from KFD */ }; struct kfd_ioctl_set_scratch_backing_va_args { - __u64 va_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u64 va_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_tile_config_args { - /* to KFD: pointer to tile array */ - __u64 tile_config_ptr; - /* to KFD: pointer to macro tile array */ - __u64 macro_tile_config_ptr; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_tile_configs; - /* to KFD: array size allocated by user mode - * from KFD: array size filled by kernel - */ - __u32 num_macro_tile_configs; + /* to KFD: pointer to tile array */ + __u64 tile_config_ptr; + /* to KFD: pointer to macro tile array */ + __u64 macro_tile_config_ptr; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + __u32 num_tile_configs; + /* to KFD: array size allocated by user mode + * from KFD: array size filled by kernel + */ + __u32 num_macro_tile_configs; - __u32 gpu_id; /* to KFD */ - __u32 gb_addr_config; /* from KFD */ - __u32 num_banks; /* from KFD */ - __u32 num_ranks; /* from KFD */ - /* struct size can be extended later if needed - * without breaking ABI compatibility - */ + __u32 gpu_id; /* to 
KFD */ + __u32 gb_addr_config; /* from KFD */ + __u32 num_banks; /* from KFD */ + __u32 num_ranks; /* from KFD */ + /* struct size can be extended later if needed + * without breaking ABI compatibility + */ }; struct kfd_ioctl_set_trap_handler_args { - __u64 tba_addr; /* to KFD */ - __u64 tma_addr; /* to KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; + __u64 tba_addr; /* to KFD */ + __u64 tma_addr; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_acquire_vm_args { - __u32 drm_fd; /* to KFD */ - __u32 gpu_id; /* to KFD */ + __u32 drm_fd; /* to KFD */ + __u32 gpu_id; /* to KFD */ }; /* Allocation flags: memory types */ -#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) -#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) -#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) -#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) -#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) +#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0) +#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1) +#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2) +#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3) +#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4) /* Allocation flags: attributes/access options */ -#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) -#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) -#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) -#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) -#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) -#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) +#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31) +#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) +#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29) +#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) +#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) +#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* Allocate memory for later SVM (shared virtual memory) mapping. 
* @@ -363,12 +460,12 @@ struct kfd_ioctl_acquire_vm_args { * @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above */ struct kfd_ioctl_alloc_memory_of_gpu_args { - __u64 va_addr; /* to KFD */ - __u64 size; /* to KFD */ - __u64 handle; /* from KFD */ - __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ - __u32 gpu_id; /* to KFD */ - __u32 flags; + __u64 va_addr; /* to KFD */ + __u64 size; /* to KFD */ + __u64 handle; /* from KFD */ + __u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */ + __u32 gpu_id; /* to KFD */ + __u32 flags; }; /* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu @@ -376,7 +473,7 @@ struct kfd_ioctl_alloc_memory_of_gpu_args { * @handle: memory handle returned by alloc */ struct kfd_ioctl_free_memory_of_gpu_args { - __u64 handle; /* to KFD */ + __u64 handle; /* to KFD */ }; /* Map memory to one or more GPUs @@ -395,10 +492,10 @@ struct kfd_ioctl_free_memory_of_gpu_args { * n_devices. */ struct kfd_ioctl_map_memory_to_gpu_args { - __u64 handle; /* to KFD */ - __u64 device_ids_array_ptr; /* to KFD */ - __u32 n_devices; /* to KFD */ - __u32 n_success; /* to/from KFD */ + __u64 handle; /* to KFD */ + __u64 device_ids_array_ptr; /* to KFD */ + __u32 n_devices; /* to KFD */ + __u32 n_success; /* to/from KFD */ }; /* Unmap memory from one or more GPUs @@ -406,10 +503,10 @@ struct kfd_ioctl_map_memory_to_gpu_args { * same arguments as for mapping */ struct kfd_ioctl_unmap_memory_from_gpu_args { - __u64 handle; /* to KFD */ - __u64 device_ids_array_ptr; /* to KFD */ - __u32 n_devices; /* to KFD */ - __u32 n_success; /* to/from KFD */ + __u64 handle; /* to KFD */ + __u64 device_ids_array_ptr; /* to KFD */ + __u32 n_devices; /* to KFD */ + __u32 n_success; /* to/from KFD */ }; /* Allocate GWS for specific queue @@ -420,27 +517,28 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { * only support contiguous GWS allocation */ struct kfd_ioctl_alloc_queue_gws_args { - __u32 queue_id; /* to KFD */ - __u32 num_gws; 
/* to KFD */ - __u32 first_gws; /* from KFD */ - __u32 pad; + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ + __u32 pad; /* to KFD */ }; struct kfd_ioctl_get_dmabuf_info_args { - __u64 size; /* from KFD */ - __u64 metadata_ptr; /* to KFD */ - __u32 metadata_size; // to KFD (space allocated by user) - // from KFD (actual metadata size) - __u32 gpu_id; /* from KFD */ - __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ - __u32 dmabuf_fd; /* to KFD */ + __u64 size; /* from KFD */ + __u64 metadata_ptr; /* to KFD */ + __u32 metadata_size; /* to KFD (space allocated by user) + * from KFD (actual metadata size) + */ + __u32 gpu_id; /* from KFD */ + __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ + __u32 dmabuf_fd; /* to KFD */ }; struct kfd_ioctl_import_dmabuf_args { - __u64 va_addr; /* to KFD */ - __u64 handle; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 dmabuf_fd; /* to KFD */ + __u64 va_addr; /* to KFD */ + __u64 handle; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 dmabuf_fd; /* to KFD */ }; /* @@ -450,118 +548,182 @@ struct kfd_ioctl_import_dmabuf_args { #define KFD_SMI_EVENT_VMFAULT 0x0000000000000001 struct kfd_ioctl_smi_events_args { - __u32 gpuid; /* to KFD */ - __u32 anon_fd; /* from KFD */ + __u32 gpuid; /* to KFD */ + __u32 anon_fd; /* from KFD */ }; /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { - KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, - KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0, + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, +}; + +struct kfd_ioctl_ipc_export_handle_args { + __u64 handle; /* to KFD */ + __u32 share_handle[4]; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + +struct kfd_ioctl_ipc_import_handle_args { + __u64 handle; /* from KFD */ + __u64 va_addr; /* to KFD */ + __u64 mmap_offset; /* from KFD */ + __u32 share_handle[4]; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + +struct 
kfd_memory_range { + __u64 va_addr; + __u64 size; +}; + +/* flags definitions + * BIT0: 0: read operation, 1: write operation. + * This also identifies if the src or dst array belongs to remote process + */ +#define KFD_CROSS_MEMORY_RW_BIT (1 << 0) +#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT) +#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT) +#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) + +struct kfd_ioctl_cross_memory_copy_args { + /* to KFD: Process ID of the remote process */ + __u32 pid; + /* to KFD: See above definition */ + __u32 flags; + /* to KFD: Source GPU VM range */ + __u64 src_mem_range_array; + /* to KFD: Size of above array */ + __u64 src_mem_array_size; + /* to KFD: Destination GPU VM range */ + __u64 dst_mem_range_array; + /* to KFD: Size of above array */ + __u64 dst_mem_array_size; + /* from KFD: Total amount of bytes copied */ + __u64 bytes_copied; }; #define AMDKFD_IOCTL_BASE 'K' -#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) -#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) +#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type) +#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type) -#define AMDKFD_IOC_GET_VERSION \ - AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args) +#define AMDKFD_IOC_GET_VERSION \ + AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args) -#define AMDKFD_IOC_CREATE_QUEUE \ - AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) +#define AMDKFD_IOC_CREATE_QUEUE \ + AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args) -#define AMDKFD_IOC_DESTROY_QUEUE \ - AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args) +#define AMDKFD_IOC_DESTROY_QUEUE \ + AMDKFD_IOWR(0x03, 
struct kfd_ioctl_destroy_queue_args) -#define AMDKFD_IOC_SET_MEMORY_POLICY \ - AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) +#define AMDKFD_IOC_SET_MEMORY_POLICY \ + AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args) -#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ - AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) +#define AMDKFD_IOC_GET_CLOCK_COUNTERS \ + AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args) -#define AMDKFD_IOC_GET_PROCESS_APERTURES \ - AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES \ + AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args) -#define AMDKFD_IOC_UPDATE_QUEUE \ - AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) +#define AMDKFD_IOC_UPDATE_QUEUE \ + AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args) -#define AMDKFD_IOC_CREATE_EVENT \ - AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) +#define AMDKFD_IOC_CREATE_EVENT \ + AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args) -#define AMDKFD_IOC_DESTROY_EVENT \ - AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) +#define AMDKFD_IOC_DESTROY_EVENT \ + AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args) -#define AMDKFD_IOC_SET_EVENT \ - AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) +#define AMDKFD_IOC_SET_EVENT \ + AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args) -#define AMDKFD_IOC_RESET_EVENT \ - AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) +#define AMDKFD_IOC_RESET_EVENT \ + AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args) -#define AMDKFD_IOC_WAIT_EVENTS \ - AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) +#define AMDKFD_IOC_WAIT_EVENTS \ + AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ - AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) +#define AMDKFD_IOC_DBG_REGISTER \ + AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ - AMDKFD_IOW(0x0E, struct 
kfd_ioctl_dbg_unregister_args) +#define AMDKFD_IOC_DBG_UNREGISTER \ + AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ - AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) +#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ + AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ - AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) +#define AMDKFD_IOC_DBG_WAVE_CONTROL \ + AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) -#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ - AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) +#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ + AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) -#define AMDKFD_IOC_GET_TILE_CONFIG \ - AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) +#define AMDKFD_IOC_GET_TILE_CONFIG \ + AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) -#define AMDKFD_IOC_SET_TRAP_HANDLER \ - AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) +#define AMDKFD_IOC_SET_TRAP_HANDLER \ + AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) -#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ - AMDKFD_IOWR(0x14, \ - struct kfd_ioctl_get_process_apertures_new_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ + AMDKFD_IOWR(0x14, \ + struct kfd_ioctl_get_process_apertures_new_args) -#define AMDKFD_IOC_ACQUIRE_VM \ - AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) +#define AMDKFD_IOC_ACQUIRE_VM \ + AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) -#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ - AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) +#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ + AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) -#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ - AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) +#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ + AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) -#define AMDKFD_IOC_MAP_MEMORY_TO_GPU 
\ - AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) +#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ + AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) -#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ - AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) +#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ + AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) -#define AMDKFD_IOC_SET_CU_MASK \ - AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) +#define AMDKFD_IOC_SET_CU_MASK \ + AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) -#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ - AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) +#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ + AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) -#define AMDKFD_IOC_GET_DMABUF_INFO \ - AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) +#define AMDKFD_IOC_GET_DMABUF_INFO \ + AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args) -#define AMDKFD_IOC_IMPORT_DMABUF \ - AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_IMPORT_DMABUF \ + AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) -#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ - AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) -#define AMDKFD_IOC_SMI_EVENTS \ - AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) +#define AMDKFD_IOC_SMI_EVENTS \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) -#define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x20 +#define AMDKFD_COMMAND_START 0x01 +#define AMDKFD_COMMAND_END 0x20 -#endif // INCLUDE_ROCM_SMI_KFD_IOCTL_H_ +/* non-upstream ioctls */ +#define AMDKFD_IOC_IPC_IMPORT_HANDLE \ + AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args) + +#define AMDKFD_IOC_IPC_EXPORT_HANDLE \ + AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args) + +#define AMDKFD_IOC_DBG_TRAP \ + AMDKFD_IOWR(0x21, struct 
kfd_ioctl_dbg_trap_args) + +#define AMDKFD_IOC_CROSS_MEMORY_COPY \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args) + +#define AMDKFD_COMMAND_START 0x01 +#undef AMDKFD_COMMAND_END +#define AMDKFD_COMMAND_END 0x22 + +#endif // INCLUDE_ROCM_SMI_KFD_IOCTL_H_ diff --git a/projects/amdsmi/include/rocm_smi/rocm_smi.h b/projects/amdsmi/include/rocm_smi/rocm_smi.h index d510ff3e96..5cf748a630 100755 --- a/projects/amdsmi/include/rocm_smi/rocm_smi.h +++ b/projects/amdsmi/include/rocm_smi/rocm_smi.h @@ -52,11 +52,7 @@ extern "C" { #include -// In a file included from kfd_ioctl.h, is a variable called -// c++ keyword "virtual" -#define virtual virtual_tmp #include "rocm_smi/kfd_ioctl.h" -#undef virtual /** \file rocm_smi.h * Main header file for the ROCm SMI library.