diff --git a/amdsmi_cli/amdsmi_commands.py b/amdsmi_cli/amdsmi_commands.py index 9c3ec17dbe..109a01e0cd 100644 --- a/amdsmi_cli/amdsmi_commands.py +++ b/amdsmi_cli/amdsmi_commands.py @@ -5579,7 +5579,14 @@ class AMDSMICommands(): events = listener.read(2000) for event in events: values_dict["event"] = event["event"] - values_dict["message"] = event["message"] + # parse message as its own dictionary + message_list = event["message"].split(" ") + message_dict = {} + for item in message_list: + if not item == "": + item_list = item.split(": ") + message_dict.update({item_list[0]: item_list[1]}) + values_dict["message"] = message_dict commands.logger.store_output(device, 'values', values_dict) commands.logger.print_output() except amdsmi_exception.AmdSmiLibraryException as e: diff --git a/include/amd_smi/amdsmi.h b/include/amd_smi/amdsmi.h index 4db08619b3..e27b255007 100644 --- a/include/amd_smi/amdsmi.h +++ b/include/amd_smi/amdsmi.h @@ -961,7 +961,8 @@ typedef enum { #define AMDSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) //! 
Maximum number of characters an event notification message will be -#define MAX_EVENT_NOTIFICATION_MSG_SIZE 64 +// matches kfd message max size +#define MAX_EVENT_NOTIFICATION_MSG_SIZE 96 /** * Event notification data returned from event notification API diff --git a/py-interface/amdsmi_interface.py b/py-interface/amdsmi_interface.py index 2c4a6b46ba..5d3db14d34 100644 --- a/py-interface/amdsmi_interface.py +++ b/py-interface/amdsmi_interface.py @@ -69,7 +69,7 @@ AMDSMI_MAX_CACHE_TYPES = 10 AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK = 64 AMDSMI_GPU_UUID_SIZE = 38 MAX_AMDSMI_NAME_LENGTH = 64 -MAX_EVENT_NOTIFICATION_MSG_SIZE = 64 +MAX_EVENT_NOTIFICATION_MSG_SIZE = 96 class AmdSmiInitFlags(IntEnum): diff --git a/py-interface/amdsmi_wrapper.py b/py-interface/amdsmi_wrapper.py index c65800da99..e1513cdbb7 100644 --- a/py-interface/amdsmi_wrapper.py +++ b/py-interface/amdsmi_wrapper.py @@ -1300,7 +1300,7 @@ struct_amdsmi_evt_notification_data_t._pack_ = 1 # source:False struct_amdsmi_evt_notification_data_t._fields_ = [ ('processor_handle', ctypes.POINTER(None)), ('event', amdsmi_evt_notification_type_t), - ('message', ctypes.c_char * 64), + ('message', ctypes.c_char * 96), ('PADDING_0', ctypes.c_ubyte * 4), ] @@ -1741,6 +1741,7 @@ struct_amdsmi_gpu_xcp_metrics_t._fields_ = [ ('gfx_busy_acc', ctypes.c_uint64 * 8), ] +amdsmi_gpu_xcp_metrics_t = struct_amdsmi_gpu_xcp_metrics_t class struct_amdsmi_gpu_metrics_t(Structure): pass @@ -2869,11 +2870,12 @@ __all__ = \ 'amdsmi_gpu_cache_info_t', 'amdsmi_gpu_control_counter', 'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter', 'amdsmi_gpu_destroy_counter', 'amdsmi_gpu_metrics_t', - 'amdsmi_gpu_read_counter', 'amdsmi_gpu_xgmi_error_status', - 'amdsmi_hsmp_freqlimit_src_names', 'amdsmi_hsmp_metrics_table_t', - 'amdsmi_init', 'amdsmi_init_flags_t', - 'amdsmi_init_gpu_event_notification', 'amdsmi_io_bw_encoding_t', - 'amdsmi_io_link_type_t', 'amdsmi_is_P2P_accessible', + 'amdsmi_gpu_read_counter', 'amdsmi_gpu_xcp_metrics_t', + 
'amdsmi_gpu_xgmi_error_status', 'amdsmi_hsmp_freqlimit_src_names', + 'amdsmi_hsmp_metrics_table_t', 'amdsmi_init', + 'amdsmi_init_flags_t', 'amdsmi_init_gpu_event_notification', + 'amdsmi_io_bw_encoding_t', 'amdsmi_io_link_type_t', + 'amdsmi_is_P2P_accessible', 'amdsmi_is_gpu_power_management_enabled', 'amdsmi_kfd_info_t', 'amdsmi_link_id_bw_type_t', 'amdsmi_link_metrics_t', 'amdsmi_link_type_t', 'amdsmi_memory_page_status_t', diff --git a/rocm_smi/include/rocm_smi/kfd_ioctl.h b/rocm_smi/include/rocm_smi/kfd_ioctl.h index 99895d2036..2334afb9c0 100644 --- a/rocm_smi/include/rocm_smi/kfd_ioctl.h +++ b/rocm_smi/include/rocm_smi/kfd_ioctl.h @@ -20,28 +20,37 @@ * THE SOFTWARE. */ -#ifndef INCLUDE_ROCM_SMI_KFD_IOCTL_H_ -#define INCLUDE_ROCM_SMI_KFD_IOCTL_H_ +#ifndef KFD_IOCTL_H_INCLUDED +#define KFD_IOCTL_H_INCLUDED -#include +#include #include +/* + * - 1.1 - initial version + * - 1.3 - Add SMI events support + * - 1.4 - Indicate new SRAM EDC bit in device properties + * - 1.5 - Add SVM API + * - 1.6 - Query clear flags in SVM get_attr API + * - 1.7 - Checkpoint Restore (CRIU) API + * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs + * - 1.9 - Add available memory ioctl + * - 1.10 - Add SMI profiler event log + * - 1.11 - Add unified memory for ctx save/restore area + * - 1.12 - Add DMA buf export ioctl + * - 1.13 - Add debugger API + * - 1.14 - Update kfd_event_data + * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl + * - 1.16 - Add contiguous VRAM allocation flag + */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 2 -#define KFD_IOCTL_DBG_MAJOR_VERSION 1 -#define KFD_IOCTL_DBG_MINOR_VERSION 0 +#define KFD_IOCTL_MINOR_VERSION 16 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ __u32 minor_version; /* from KFD */ }; -struct kfd_ioctl_get_available_memory_args { - __u64 available; /* from KFD */ - __u32 gpu_id; /* to KFD */ - __u32 pad; -}; - /* For kfd_ioctl_create_queue_args.queue_type. 
*/ #define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0 #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 @@ -99,17 +108,36 @@ struct kfd_ioctl_get_queue_wave_state_args { __u32 pad; }; -struct kfd_queue_snapshot_entry { - __u64 ring_base_address; - __u64 write_pointer_address; - __u64 read_pointer_address; - __u64 ctx_save_restore_address; - __u32 queue_id; +struct kfd_ioctl_get_available_memory_args { + __u64 available; /* from KFD */ + __u32 gpu_id; /* to KFD */ + __u32 pad; +}; + +struct kfd_dbg_device_info_entry { + __u64 exception_status; + __u64 lds_base; + __u64 lds_limit; + __u64 scratch_base; + __u64 scratch_limit; + __u64 gpuvm_base; + __u64 gpuvm_limit; __u32 gpu_id; - __u32 ring_size; - __u32 queue_type; - __u32 queue_status; - __u32 reserved[19]; + __u32 location_id; + __u32 vendor_id; + __u32 device_id; + __u32 revision_id; + __u32 subsystem_vendor_id; + __u32 subsystem_device_id; + __u32 fw_version; + __u32 gfx_target_version; + __u32 simd_count; + __u32 max_waves_per_simd; + __u32 array_count; + __u32 simd_arrays_per_engine; + __u32 num_xcc; + __u32 capability; + __u32 debug_prop; }; /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ @@ -208,88 +236,17 @@ struct kfd_ioctl_dbg_wave_control_args { __u32 buf_size_in_bytes; /*including gpu_id and buf_size */ }; -/* mapping event types to API spec */ -#define KFD_DBG_EV_STATUS_TRAP 1 -#define KFD_DBG_EV_STATUS_VMFAULT 2 -#define KFD_DBG_EV_STATUS_SUSPENDED 4 -#define KFD_DBG_EV_STATUS_NEW_QUEUE 8 -#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1 +#define KFD_INVALID_FD 0xffffffff -#define KFD_INVALID_QUEUEID 0xffffffff - -/* KFD_IOC_DBG_TRAP_ENABLE: - * ptr: unused - * data1: 0=disable, 1=enable - * data2: queue ID (for future use) - * data3: return value for fd - */ -#define KFD_IOC_DBG_TRAP_ENABLE 0 - -/* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: - * ptr: unused - * data1: override mode: 0=OR, 1=REPLACE - * data2: mask - * data3: unused - */ -#define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE 1 - -/* 
KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: - * ptr: unused - * data1: 0=normal, 1=halt, 2=kill, 3=singlestep, 4=disable - * data2: unused - * data3: unused - */ -#define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE 2 - -/* KFD_IOC_DBG_TRAP_NODE_SUSPEND: - * ptr: pointer to an array of Queues IDs - * data1: flags - * data2: number of queues - * data3: grace period - */ -#define KFD_IOC_DBG_TRAP_NODE_SUSPEND 3 - -/* KFD_IOC_DBG_TRAP_NODE_RESUME: - * ptr: pointer to an array of Queues IDs - * data1: flags - * data2: number of queues - * data3: unused - */ -#define KFD_IOC_DBG_TRAP_NODE_RESUME 4 - -/* KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: - * ptr: unused - * data1: queue id (IN/OUT) - * data2: flags (IN) - * data3: suspend[2:2], event type [1:0] (OUT) - */ -#define KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT 5 - -/* KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: - * ptr: user buffer (IN) - * data1: flags (IN) - * data2: number of queue snapshot entries (IN/OUT) - * data3: unused - */ -#define KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT 6 - -/* KFD_IOC_DBG_TRAP_GET_VERSION: - * prt: unsused - * data1: major version (OUT) - * data2: minor version (OUT) - * data3: unused - */ -#define KFD_IOC_DBG_TRAP_GET_VERSION 7 - - -struct kfd_ioctl_dbg_trap_args { +struct kfd_ioctl_dbg_trap_args_deprecated { + __u64 exception_mask; /* to KFD */ __u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */ __u32 pid; /* to KFD */ - __u32 gpu_id; /* to KFD */ __u32 op; /* to KFD */ __u32 data1; /* to KFD */ __u32 data2; /* to KFD */ __u32 data3; /* to KFD */ + __u32 data4; /* to KFD */ }; /* Matching HSA_EVENTTYPE */ @@ -328,7 +285,8 @@ struct kfd_ioctl_create_event_args { __u32 event_trigger_data; /* from KFD - signal events only */ __u32 event_type; /* to KFD */ __u32 auto_reset; /* to KFD */ - __u32 node_id; /* to KFD - only valid for certain event types */ + __u32 node_id; /* to KFD - only valid for certain + event types */ __u32 event_id; /* from KFD */ __u32 event_slot_index; /* from KFD */ }; @@ -360,11 +318,12 
@@ struct kfd_hsa_memory_exception_data { struct kfd_memory_exception_failure failure; __u64 va; __u32 gpu_id; - __u32 ErrorType; // 0 = no RAS error, - // 1 = ECC_SRAM, - // 2 = Link_SYNFLOOD (poison), - // 3 = GPU hang (not attributable to a specific cause), - // other values reserved + __u32 ErrorType; /* 0 = no RAS error, + * 1 = ECC_SRAM, + * 2 = Link_SYNFLOOD (poison), + * 3 = GPU hang (not attributable to a specific cause), + * other values reserved + */ }; /* hw exception data */ @@ -375,21 +334,29 @@ struct kfd_hsa_hw_exception_data { __u32 gpu_id; }; +/* hsa signal event data */ +struct kfd_hsa_signal_event_data { + __u64 last_event_age; /* to and from KFD */ +}; + /* Event data */ struct kfd_event_data { union { + /* From KFD */ struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_hw_exception_data hw_exception_data; - }; /* From KFD */ - __u64 kfd_event_data_ext; // pointer to an extension structure - // for future exception types + /* To and From KFD */ + struct kfd_hsa_signal_event_data signal_event_data; + }; + __u64 kfd_event_data_ext; /* pointer to an extension structure + for future exception types */ __u32 event_id; /* to KFD */ __u32 pad; }; struct kfd_ioctl_wait_events_args { - __u64 events_ptr; // pointed to struct - // kfd_event_data array, to KFD + __u64 events_ptr; /* pointed to struct + kfd_event_data array, to KFD */ __u32 num_events; /* to KFD */ __u32 wait_for_all; /* to KFD */ __u32 timeout; /* to KFD */ @@ -450,6 +417,9 @@ struct kfd_ioctl_acquire_vm_args { #define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26) +#define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED (1 << 25) +#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT (1 << 24) +#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS (1 << 23) /* Allocate memory for later SVM (shared virtual memory) mapping. 
* @@ -524,14 +494,15 @@ struct kfd_ioctl_alloc_queue_gws_args { __u32 queue_id; /* to KFD */ __u32 num_gws; /* to KFD */ __u32 first_gws; /* from KFD */ - __u32 pad; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ - __u32 metadata_size; // to KFD (space allocated by user) - // from KFD (actual metadata size) + __u32 metadata_size; /* to KFD (space allocated by user) + * from KFD (actual metadata size) + */ __u32 gpu_id; /* from KFD */ __u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */ __u32 dmabuf_fd; /* to KFD */ @@ -544,6 +515,12 @@ struct kfd_ioctl_import_dmabuf_args { __u32 dmabuf_fd; /* to KFD */ }; +struct kfd_ioctl_export_dmabuf_args { + __u64 handle; /* to KFD */ + __u32 flags; /* to KFD */ + __u32 dmabuf_fd; /* from KFD */ +}; + /* * KFD SMI(System Management Interface) events */ @@ -553,16 +530,203 @@ enum kfd_smi_event { KFD_SMI_EVENT_THERMAL_THROTTLE = 2, KFD_SMI_EVENT_GPU_PRE_RESET = 3, KFD_SMI_EVENT_GPU_POST_RESET = 4, - KFD_SMI_EVENT_RING_HANG = 5, + KFD_SMI_EVENT_MIGRATE_START = 5, + KFD_SMI_EVENT_MIGRATE_END = 6, + KFD_SMI_EVENT_PAGE_FAULT_START = 7, + KFD_SMI_EVENT_PAGE_FAULT_END = 8, + KFD_SMI_EVENT_QUEUE_EVICTION = 9, + KFD_SMI_EVENT_QUEUE_RESTORE = 10, + KFD_SMI_EVENT_UNMAP_FROM_GPU = 11, + + /* + * max event number, as a flag bit to get events from all processes, + * this requires super user permission, otherwise will not be able to + * receive event from any process. Without this flag to receive events + * from same process. 
+ */ + KFD_SMI_EVENT_ALL_PROCESS = 64 +}; + +enum KFD_MIGRATE_TRIGGERS { + KFD_MIGRATE_TRIGGER_PREFETCH, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, + KFD_MIGRATE_TRIGGER_TTM_EVICTION +}; + +enum KFD_QUEUE_EVICTION_TRIGGERS { + KFD_QUEUE_EVICTION_TRIGGER_SVM, + KFD_QUEUE_EVICTION_TRIGGER_USERPTR, + KFD_QUEUE_EVICTION_TRIGGER_TTM, + KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, + KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, + KFD_QUEUE_EVICTION_CRIU_RESTORE +}; + +enum KFD_SVM_UNMAP_TRIGGERS { + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE, + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) +#define KFD_SMI_EVENT_MSG_SIZE 96 struct kfd_ioctl_smi_events_args { - __u32 gpuid; /* to KFD */ + __u32 gpuid; /* to KFD */ __u32 anon_fd; /* from KFD */ }; +/** + * kfd_ioctl_spm_op - SPM ioctl operations + * + * @KFD_IOCTL_SPM_OP_ACQUIRE: acquire exclusive access to SPM + * @KFD_IOCTL_SPM_OP_RELEASE: release exclusive access to SPM + * @KFD_IOCTL_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming + */ +enum kfd_ioctl_spm_op { + KFD_IOCTL_SPM_OP_ACQUIRE, + KFD_IOCTL_SPM_OP_RELEASE, + KFD_IOCTL_SPM_OP_SET_DEST_BUF +}; + +/** + * kfd_ioctl_spm_args - Arguments for SPM ioctl + * + * @op[in]: specifies the operation to perform + * @gpu_id[in]: GPU ID of the GPU to profile + * @dst_buf[in]: used for the address of the destination buffer + * in @KFD_IOCTL_SPM_SET_DEST_BUFFER + * @buf_size[in]: size of the destination buffer + * @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left + * `in the timeout window + * @bytes_copied[out]: amount of data that was copied to the previous dest_buf + * @has_data_loss: boolean indicating whether data was lost + * (e.g. due to a ring-buffer overflow) + * + * This ioctl performs different functions depending on the @op parameter. 
+ * + * KFD_IOCTL_SPM_OP_ACQUIRE + * ------------------------ + * + * Acquires exclusive access of SPM on the specified @gpu_id for the calling process. + * This must be called before using KFD_IOCTL_SPM_OP_SET_DEST_BUF. + * + * KFD_IOCTL_SPM_OP_RELEASE + * ------------------------ + * + * Releases exclusive access of SPM on the specified @gpu_id for the calling process, + * which allows another process to acquire it in the future. + * + * KFD_IOCTL_SPM_OP_SET_DEST_BUF + * ----------------------------- + * + * If @dst_buf is NULL, the destination buffer address is unset and copying of counters + * is stopped. + * + * If @dst_buf is not NULL, it specifies the pointer to a new destination buffer. + * @buf_size specifies the size of the buffer. + * + * If @timeout is non-0, the call will wait for up to @timeout ms for the previous + * buffer to be filled. If previous buffer to be filled before timeout, the @timeout + * will be updated value with the time remaining. If the timeout is exceeded, the function + * copies any partial data available into the previous user buffer and returns success. + * The amount of valid data in the previous user buffer is indicated by @bytes_copied. + * + * If @timeout is 0, the function immediately replaces the previous destination buffer + * without waiting for the previous buffer to be filled. That means the previous buffer + * may only be partially filled, and @bytes_copied will indicate how much data has been + * copied to it. + * + * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0. + * + * Returns negative error code on failure, 0 on success. 
+ */ +struct kfd_ioctl_spm_args { + __u64 dest_buf; + __u32 buf_size; + __u32 op; + __u32 timeout; + __u32 gpu_id; + __u32 bytes_copied; + __u32 has_data_loss; +}; + +/************************************************************************************************** + * CRIU IOCTLs (Checkpoint Restore In Userspace) + * + * When checkpointing a process, the userspace application will perform: + * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts + * all the queues. + * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges) + * 3. UNPAUSE op to un-evict all the queues + * + * When restoring a process, the CRIU userspace application will perform: + * + * 1. RESTORE op to restore process contents + * 2. RESUME op to start the process + * + * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User + * application needs to perform an UNPAUSE operation after calling PROCESS_INFO. + */ + +enum kfd_criu_op { + KFD_CRIU_OP_PROCESS_INFO, + KFD_CRIU_OP_CHECKPOINT, + KFD_CRIU_OP_UNPAUSE, + KFD_CRIU_OP_RESTORE, + KFD_CRIU_OP_RESUME, +}; + +/** + * kfd_ioctl_criu_args - Arguments perform CRIU operation + * @devices: [in/out] User pointer to memory location for devices information. + * This is an array of type kfd_criu_device_bucket. + * @bos: [in/out] User pointer to memory location for BOs information + * This is an array of type kfd_criu_bo_bucket. + * @priv_data: [in/out] User pointer to memory location for private data + * @priv_data_size: [in/out] Size of priv_data in bytes + * @num_devices: [in/out] Number of GPUs used by process. Size of @devices array. + * @num_bos [in/out] Number of BOs used by process. Size of @bos array. + * @num_objects: [in/out] Number of objects used by process. Objects are opaque to + * user application. 
+ * @pid: [in/out] PID of the process being checkpointed + * @op [in] Type of operation (kfd_criu_op) + * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_criu_args { + __u64 devices; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 bos; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data; /* Used during ops: CHECKPOINT, RESTORE */ + __u64 priv_data_size; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_devices; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_bos; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 num_objects; /* Used during ops: PROCESS_INFO, RESTORE */ + __u32 pid; /* Used during ops: PROCESS_INFO, RESUME */ + __u32 op; +}; + +struct kfd_criu_device_bucket { + __u32 user_gpu_id; + __u32 actual_gpu_id; + __u32 drm_fd; + __u32 pad; +}; + +struct kfd_criu_bo_bucket { + __u64 addr; + __u64 size; + __u64 offset; + __u64 restored_offset; /* During restore, updated offset for BO */ + __u32 gpu_id; /* This is the user_gpu_id */ + __u32 alloc_flags; + __u32 dmabuf_fd; + __u32 pad; +}; + +/* CRIU IOCTLs - END */ +/**************************************************************************************************/ /* Register offset inside the remapped mmio page */ enum kfd_mmio_remap { @@ -574,33 +738,19 @@ struct kfd_ioctl_ipc_export_handle_args { __u64 handle; /* to KFD */ __u32 share_handle[4]; /* from KFD */ __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 flags; /* to KFD */ }; struct kfd_ioctl_ipc_import_handle_args { __u64 handle; /* from KFD */ __u64 va_addr; /* to KFD */ - __u64 mmap_offset; /* from KFD */ + __u64 mmap_offset; /* from KFD */ __u32 share_handle[4]; /* to KFD */ __u32 gpu_id; /* to KFD */ - __u32 pad; + __u32 flags; /* from KFD */ }; -struct kfd_memory_range { - __u64 va_addr; - __u64 size; -}; - -/* flags definitions - * BIT0: 0: read operation, 1: write operation. 
- * This also identifies if the src or dst array belongs to remote process - */ -#define KFD_CROSS_MEMORY_RW_BIT (1 << 0) -#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT) -#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT) -#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) // NOLINT - -struct kfd_ioctl_cross_memory_copy_args { +struct kfd_ioctl_cross_memory_copy_deprecated_args { /* to KFD: Process ID of the remote process */ __u32 pid; /* to KFD: See above definition */ @@ -617,6 +767,874 @@ struct kfd_ioctl_cross_memory_copy_args { __u64 bytes_copied; }; +/* Guarantee host access to memory */ +#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x00000001 +/* Fine grained coherency between all devices with access */ +#define KFD_IOCTL_SVM_FLAG_COHERENT 0x00000002 +/* Use any GPU in same hive as preferred device */ +#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x00000004 +/* GPUs only read, allows replication */ +#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x00000008 +/* Allow execution on GPU */ +#define KFD_IOCTL_SVM_FLAG_GPU_EXEC 0x00000010 +/* GPUs mostly read, may allow similar optimizations as RO, but writes fault */ +#define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY 0x00000020 +/* Keep GPU memory mapping always valid as if XNACK is disable */ +#define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED 0x00000040 +/* Fine grained coherency between all devices using device-scope atomics */ +#define KFD_IOCTL_SVM_FLAG_EXT_COHERENT 0x00000080 + +/** + * kfd_ioctl_svm_op - SVM ioctl operations + * + * @KFD_IOCTL_SVM_OP_SET_ATTR: Modify one or more attributes + * @KFD_IOCTL_SVM_OP_GET_ATTR: Query one or more attributes + */ +enum kfd_ioctl_svm_op { + KFD_IOCTL_SVM_OP_SET_ATTR, + KFD_IOCTL_SVM_OP_GET_ATTR +}; + +/** kfd_ioctl_svm_location - Enum for preferred and prefetch locations + * + * GPU IDs are used to specify GPUs as preferred and prefetch locations. 
+ * Below definitions are used for system memory or for leaving the preferred + * location unspecified. + */ +enum kfd_ioctl_svm_location { + KFD_IOCTL_SVM_LOCATION_SYSMEM = 0, + KFD_IOCTL_SVM_LOCATION_UNDEFINED = 0xffffffff +}; + +/** + * kfd_ioctl_svm_attr_type - SVM attribute types + * + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: gpuid of the preferred location, 0 for + * system memory + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: gpuid of the prefetch location, 0 for + * system memory. Setting this triggers an + * immediate prefetch (migration). + * @KFD_IOCTL_SVM_ATTR_ACCESS: + * @KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + * @KFD_IOCTL_SVM_ATTR_NO_ACCESS: specify memory access for the gpuid given + * by the attribute value + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS: bitmask of flags to set (see + * KFD_IOCTL_SVM_FLAG_...) + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS: bitmask of flags to clear + * @KFD_IOCTL_SVM_ATTR_GRANULARITY: migration granularity + * (log2 num pages) + */ +enum kfd_ioctl_svm_attr_type { + KFD_IOCTL_SVM_ATTR_PREFERRED_LOC, + KFD_IOCTL_SVM_ATTR_PREFETCH_LOC, + KFD_IOCTL_SVM_ATTR_ACCESS, + KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE, + KFD_IOCTL_SVM_ATTR_NO_ACCESS, + KFD_IOCTL_SVM_ATTR_SET_FLAGS, + KFD_IOCTL_SVM_ATTR_CLR_FLAGS, + KFD_IOCTL_SVM_ATTR_GRANULARITY +}; + +/** + * kfd_ioctl_svm_attribute - Attributes as pairs of type and value + * + * The meaning of the @value depends on the attribute type. + * + * @type: attribute type (see enum @kfd_ioctl_svm_attr_type) + * @value: attribute value + */ +struct kfd_ioctl_svm_attribute { + __u32 type; + __u32 value; +}; + +/** + * kfd_ioctl_svm_args - Arguments for SVM ioctl + * + * @op specifies the operation to perform (see enum + * @kfd_ioctl_svm_op). @start_addr and @size are common for all + * operations. + * + * A variable number of attributes can be given in @attrs. + * @nattr specifies the number of attributes. New attributes can be + * added in the future without breaking the ABI. 
If unknown attributes + * are given, the function returns -EINVAL. + * + * @KFD_IOCTL_SVM_OP_SET_ATTR sets attributes for a virtual address + * range. It may overlap existing virtual address ranges. If it does, + * the existing ranges will be split such that the attribute changes + * only apply to the specified address range. + * + * @KFD_IOCTL_SVM_OP_GET_ATTR returns the intersection of attributes + * over all memory in the given range and returns the result as the + * attribute value. If different pages have different preferred or + * prefetch locations, 0xffffffff will be returned for + * @KFD_IOCTL_SVM_ATTR_PREFERRED_LOC or + * @KFD_IOCTL_SVM_ATTR_PREFETCH_LOC respectively. For + * @KFD_IOCTL_SVM_ATTR_SET_FLAGS, flags of all pages will be + * aggregated by bitwise AND. That means, a flag will be set in the + * output, if that flag is set for all pages in the range. For + * @KFD_IOCTL_SVM_ATTR_CLR_FLAGS, flags of all pages will be + * aggregated by bitwise NOR. That means, a flag will be set in the + * output, if that flag is clear for all pages in the range. + * The minimum migration granularity throughout the range will be + * returned for @KFD_IOCTL_SVM_ATTR_GRANULARITY. + * + * Querying of accessibility attributes works by initializing the + * attribute type to @KFD_IOCTL_SVM_ATTR_ACCESS and the value to the + * GPUID being queried. Multiple attributes can be given to allow + * querying multiple GPUIDs. The ioctl function overwrites the + * attribute type to indicate the access for the specified GPU. + */ +struct kfd_ioctl_svm_args { + __u64 start_addr; + __u64 size; + __u32 op; + __u32 nattr; + /* Variable length array of attributes */ + struct kfd_ioctl_svm_attribute attrs[]; +}; + +/** + * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode + * + * @xnack_enabled: [in/out] Whether to enable XNACK mode for this process + * + * @xnack_enabled indicates whether recoverable page faults should be + * enabled for the current process. 
0 means disabled, positive means + * enabled, negative means leave unchanged. If enabled, virtual address + * translations on GFXv9 and later AMD GPUs can return XNACK and retry + * the access until a valid PTE is available. This is used to implement + * device page faults. + * + * On output, @xnack_enabled returns the (new) current mode (0 or + * positive). Therefore, a negative input value can be used to query + * the current mode without changing it. + * + * The XNACK mode fundamentally changes the way SVM managed memory works + * in the driver, with subtle effects on application performance and + * functionality. + * + * Enabling XNACK mode requires shader programs to be compiled + * differently. Furthermore, not all GPUs support changing the mode + * per-process. Therefore changing the mode is only allowed while no + * user mode queues exist in the process. This ensures that no shader + * code is running that may be compiled for the wrong mode. And GPUs + * that cannot change to the requested mode will prevent the XNACK + * mode from occurring. All GPUs used by the process must be in the + * same XNACK mode. + * + * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM. + * Therefore those GPUs are not considered for the XNACK mode switch. 
+ * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_set_xnack_mode_args { + __s32 xnack_enabled; +}; + +/* Wave launch override modes */ +enum kfd_dbg_trap_override_mode { + KFD_DBG_TRAP_OVERRIDE_OR = 0, + KFD_DBG_TRAP_OVERRIDE_REPLACE = 1 +}; + +/* Wave launch overrides */ +enum kfd_dbg_trap_mask { + KFD_DBG_TRAP_MASK_FP_INVALID = 1, + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL = 2, + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO = 4, + KFD_DBG_TRAP_MASK_FP_OVERFLOW = 8, + KFD_DBG_TRAP_MASK_FP_UNDERFLOW = 16, + KFD_DBG_TRAP_MASK_FP_INEXACT = 32, + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO = 64, + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH = 128, + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION = 256, + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START = (1 << 30), + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END = (1 << 31) +}; + +/* Wave launch modes */ +enum kfd_dbg_trap_wave_launch_mode { + KFD_DBG_TRAP_WAVE_LAUNCH_MODE_NORMAL = 0, + KFD_DBG_TRAP_WAVE_LAUNCH_MODE_HALT = 1, + KFD_DBG_TRAP_WAVE_LAUNCH_MODE_DEBUG = 3 +}; + +/* Address watch modes */ +enum kfd_dbg_trap_address_watch_mode { + KFD_DBG_TRAP_ADDRESS_WATCH_MODE_READ = 0, + KFD_DBG_TRAP_ADDRESS_WATCH_MODE_NONREAD = 1, + KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ATOMIC = 2, + KFD_DBG_TRAP_ADDRESS_WATCH_MODE_ALL = 3 +}; + +/* Additional wave settings */ +enum kfd_dbg_trap_flags { + KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1, + KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2, +}; + +/* Trap exceptions */ +enum kfd_dbg_trap_exception_code { + EC_NONE = 0, + /* per queue */ + EC_QUEUE_WAVE_ABORT = 1, + EC_QUEUE_WAVE_TRAP = 2, + EC_QUEUE_WAVE_MATH_ERROR = 3, + EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION = 4, + EC_QUEUE_WAVE_MEMORY_VIOLATION = 5, + EC_QUEUE_WAVE_APERTURE_VIOLATION = 6, + EC_QUEUE_PACKET_DISPATCH_DIM_INVALID = 16, + EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID = 17, + EC_QUEUE_PACKET_DISPATCH_CODE_INVALID = 18, + EC_QUEUE_PACKET_RESERVED = 19, + EC_QUEUE_PACKET_UNSUPPORTED = 20, + EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID = 21, + 
EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID = 22, + EC_QUEUE_PACKET_VENDOR_UNSUPPORTED = 23, + EC_QUEUE_PREEMPTION_ERROR = 30, + EC_QUEUE_NEW = 31, + /* per device */ + EC_DEVICE_QUEUE_DELETE = 32, + EC_DEVICE_MEMORY_VIOLATION = 33, + EC_DEVICE_RAS_ERROR = 34, + EC_DEVICE_FATAL_HALT = 35, + EC_DEVICE_NEW = 36, + /* per process */ + EC_PROCESS_RUNTIME = 48, + EC_PROCESS_DEVICE_REMOVE = 49, + EC_MAX +}; + +/* Mask generated by ecode in kfd_dbg_trap_exception_code */ +#define KFD_EC_MASK(ecode) (1ULL << (ecode - 1)) + +/* Masks for exception code type checks below */ +#define KFD_EC_MASK_QUEUE (KFD_EC_MASK(EC_QUEUE_WAVE_ABORT) | \ + KFD_EC_MASK(EC_QUEUE_WAVE_TRAP) | \ + KFD_EC_MASK(EC_QUEUE_WAVE_MATH_ERROR) | \ + KFD_EC_MASK(EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION) | \ + KFD_EC_MASK(EC_QUEUE_WAVE_MEMORY_VIOLATION) | \ + KFD_EC_MASK(EC_QUEUE_WAVE_APERTURE_VIOLATION) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PREEMPTION_ERROR) | \ + KFD_EC_MASK(EC_QUEUE_NEW)) +#define KFD_EC_MASK_DEVICE (KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE) | \ + KFD_EC_MASK(EC_DEVICE_RAS_ERROR) | \ + KFD_EC_MASK(EC_DEVICE_FATAL_HALT) | \ + KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION) | \ + KFD_EC_MASK(EC_DEVICE_NEW)) +#define KFD_EC_MASK_PROCESS (KFD_EC_MASK(EC_PROCESS_RUNTIME) | \ + KFD_EC_MASK(EC_PROCESS_DEVICE_REMOVE)) +#define KFD_EC_MASK_PACKET (KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_DIM_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_GROUP_SEGMENT_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_CODE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_RESERVED) | \ + 
KFD_EC_MASK(EC_QUEUE_PACKET_UNSUPPORTED) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_WORK_GROUP_SIZE_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_DISPATCH_REGISTER_INVALID) | \ + KFD_EC_MASK(EC_QUEUE_PACKET_VENDOR_UNSUPPORTED)) + +/* Checks for exception code types for KFD search */ +#define KFD_DBG_EC_IS_VALID(ecode) (ecode > EC_NONE && ecode < EC_MAX) +#define KFD_DBG_EC_TYPE_IS_QUEUE(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_QUEUE)) +#define KFD_DBG_EC_TYPE_IS_DEVICE(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_DEVICE)) +#define KFD_DBG_EC_TYPE_IS_PROCESS(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PROCESS)) +#define KFD_DBG_EC_TYPE_IS_PACKET(ecode) \ + (KFD_DBG_EC_IS_VALID(ecode) && !!(KFD_EC_MASK(ecode) & KFD_EC_MASK_PACKET)) + + +/* Runtime enable states */ +enum kfd_dbg_runtime_state { + DEBUG_RUNTIME_STATE_DISABLED = 0, + DEBUG_RUNTIME_STATE_ENABLED = 1, + DEBUG_RUNTIME_STATE_ENABLED_BUSY = 2, + DEBUG_RUNTIME_STATE_ENABLED_ERROR = 3 +}; + +/* Runtime enable status */ +struct kfd_runtime_info { + __u64 r_debug; + __u32 runtime_state; + __u32 ttmp_setup; +}; + +/* Enable modes for runtime enable */ +#define KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK 1 +#define KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK 2 + +/** + * kfd_ioctl_runtime_enable_args - Arguments for runtime enable + * + * Coordinates debug exception signalling and debug device enablement with runtime. + * + * @r_debug - pointer to user struct for sharing information between ROCr and the debugger + * @mode_mask - mask to set mode + * KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK - enable runtime for debugging, otherwise disable + * KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK - enable trap temporary setup (ignore on disable) + * @capabilities_mask - mask to notify runtime on what KFD supports + * + * Return - 0 on SUCCESS. + * - EBUSY if runtime enable call already pending. 
+ * - EEXIST if user queues already active prior to call. + * If process is debug enabled, runtime enable will enable debug devices and + * wait for debugger process to send runtime exception EC_PROCESS_RUNTIME + * to unblock - see kfd_ioctl_dbg_trap_args. + * + */ +struct kfd_ioctl_runtime_enable_args { + __u64 r_debug; + __u32 mode_mask; + __u32 capabilities_mask; +}; + +/* Queue information */ +struct kfd_queue_snapshot_entry { + __u64 exception_status; + __u64 ring_base_address; + __u64 write_pointer_address; + __u64 read_pointer_address; + __u64 ctx_save_restore_address; + __u32 queue_id; + __u32 gpu_id; + __u32 ring_size; + __u32 queue_type; + __u32 ctx_save_restore_area_size; + __u32 reserved; +}; + +/* Queue status return for suspend/resume */ +#define KFD_DBG_QUEUE_ERROR_BIT 30 +#define KFD_DBG_QUEUE_INVALID_BIT 31 +#define KFD_DBG_QUEUE_ERROR_MASK (1 << KFD_DBG_QUEUE_ERROR_BIT) +#define KFD_DBG_QUEUE_INVALID_MASK (1 << KFD_DBG_QUEUE_INVALID_BIT) + +/* Context save area header information */ +struct kfd_context_save_area_header { + struct { + __u32 control_stack_offset; + __u32 control_stack_size; + __u32 wave_state_offset; + __u32 wave_state_size; + } wave_state; + __u32 debug_offset; + __u32 debug_size; + __u64 err_payload_addr; + __u32 err_event_id; + __u32 reserved1; +}; + +/* + * Debug operations + * + * For specifics on usage and return values, see documentation per operation + * below. Otherwise, generic error returns apply: + * - ESRCH if the process to debug does not exist. + * + * - EINVAL (with KFD_IOC_DBG_TRAP_ENABLE exempt) if operation + * KFD_IOC_DBG_TRAP_ENABLE has not succeeded prior. + * Also returns this error if GPU hardware scheduling is not supported. + * + * - EPERM (with KFD_IOC_DBG_TRAP_DISABLE exempt) if target process is not + * PTRACE_ATTACHED. KFD_IOC_DBG_TRAP_DISABLE is exempt to allow + * clean up of debug mode as long as process is debug enabled. 
+ * + * - EACCES if any DBG_HW_OP (debug hardware operation) is requested when + * AMDKFD_IOC_RUNTIME_ENABLE has not succeeded prior. + * + * - ENODEV if any GPU does not support debugging on a DBG_HW_OP call. + * + * - Other errors may be returned when a DBG_HW_OP occurs while the GPU + * is in a fatal state. + * + */ +enum kfd_dbg_trap_operations { + KFD_IOC_DBG_TRAP_ENABLE = 0, + KFD_IOC_DBG_TRAP_DISABLE = 1, + KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT = 2, + KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED = 3, + KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE = 4, /* DBG_HW_OP */ + KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE = 5, /* DBG_HW_OP */ + KFD_IOC_DBG_TRAP_SUSPEND_QUEUES = 6, /* DBG_HW_OP */ + KFD_IOC_DBG_TRAP_RESUME_QUEUES = 7, /* DBG_HW_OP */ + KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH = 8, /* DBG_HW_OP */ + KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH = 9, /* DBG_HW_OP */ + KFD_IOC_DBG_TRAP_SET_FLAGS = 10, + KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT = 11, + KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO = 12, + KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT = 13, + KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT = 14 +}; + +/** + * kfd_ioctl_dbg_trap_enable_args + * + * Arguments for KFD_IOC_DBG_TRAP_ENABLE. + * + * Enables debug session for target process. Call @op KFD_IOC_DBG_TRAP_DISABLE in + * kfd_ioctl_dbg_trap_args to disable debug session. + * + * @exception_mask (IN) - exceptions to raise to the debugger + * @rinfo_ptr (IN) - pointer to runtime info buffer (see kfd_runtime_info) + * @rinfo_size (IN/OUT) - size of runtime info buffer in bytes + * @dbg_fd (IN) - fd the KFD will use to notify the debugger of raised + * exceptions set in exception_mask. + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * Copies KFD saved kfd_runtime_info to @rinfo_ptr on enable. + * Size of kfd_runtime_info saved by the KFD returned to @rinfo_size. + * - EBADF if KFD cannot get a reference to dbg_fd. + * - EFAULT if KFD cannot copy runtime info to rinfo_ptr. 
+ * - EINVAL if target process is already debug enabled. + * + */ +struct kfd_ioctl_dbg_trap_enable_args { + __u64 exception_mask; + __u64 rinfo_ptr; + __u32 rinfo_size; + __u32 dbg_fd; +}; + +/** + * kfd_ioctl_dbg_trap_send_runtime_event_args + * + * + * Arguments for KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT. + * Raises exceptions to runtime. + * + * @exception_mask (IN) - exceptions to raise to runtime + * @gpu_id (IN) - target device id + * @queue_id (IN) - target queue id + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * - ENODEV if gpu_id not found. + * If exception_mask contains EC_PROCESS_RUNTIME, unblocks pending + * AMDKFD_IOC_RUNTIME_ENABLE call - see kfd_ioctl_runtime_enable_args. + * All other exceptions are raised to runtime through err_payload_addr. + * See kfd_context_save_area_header. + */ +struct kfd_ioctl_dbg_trap_send_runtime_event_args { + __u64 exception_mask; + __u32 gpu_id; + __u32 queue_id; +}; + +/** + * kfd_ioctl_dbg_trap_set_exceptions_enabled_args + * + * Arguments for KFD_IOC_SET_EXCEPTIONS_ENABLED + * Set new exceptions to be raised to the debugger. + * + * @exception_mask (IN) - new exceptions to raise the debugger + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + */ +struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args { + __u64 exception_mask; +}; + +/** + * kfd_ioctl_dbg_trap_set_wave_launch_override_args + * + * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE + * Enable HW exceptions to raise trap. + * + * @override_mode (IN) - see kfd_dbg_trap_override_mode + * @enable_mask (IN/OUT) - reference kfd_dbg_trap_mask. + * IN is the override modes requested to be enabled. + * OUT is referenced in Return below. + * @support_request_mask (IN/OUT) - reference kfd_dbg_trap_mask. + * IN is the override modes requested for support check. + * OUT is referenced in Return below. + * + * Generic errors apply (see kfd_dbg_trap_operations). 
+ * Return - 0 on SUCCESS. + * Previous enablement is returned in @enable_mask. + * Actual override support is returned in @support_request_mask. + * - EINVAL if override mode is not supported. + * - EACCES if trap support requested is not actually supported. + * i.e. enable_mask (IN) is not a subset of support_request_mask (OUT). + * Otherwise it is considered a generic error (see kfd_dbg_trap_operations). + */ +struct kfd_ioctl_dbg_trap_set_wave_launch_override_args { + __u32 override_mode; + __u32 enable_mask; + __u32 support_request_mask; + __u32 pad; +}; + +/** + * kfd_ioctl_dbg_trap_set_wave_launch_mode_args + * + * Arguments for KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE + * Set wave launch mode. + * + * @launch_mode (IN) - see kfd_dbg_trap_wave_launch_mode + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + */ +struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args { + __u32 launch_mode; + __u32 pad; +}; + +/** + * kfd_ioctl_dbg_trap_suspend_queues_args + * + * Arguments for KFD_IOC_DBG_TRAP_SUSPEND_QUEUES + * Suspend queues. + * + * @exception_mask (IN) - raised exceptions to clear + * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id) + * to suspend + * @num_queues (IN) - number of queues to suspend in @queue_array_ptr + * @grace_period (IN) - wave time allowance before preemption + * per 1K GPU clock cycle unit + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Destruction of a suspended queue is blocked until the queue is + * resumed. This allows the debugger to access queue information and + * its context save area without running into a race condition on + * queue destruction. + * Automatically copies per queue context save area header information + * into the save area base + * (see kfd_queue_snapshot_entry and kfd_context_save_area_header). + * + * Return - Number of queues suspended on SUCCESS. + * . 
KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK masked + * for each queue id in @queue_array_ptr array reports unsuccessful + * suspend reason. + * KFD_DBG_QUEUE_ERROR_MASK = HW failure. + * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist, is new or + * is being destroyed. + */ +struct kfd_ioctl_dbg_trap_suspend_queues_args { + __u64 exception_mask; + __u64 queue_array_ptr; + __u32 num_queues; + __u32 grace_period; +}; + +/** + * kfd_ioctl_dbg_trap_resume_queues_args + * + * Arguments for KFD_IOC_DBG_TRAP_RESUME_QUEUES + * Resume queues. + * + * @queue_array_ptr (IN) - pointer to array of queue ids (u32 per queue id) + * to resume + * @num_queues (IN) - number of queues to resume in @queue_array_ptr + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - Number of queues resumed on SUCCESS. + * KFD_DBG_QUEUE_ERROR_MASK and KFD_DBG_QUEUE_INVALID_MASK mask + * for each queue id in @queue_array_ptr array reports unsuccessful + * resume reason. + * KFD_DBG_QUEUE_ERROR_MASK = HW failure. + * KFD_DBG_QUEUE_INVALID_MASK = queue does not exist. + */ +struct kfd_ioctl_dbg_trap_resume_queues_args { + __u64 queue_array_ptr; + __u32 num_queues; + __u32 pad; +}; + +/** + * kfd_ioctl_dbg_trap_set_node_address_watch_args + * + * Arguments for KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH + * Sets address watch for device. + * + * @address (IN) - watch address to set + * @mode (IN) - see kfd_dbg_trap_address_watch_mode + * @mask (IN) - watch address mask + * @gpu_id (IN) - target gpu to set watch point + * @id (OUT) - watch id allocated + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * Allocated watch ID returned to @id. + * - ENODEV if gpu_id not found. 
+ * - ENOMEM if watch IDs cannot be allocated + */ +struct kfd_ioctl_dbg_trap_set_node_address_watch_args { + __u64 address; + __u32 mode; + __u32 mask; + __u32 gpu_id; + __u32 id; +}; + +/** + * kfd_ioctl_dbg_trap_clear_node_address_watch_args + * + * Arguments for KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH + * Clear address watch for device. + * + * @gpu_id (IN) - target device to clear watch point + * @id (IN) - allocated watch id to clear + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * - ENODEV if gpu_id not found. + * - EINVAL if watch ID has not been allocated. + */ +struct kfd_ioctl_dbg_trap_clear_node_address_watch_args { + __u32 gpu_id; + __u32 id; +}; + +/** + * kfd_ioctl_dbg_trap_set_flags_args + * + * Arguments for KFD_IOC_DBG_TRAP_SET_FLAGS + * Sets flags for wave behaviour. + * + * @flags (IN/OUT) - IN = flags to enable, OUT = flags previously enabled + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * - EACCES if any debug device does not allow flag options. + */ +struct kfd_ioctl_dbg_trap_set_flags_args { + __u32 flags; + __u32 pad; +}; + +/** + * kfd_ioctl_dbg_trap_query_debug_event_args + * + * Arguments for KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT + * + * Find one or more raised exceptions. This function can return multiple + * exceptions from a single queue or a single device with one call. To find + * all raised exceptions, this function must be called repeatedly until it + * returns -EAGAIN. Returned exceptions can optionally be cleared by + * setting the corresponding bit in the @exception_mask input parameter. + * However, clearing an exception prevents retrieving further information + * about it with KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO. 
+ * + * @exception_mask (IN/OUT) - exception to clear (IN) and raised (OUT) + * @gpu_id (OUT) - gpu id of exceptions raised + * @queue_id (OUT) - queue id of exceptions raised + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on raised exception found + * Raised exceptions found are returned in @exception_mask + * with reported source id returned in @gpu_id or @queue_id. + * - EAGAIN if no raised exception has been found + */ +struct kfd_ioctl_dbg_trap_query_debug_event_args { + __u64 exception_mask; + __u32 gpu_id; + __u32 queue_id; +}; + +/** + * kfd_ioctl_dbg_trap_query_exception_info_args + * + * Arguments for KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO + * Get additional info on raised exception. + * + * @info_ptr (IN) - pointer to exception info buffer to copy to + * @info_size (IN/OUT) - exception info buffer size (bytes) + * @source_id (IN) - target gpu or queue id + * @exception_code (IN) - target exception + * @clear_exception (IN) - clear raised @exception_code exception + * (0 = false, 1 = true) + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * If @exception_code is EC_DEVICE_MEMORY_VIOLATION, copy @info_size(OUT) + * bytes of memory exception data to @info_ptr. + * If @exception_code is EC_PROCESS_RUNTIME, copy saved + * kfd_runtime_info to @info_ptr. + * Actual required @info_ptr size (bytes) is returned in @info_size. + */ +struct kfd_ioctl_dbg_trap_query_exception_info_args { + __u64 info_ptr; + __u32 info_size; + __u32 source_id; + __u32 exception_code; + __u32 clear_exception; +}; + +/** + * kfd_ioctl_dbg_trap_queue_snapshot_args + * + * Arguments for KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT + * Get queue information. + * + * @exception_mask (IN) - exceptions raised to clear + * @snapshot_buf_ptr (IN) - queue snapshot entry buffer (see kfd_queue_snapshot_entry) + * @num_queues (IN/OUT) - number of queue snapshot entries + * The debugger specifies the size of the array allocated in @num_queues. 
+ * KFD returns the number of queues that actually existed. If this is + * larger than the size specified by the debugger, KFD will not overflow + * the array allocated by the debugger. + * + * @entry_size (IN/OUT) - size per entry in bytes + * The debugger specifies sizeof(struct kfd_queue_snapshot_entry) in + * @entry_size. KFD returns the number of bytes actually populated per + * entry. The debugger should use the KFD_IOCTL_MINOR_VERSION to determine, + * which fields in struct kfd_queue_snapshot_entry are valid. This allows + * growing the ABI in a backwards compatible manner. + * Note that entry_size(IN) should still be used to stride the snapshot buffer in the + * event that it's larger than actual kfd_queue_snapshot_entry. + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * Copies @num_queues(IN) queue snapshot entries of size @entry_size(IN) + * into @snapshot_buf_ptr if @num_queues(IN) > 0. + * Otherwise return @num_queues(OUT) queue snapshot entries that exist. + */ +struct kfd_ioctl_dbg_trap_queue_snapshot_args { + __u64 exception_mask; + __u64 snapshot_buf_ptr; + __u32 num_queues; + __u32 entry_size; +}; + +/** + * kfd_ioctl_dbg_trap_get_device_snapshot_args + * + * Arguments for KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT + * Get device information. + * + * @exception_mask (IN) - exceptions raised to clear + * @snapshot_buf_ptr (IN) - pointer to snapshot buffer (see kfd_dbg_device_info_entry) + * @num_devices (IN/OUT) - number of debug devices to snapshot + * The debugger specifies the size of the array allocated in @num_devices. + * KFD returns the number of devices that actually existed. If this is + * larger than the size specified by the debugger, KFD will not overflow + * the array allocated by the debugger. + * + * @entry_size (IN/OUT) - size per entry in bytes + * The debugger specifies sizeof(struct kfd_dbg_device_info_entry) in + * @entry_size. KFD returns the number of bytes actually populated. 
The + * debugger should use KFD_IOCTL_MINOR_VERSION to determine which fields + * in struct kfd_dbg_device_info_entry are valid. This allows growing the + * ABI in a backwards compatible manner. + * Note that entry_size(IN) should still be used to stride the snapshot buffer in the + * event that it's larger than actual kfd_dbg_device_info_entry. + * + * Generic errors apply (see kfd_dbg_trap_operations). + * Return - 0 on SUCCESS. + * Copies @num_devices(IN) device snapshot entries of size @entry_size(IN) + * into @snapshot_buf_ptr if @num_devices(IN) > 0. + * Otherwise return @num_devices(OUT) device snapshot entries that exist. + */ +struct kfd_ioctl_dbg_trap_device_snapshot_args { + __u64 exception_mask; + __u64 snapshot_buf_ptr; + __u32 num_devices; + __u32 entry_size; +}; + +/** + * kfd_ioctl_dbg_trap_args + * + * Arguments to debug target process. + * + * @pid - target process to debug + * @op - debug operation (see kfd_dbg_trap_operations) + * + * @op determines which union struct args to use. + * Refer to kernel docs for each kfd_ioctl_dbg_trap_*_args struct. 
+ */ +struct kfd_ioctl_dbg_trap_args { + __u32 pid; + __u32 op; + + union { + struct kfd_ioctl_dbg_trap_enable_args enable; + struct kfd_ioctl_dbg_trap_send_runtime_event_args send_runtime_event; + struct kfd_ioctl_dbg_trap_set_exceptions_enabled_args set_exceptions_enabled; + struct kfd_ioctl_dbg_trap_set_wave_launch_override_args launch_override; + struct kfd_ioctl_dbg_trap_set_wave_launch_mode_args launch_mode; + struct kfd_ioctl_dbg_trap_suspend_queues_args suspend_queues; + struct kfd_ioctl_dbg_trap_resume_queues_args resume_queues; + struct kfd_ioctl_dbg_trap_set_node_address_watch_args set_node_address_watch; + struct kfd_ioctl_dbg_trap_clear_node_address_watch_args clear_node_address_watch; + struct kfd_ioctl_dbg_trap_set_flags_args set_flags; + struct kfd_ioctl_dbg_trap_query_debug_event_args query_debug_event; + struct kfd_ioctl_dbg_trap_query_exception_info_args query_exception_info; + struct kfd_ioctl_dbg_trap_queue_snapshot_args queue_snapshot; + struct kfd_ioctl_dbg_trap_device_snapshot_args device_snapshot; + }; +}; + +/** + * kfd_ioctl_pc_sample_op - PC Sampling ioctl operations + * + * @KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES: Query device PC Sampling capabilities + * @KFD_IOCTL_PCS_OP_CREATE: Register this process with a per-device PC sampler instance + * @KFD_IOCTL_PCS_OP_DESTROY: Unregister from a previously registered PC sampler instance + * @KFD_IOCTL_PCS_OP_START: Process begins taking samples from a previously registered PC sampler instance + * @KFD_IOCTL_PCS_OP_STOP: Process stops taking samples from a previously registered PC sampler instance + */ +enum kfd_ioctl_pc_sample_op { + KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES, + KFD_IOCTL_PCS_OP_CREATE, + KFD_IOCTL_PCS_OP_DESTROY, + KFD_IOCTL_PCS_OP_START, + KFD_IOCTL_PCS_OP_STOP, +}; + +/* Values have to be a power of 2*/ +#define KFD_IOCTL_PCS_FLAG_POWER_OF_2 0x00000001 + +enum kfd_ioctl_pc_sample_method { + KFD_IOCTL_PCS_METHOD_HOSTTRAP = 1, + KFD_IOCTL_PCS_METHOD_STOCHASTIC, +}; + +enum 
kfd_ioctl_pc_sample_type { + KFD_IOCTL_PCS_TYPE_TIME_US, + KFD_IOCTL_PCS_TYPE_CLOCK_CYCLES, + KFD_IOCTL_PCS_TYPE_INSTRUCTIONS +}; + +struct kfd_pc_sample_info { + __u64 interval; /* [IN] if PCS_TYPE_TIME_US: sample interval in us + * if PCS_TYPE_CLOCK_CYCLES: sample interval in graphics core clk cycles + * if PCS_TYPE_INSTRUCTIONS: sample interval in instructions issued by + * graphics compute units + */ + __u64 interval_min; /* [OUT] */ + __u64 interval_max; /* [OUT] */ + __u64 flags; /* [OUT] indicate potential restrictions e.g. FLAG_POWER_OF_2 */ + __u32 method; /* [IN/OUT] kfd_ioctl_pc_sample_method */ + __u32 type; /* [IN/OUT] kfd_ioctl_pc_sample_type */ +}; + +#define KFD_IOCTL_PCS_QUERY_TYPE_FULL (1 << 0) /* If not set, return current */ + +struct kfd_ioctl_pc_sample_args { + __u64 sample_info_ptr; /* array of kfd_pc_sample_info */ + __u32 num_sample_info; + __u32 op; /* kfd_ioctl_pc_sample_op */ + __u32 gpu_id; + __u32 trace_id; + __u32 flags; /* kfd_ioctl_pcs_query flags */ + __u32 version; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -659,45 +1677,44 @@ struct kfd_ioctl_cross_memory_copy_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) -#define AMDKFD_IOC_DBG_REGISTER \ +#define AMDKFD_IOC_DBG_REGISTER_DEPRECATED \ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) -#define AMDKFD_IOC_DBG_UNREGISTER \ +#define AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED \ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) -#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ +#define AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED \ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) -#define AMDKFD_IOC_DBG_WAVE_CONTROL \ +#define AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED \ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) #define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \ AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args) 
-#define AMDKFD_IOC_GET_TILE_CONFIG \ +#define AMDKFD_IOC_GET_TILE_CONFIG \ AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args) #define AMDKFD_IOC_SET_TRAP_HANDLER \ AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args) -#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ - AMDKFD_IOWR(0x14, \ - struct kfd_ioctl_get_process_apertures_new_args) +#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \ + AMDKFD_IOWR(0x14, struct kfd_ioctl_get_process_apertures_new_args) -#define AMDKFD_IOC_ACQUIRE_VM \ - AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) +#define AMDKFD_IOC_ACQUIRE_VM \ + AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args) -#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ - AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) +#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \ + AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args) -#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ - AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) +#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \ + AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args) #define AMDKFD_IOC_MAP_MEMORY_TO_GPU \ AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args) -#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ - AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) +#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \ + AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args) #define AMDKFD_IOC_SET_CU_MASK \ AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) @@ -717,28 +1734,69 @@ struct kfd_ioctl_cross_memory_copy_args { #define AMDKFD_IOC_SMI_EVENTS \ AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args) +#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) + +#define AMDKFD_IOC_SET_XNACK_MODE \ + AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) + +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + +#define AMDKFD_IOC_AVAILABLE_MEMORY \ + AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args) + +#define AMDKFD_IOC_EXPORT_DMABUF \ + 
AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args) + +#define AMDKFD_IOC_RUNTIME_ENABLE \ + AMDKFD_IOWR(0x25, struct kfd_ioctl_runtime_enable_args) + +#define AMDKFD_IOC_DBG_TRAP \ + AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args) + +#define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) + +#define AMDKFD_IOC_SET_XNACK_MODE \ + AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) + +#define AMDKFD_IOC_CRIU_OP \ + AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) + +#define AMDKFD_IOC_AVAILABLE_MEMORY \ + AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args) + +#define AMDKFD_IOC_EXPORT_DMABUF \ + AMDKFD_IOWR(0x24, struct kfd_ioctl_export_dmabuf_args) + +#define AMDKFD_IOC_RUNTIME_ENABLE \ + AMDKFD_IOWR(0x25, struct kfd_ioctl_runtime_enable_args) + +#define AMDKFD_IOC_DBG_TRAP \ + AMDKFD_IOWR(0x26, struct kfd_ioctl_dbg_trap_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x20 +#define AMDKFD_COMMAND_END 0x27 /* non-upstream ioctls */ #define AMDKFD_IOC_IPC_IMPORT_HANDLE \ - AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args) + AMDKFD_IOWR(0x80, struct kfd_ioctl_ipc_import_handle_args) #define AMDKFD_IOC_IPC_EXPORT_HANDLE \ - AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args) + AMDKFD_IOWR(0x81, struct kfd_ioctl_ipc_export_handle_args) -#define AMDKFD_IOC_DBG_TRAP \ - AMDKFD_IOWR(0x21, struct kfd_ioctl_dbg_trap_args) +#define AMDKFD_IOC_DBG_TRAP_DEPRECATED \ + AMDKFD_IOWR(0x82, struct kfd_ioctl_dbg_trap_args_deprecated) -#define AMDKFD_IOC_CROSS_MEMORY_COPY \ - AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args) +#define AMDKFD_IOC_CROSS_MEMORY_COPY_DEPRECATED \ + AMDKFD_IOWR(0x83, struct kfd_ioctl_cross_memory_copy_deprecated_args) +#define AMDKFD_IOC_RLC_SPM \ + AMDKFD_IOWR(0x84, struct kfd_ioctl_spm_args) -#define AMDKFD_IOC_AVAILABLE_MEMORY \ - AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args) +#define AMDKFD_IOC_PC_SAMPLE \ + AMDKFD_IOWR(0x85, struct kfd_ioctl_pc_sample_args) 
-#define AMDKFD_COMMAND_START 0x01 -#undef AMDKFD_COMMAND_END -#define AMDKFD_COMMAND_END 0x22 +#define AMDKFD_COMMAND_START_2 0x80 +#define AMDKFD_COMMAND_END_2 0x86 -#endif // INCLUDE_ROCM_SMI_KFD_IOCTL_H_ +#endif diff --git a/rocm_smi/include/rocm_smi/rocm_smi.h b/rocm_smi/include/rocm_smi/rocm_smi.h index 40b2180b51..f4c58b5bcc 100644 --- a/rocm_smi/include/rocm_smi/rocm_smi.h +++ b/rocm_smi/include/rocm_smi/rocm_smi.h @@ -344,9 +344,15 @@ typedef enum { RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE, RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET, RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET, - RSMI_EVT_NOTIF_RING_HANG = KFD_SMI_EVENT_RING_HANG, - - RSMI_EVT_NOTIF_LAST = RSMI_EVT_NOTIF_RING_HANG + RSMI_EVT_NOTIF_EVENT_MIGRATE_START = KFD_SMI_EVENT_MIGRATE_START, + RSMI_EVT_NOTIF_EVENT_MIGRATE_END = KFD_SMI_EVENT_MIGRATE_END, + RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_START = KFD_SMI_EVENT_PAGE_FAULT_START, + RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_END = KFD_SMI_EVENT_PAGE_FAULT_END, + RSMI_EVT_NOTIF_EVENT_QUEUE_EVICTION = KFD_SMI_EVENT_QUEUE_EVICTION, + RSMI_EVT_NOTIF_EVENT_QUEUE_RESTORE = KFD_SMI_EVENT_QUEUE_RESTORE, + RSMI_EVT_NOTIF_EVENT_UNMAP_FROM_GPU = KFD_SMI_EVENT_UNMAP_FROM_GPU, + RSMI_EVT_NOTIF_EVENT_ALL_PROCESS = KFD_SMI_EVENT_ALL_PROCESS, + RSMI_EVT_NOTIF_LAST = KFD_SMI_EVENT_ALL_PROCESS } rsmi_evt_notification_type_t; /** @@ -355,7 +361,8 @@ typedef enum { #define RSMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) //! 
Maximum number of characters an event notification message will be -#define MAX_EVENT_NOTIFICATION_MSG_SIZE 64 +// matches kfd message max size +#define MAX_EVENT_NOTIFICATION_MSG_SIZE 96 /** * Event notification data returned from event notification API @@ -1264,7 +1271,7 @@ typedef struct { /** * Accumulated throttler residencies * - * Socket (thermal) - + * Socket (thermal) - * Socket thermal violation % (greater than 0% is a violation); * aka TVIOL * diff --git a/rocm_smi/src/rocm_smi.cc b/rocm_smi/src/rocm_smi.cc index 5f36e8d256..6c0a6767f5 100644 --- a/rocm_smi/src/rocm_smi.cc +++ b/rocm_smi/src/rocm_smi.cc @@ -6618,16 +6618,230 @@ rsmi_event_notification_get(int timeout_ms, reinterpret_cast(&data[*num_elem]); uint32_t event; - while (fscanf(anon_fp, "%x %63s\n", &event, - reinterpret_cast(&data_item->message)) == 2) { - /* Output is in format as "event information\n" + char event_in[MAX_EVENT_NOTIFICATION_MSG_SIZE]; + memcpy(reinterpret_cast(event_in), "\0", MAX_EVENT_NOTIFICATION_MSG_SIZE); + while (fgets(event_in, MAX_EVENT_NOTIFICATION_MSG_SIZE, anon_fp)) { + /* Output is in format as "event_number message_information\n" * Both event are expressed in hex. 
* information is a string */ + char message[MAX_EVENT_NOTIFICATION_MSG_SIZE]; + // parse the line here for event_number and rest of message_information + sscanf(event_in, "%x %[^\n]\n", &event, message); + + // parse message based on event received + switch (event){ + case RSMI_EVT_NOTIF_NONE: + strcpy(reinterpret_cast(&data_item->message), "Event type None received"); + break; + case RSMI_EVT_NOTIF_VMFAULT: + { + uint32_t pid; + char task_name[MAX_EVENT_NOTIFICATION_MSG_SIZE]; + memcpy(reinterpret_cast(task_name), "\0", MAX_EVENT_NOTIFICATION_MSG_SIZE); + + sscanf(message, "%x:%s\n", &pid, task_name); + std::stringstream final_message; + final_message << "PID: " << std::to_string(pid).c_str() + << " task name: " << task_name; + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_THERMAL_THROTTLE: + { + uint64_t bitmask; + uint64_t counter; + + sscanf(message, "%llx:%llx\n", &bitmask, &counter); + std::stringstream final_message; + final_message << "bitmask: 0x" << std::hex << bitmask + << " counter: 0x" << std::hex << counter; + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_GPU_PRE_RESET: + { + uint32_t reset_seq_num; + char reset_cause[MAX_EVENT_NOTIFICATION_MSG_SIZE]; + memcpy(reinterpret_cast(reset_cause), "\0", MAX_EVENT_NOTIFICATION_MSG_SIZE); + + sscanf(message, "%x %[^\n]\n", &reset_seq_num, reset_cause); + std::stringstream final_message; + final_message << "reset sequence number: " << std::to_string(reset_seq_num).c_str() + << " reset cause: " << reset_cause; + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_GPU_POST_RESET: + { + uint32_t reset_seq_num; + + sscanf(message, "%x %[^\n]\n", &reset_seq_num); + std::stringstream final_message; + final_message << "reset sequence number: " << std::to_string(reset_seq_num).c_str(); + + 
strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_MIGRATE_START: + { + int64_t ns; + int32_t pid; + uint32_t start; + uint32_t size; + uint16_t from; + uint16_t to; + uint16_t prefetch_loc; + uint16_t preferred_loc; + int32_t migrate_trigger; + + sscanf(message, "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", &ns, &pid, &start, &size, &from, &to, &prefetch_loc, &preferred_loc, &migrate_trigger); + std::stringstream final_message; + final_message << "nd: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " start: 0x" << std::hex << start + << " size: 0x" << std::hex << size + << " from: 0x" << std::hex << from + << " to: 0x" << std::hex << to + << " prefetch_loc: 0x" << std::hex << prefetch_loc + << " preferred_loc: 0x" << std::hex << preferred_loc + << " migrate_trigger: " << std::to_string(migrate_trigger).c_str(); + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_MIGRATE_END: + { + int64_t ns; + int32_t pid; + uint32_t start; + uint32_t size; + uint32_t from; + uint32_t to; + uint32_t migrate_trigger; + uint32_t error_code; + + sscanf(message, "%lld -%d @%lx(%lx) %x->%x %d %d\n", &ns, &pid, &start, &size, &from, &to, &migrate_trigger, &error_code); + std::stringstream final_message; + final_message << "nd: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " start: 0x" << std::hex << start + << " size: 0x" << std::hex << size + << " from: 0x" << std::hex << from + << " to: 0x" << std::hex << to + << " migrate_trigger: " << std::to_string(migrate_trigger).c_str() + << " error_code: " << std::to_string(error_code).c_str(); + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_START: + { + int64_t ns; + int32_t pid; + uint32_t addr; + uint32_t node; + char *rw; + + sscanf(message, "%lld -%d @%lx(%x) 
%c\n", &ns, &pid, &addr, &node, rw); + std::stringstream final_message; + final_message << "ns: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " addr: 0x" << std::hex << addr + << " node: 0x" << std::hex << node + << " rw: " << rw; + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_END: + { + int64_t ns; + int32_t pid; + uint32_t addr; + uint32_t node; + char *migrate_update; + + sscanf(message, "%lld -%d @%lx(%x) %c\n", &ns, &pid, &addr, &node, migrate_update); + std::stringstream final_message; + final_message << "ns: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " addr: 0x" << std::hex << addr + << " node: 0x" << std::hex << node + << " migrate_udpate: " << migrate_update; + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_QUEUE_EVICTION: + { + int64_t ns; + int32_t pid; + uint32_t node; + uint32_t evict_trigger; + + sscanf(message, "%lld -%d %x %d\n", &ns, &pid, &node, &evict_trigger); + std::stringstream final_message; + final_message << "ns: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " node: 0x" << std::hex << node + << " evict_trigger: " << std::to_string(evict_trigger).c_str(); + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_QUEUE_RESTORE: + { + int64_t ns; + int32_t pid; + uint32_t node; + char *rescheduled; + + sscanf(message, "%lld -%d %x %c\n", &ns, &pid, &node, rescheduled); + std::stringstream final_message; + final_message << "ns: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " node: 0x" << std::hex << node + << " rescheduled: " << rescheduled; + + strcpy(reinterpret_cast(&data_item->message), final_message.str().c_str()); + } + break; + case RSMI_EVT_NOTIF_EVENT_UNMAP_FROM_GPU: + { + 
int64_t ns; + int32_t pid; + uint32_t addr; + uint32_t size; + uint32_t node; + uint32_t unmap_trigger; + + sscanf(message, "%lld -%d @%lx(%lx) %x %d\n", &ns, &pid, &addr, &size, &node, &unmap_trigger); + std::stringstream final_message; + final_message << "ns: " << std::to_string(ns).c_str() + << " pid: " << std::to_string(pid).c_str() + << " addr: 0x" <(&data_item->message), final_message.str().c_str()); + } + break; + default: + strcpy(reinterpret_cast(&data_item->message), "Unknown event received"); + break; + } data_item->event = (rsmi_evt_notification_type_t)event; data_item->dv_ind = fd_indx_to_dev_id[i]; ++(*num_elem); + // zero out event_in after each use + memcpy(reinterpret_cast(event_in), "\0", MAX_EVENT_NOTIFICATION_MSG_SIZE); + if (*num_elem >= buffer_size) { break; }