Add ref counting for rsmi init and shutdown

Also, clean lint from kfd_ioctl.h file.

Change-Id: I5a2ae127ab6ab6676a1b075ed10858d0ebfe13c1


[ROCm/rocm_smi_lib commit: 8e03d10035]
Bu işleme şunda yer alıyor:
Chris Freehill
2020-05-11 15:24:47 -05:00
ebeveyn ab2a22c90c
işleme 0ab5e76b33
9 değiştirilmiş dosya ile 834 ekleme ve 375 silme
+351 -354
Dosyayı Görüntüle
@@ -28,82 +28,82 @@
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 2
#define KFD_IOCTL_DBG_MAJOR_VERSION 1
#define KFD_IOCTL_DBG_MINOR_VERSION 0
#define KFD_IOCTL_DBG_MAJOR_VERSION 1
#define KFD_IOCTL_DBG_MINOR_VERSION 0
struct kfd_ioctl_get_version_args {
__u32 major_version; /* from KFD */
__u32 minor_version; /* from KFD */
__u32 major_version; /* from KFD */
__u32 minor_version; /* from KFD */
};
/* For kfd_ioctl_create_queue_args.queue_type. */
#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
#define KFD_MAX_QUEUE_PERCENTAGE 100
#define KFD_MAX_QUEUE_PRIORITY 15
#define KFD_MAX_QUEUE_PERCENTAGE 100
#define KFD_MAX_QUEUE_PRIORITY 15
struct kfd_ioctl_create_queue_args {
__u64 ring_base_address; /* to KFD */
__u64 write_pointer_address; /* from KFD */
__u64 read_pointer_address; /* from KFD */
__u64 doorbell_offset; /* from KFD */
__u64 ring_base_address; /* to KFD */
__u64 write_pointer_address; /* from KFD */
__u64 read_pointer_address; /* from KFD */
__u64 doorbell_offset; /* from KFD */
__u32 ring_size; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 queue_type; /* to KFD */
__u32 queue_percentage; /* to KFD */
__u32 queue_priority; /* to KFD */
__u32 queue_id; /* from KFD */
__u32 ring_size; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 queue_type; /* to KFD */
__u32 queue_percentage; /* to KFD */
__u32 queue_priority; /* to KFD */
__u32 queue_id; /* from KFD */
__u64 eop_buffer_address; /* to KFD */
__u64 eop_buffer_size; /* to KFD */
__u64 ctx_save_restore_address; /* to KFD */
__u32 ctx_save_restore_size; /* to KFD */
__u32 ctl_stack_size; /* to KFD */
__u64 eop_buffer_address; /* to KFD */
__u64 eop_buffer_size; /* to KFD */
__u64 ctx_save_restore_address; /* to KFD */
__u32 ctx_save_restore_size; /* to KFD */
__u32 ctl_stack_size; /* to KFD */
};
struct kfd_ioctl_destroy_queue_args {
__u32 queue_id; /* to KFD */
__u32 pad;
__u32 queue_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_update_queue_args {
__u64 ring_base_address; /* to KFD */
__u64 ring_base_address; /* to KFD */
__u32 queue_id; /* to KFD */
__u32 ring_size; /* to KFD */
__u32 queue_percentage; /* to KFD */
__u32 queue_priority; /* to KFD */
__u32 queue_id; /* to KFD */
__u32 ring_size; /* to KFD */
__u32 queue_percentage; /* to KFD */
__u32 queue_priority; /* to KFD */
};
struct kfd_ioctl_set_cu_mask_args {
__u32 queue_id; /* to KFD */
__u32 num_cu_mask; /* to KFD */
__u64 cu_mask_ptr; /* to KFD */
__u32 queue_id; /* to KFD */
__u32 num_cu_mask; /* to KFD */
__u64 cu_mask_ptr; /* to KFD */
};
struct kfd_ioctl_get_queue_wave_state_args {
__u64 ctl_stack_address; /* to KFD */
__u32 ctl_stack_used_size; /* from KFD */
__u32 save_area_used_size; /* from KFD */
__u32 queue_id; /* to KFD */
__u32 pad;
__u64 ctl_stack_address; /* to KFD */
__u32 ctl_stack_used_size; /* from KFD */
__u32 save_area_used_size; /* from KFD */
__u32 queue_id; /* to KFD */
__u32 pad;
};
struct kfd_queue_snapshot_entry {
__u64 ring_base_address;
__u64 write_pointer_address;
__u64 read_pointer_address;
__u64 ctx_save_restore_address;
__u32 queue_id;
__u32 gpu_id;
__u32 ring_size;
__u32 queue_type;
__u32 queue_status;
__u32 reserved[19];
__u64 ring_base_address;
__u64 write_pointer_address;
__u64 read_pointer_address;
__u64 ctx_save_restore_address;
__u32 queue_id;
__u32 gpu_id;
__u32 ring_size;
__u32 queue_type;
__u32 queue_status;
__u32 reserved[19];
};
/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -111,13 +111,13 @@ struct kfd_queue_snapshot_entry {
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
struct kfd_ioctl_set_memory_policy_args {
__u64 alternate_aperture_base; /* to KFD */
__u64 alternate_aperture_size; /* to KFD */
__u64 alternate_aperture_base; /* to KFD */
__u64 alternate_aperture_size; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 default_policy; /* to KFD */
__u32 alternate_policy; /* to KFD */
__u32 pad;
__u32 gpu_id; /* to KFD */
__u32 default_policy; /* to KFD */
__u32 alternate_policy; /* to KFD */
__u32 pad;
};
/*
@@ -128,24 +128,24 @@ struct kfd_ioctl_set_memory_policy_args {
*/
struct kfd_ioctl_get_clock_counters_args {
__u64 gpu_clock_counter; /* from KFD */
__u64 cpu_clock_counter; /* from KFD */
__u64 system_clock_counter; /* from KFD */
__u64 system_clock_freq; /* from KFD */
__u64 gpu_clock_counter; /* from KFD */
__u64 cpu_clock_counter; /* from KFD */
__u64 system_clock_counter; /* from KFD */
__u64 system_clock_freq; /* from KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_process_device_apertures {
__u64 lds_base; /* from KFD */
__u64 lds_limit; /* from KFD */
__u64 scratch_base; /* from KFD */
__u64 scratch_limit; /* from KFD */
__u64 gpuvm_base; /* from KFD */
__u64 gpuvm_limit; /* from KFD */
__u32 gpu_id; /* from KFD */
__u32 pad;
__u64 lds_base; /* from KFD */
__u64 lds_limit; /* from KFD */
__u64 scratch_base; /* from KFD */
__u64 scratch_limit; /* from KFD */
__u64 gpuvm_base; /* from KFD */
__u64 gpuvm_limit; /* from KFD */
__u32 gpu_id; /* from KFD */
__u32 pad;
};
/*
@@ -155,25 +155,25 @@ struct kfd_process_device_apertures {
*/
#define NUM_OF_SUPPORTED_GPUS 7
struct kfd_ioctl_get_process_apertures_args {
struct kfd_process_device_apertures
process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
struct kfd_process_device_apertures
process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
__u32 num_of_nodes;
__u32 pad;
/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
__u32 num_of_nodes;
__u32 pad;
};
struct kfd_ioctl_get_process_apertures_new_args {
/* User allocated. Pointer to struct kfd_process_device_apertures
* filled in by Kernel
*/
__u64 kfd_process_device_apertures_ptr;
/* to KFD - indicates amount of memory present in
* kfd_process_device_apertures_ptr
* from KFD - Number of entries filled by KFD.
*/
__u32 num_of_nodes;
__u32 pad;
/* User allocated. Pointer to struct kfd_process_device_apertures
* filled in by Kernel
*/
__u64 kfd_process_device_apertures_ptr;
/* to KFD - indicates amount of memory present in
* kfd_process_device_apertures_ptr
* from KFD - Number of entries filled by KFD.
*/
__u32 num_of_nodes;
__u32 pad;
};
#define MAX_ALLOWED_NUM_POINTS 100
@@ -181,35 +181,35 @@ struct kfd_ioctl_get_process_apertures_new_args {
#define MAX_ALLOWED_WAC_BUFF_SIZE 128
struct kfd_ioctl_dbg_register_args {
__u32 gpu_id; /* to KFD */
__u32 pad;
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_dbg_unregister_args {
__u32 gpu_id; /* to KFD */
__u32 pad;
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_dbg_address_watch_args {
__u64 content_ptr; /* a pointer to the actual content */
__u32 gpu_id; /* to KFD */
__u32 buf_size_in_bytes; /*including gpu_id and buf_size */
__u64 content_ptr; /* a pointer to the actual content */
__u32 gpu_id; /* to KFD */
__u32 buf_size_in_bytes; /*including gpu_id and buf_size */
};
struct kfd_ioctl_dbg_wave_control_args {
__u64 content_ptr; /* a pointer to the actual content */
__u32 gpu_id; /* to KFD */
__u32 buf_size_in_bytes; /*including gpu_id and buf_size */
__u64 content_ptr; /* a pointer to the actual content */
__u32 gpu_id; /* to KFD */
__u32 buf_size_in_bytes; /*including gpu_id and buf_size */
};
/* mapping event types to API spec */
#define KFD_DBG_EV_STATUS_TRAP 1
#define KFD_DBG_EV_STATUS_VMFAULT 2
#define KFD_DBG_EV_STATUS_SUSPENDED 4
#define KFD_DBG_EV_STATUS_NEW_QUEUE 8
#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1
#define KFD_DBG_EV_STATUS_TRAP 1
#define KFD_DBG_EV_STATUS_VMFAULT 2
#define KFD_DBG_EV_STATUS_SUSPENDED 4
#define KFD_DBG_EV_STATUS_NEW_QUEUE 8
#define KFD_DBG_EV_FLAG_CLEAR_STATUS 1
#define KFD_INVALID_QUEUEID 0xffffffff
#define KFD_INVALID_QUEUEID 0xffffffff
/* KFD_IOC_DBG_TRAP_ENABLE:
* ptr: unused
@@ -273,179 +273,177 @@ struct kfd_ioctl_dbg_wave_control_args {
* data2: minor version (OUT)
* data3: unused
*/
#define KFD_IOC_DBG_TRAP_GET_VERSION 7
#define KFD_IOC_DBG_TRAP_GET_VERSION 7
struct kfd_ioctl_dbg_trap_args {
__u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */
__u32 pid; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 op; /* to KFD */
__u32 data1; /* to KFD */
__u32 data2; /* to KFD */
__u32 data3; /* to KFD */
__u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */
__u32 pid; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 op; /* to KFD */
__u32 data1; /* to KFD */
__u32 data2; /* to KFD */
__u32 data3; /* to KFD */
};
/* Matching HSA_EVENTTYPE */
#define KFD_IOC_EVENT_SIGNAL 0
#define KFD_IOC_EVENT_NODECHANGE 1
#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
#define KFD_IOC_EVENT_HW_EXCEPTION 3
#define KFD_IOC_EVENT_SYSTEM_EVENT 4
#define KFD_IOC_EVENT_DEBUG_EVENT 5
#define KFD_IOC_EVENT_PROFILE_EVENT 6
#define KFD_IOC_EVENT_QUEUE_EVENT 7
#define KFD_IOC_EVENT_MEMORY 8
#define KFD_IOC_EVENT_SIGNAL 0
#define KFD_IOC_EVENT_NODECHANGE 1
#define KFD_IOC_EVENT_DEVICESTATECHANGE 2
#define KFD_IOC_EVENT_HW_EXCEPTION 3
#define KFD_IOC_EVENT_SYSTEM_EVENT 4
#define KFD_IOC_EVENT_DEBUG_EVENT 5
#define KFD_IOC_EVENT_PROFILE_EVENT 6
#define KFD_IOC_EVENT_QUEUE_EVENT 7
#define KFD_IOC_EVENT_MEMORY 8
#define KFD_IOC_WAIT_RESULT_COMPLETE 0
#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
#define KFD_IOC_WAIT_RESULT_FAIL 2
#define KFD_IOC_WAIT_RESULT_COMPLETE 0
#define KFD_IOC_WAIT_RESULT_TIMEOUT 1
#define KFD_IOC_WAIT_RESULT_FAIL 2
#define KFD_SIGNAL_EVENT_LIMIT 4096
#define KFD_SIGNAL_EVENT_LIMIT 4096
/* For kfd_event_data.hw_exception_data.reset_type. */
#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0
#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1
#define KFD_HW_EXCEPTION_WHOLE_GPU_RESET 0
#define KFD_HW_EXCEPTION_PER_ENGINE_RESET 1
/* For kfd_event_data.hw_exception_data.reset_cause. */
#define KFD_HW_EXCEPTION_GPU_HANG 0
#define KFD_HW_EXCEPTION_ECC 1
#define KFD_HW_EXCEPTION_GPU_HANG 0
#define KFD_HW_EXCEPTION_ECC 1
/* For kfd_hsa_memory_exception_data.ErrorType */
#define KFD_MEM_ERR_NO_RAS 0
#define KFD_MEM_ERR_SRAM_ECC 1
#define KFD_MEM_ERR_POISON_CONSUMED 2
#define KFD_MEM_ERR_GPU_HANG 3
#define KFD_MEM_ERR_NO_RAS 0
#define KFD_MEM_ERR_SRAM_ECC 1
#define KFD_MEM_ERR_POISON_CONSUMED 2
#define KFD_MEM_ERR_GPU_HANG 3
struct kfd_ioctl_create_event_args {
__u64 event_page_offset; /* from KFD */
__u32 event_trigger_data; /* from KFD - signal events only */
__u32 event_type; /* to KFD */
__u32 auto_reset; /* to KFD */
__u32 node_id; /* to KFD - only valid for certain
event types */
__u32 event_id; /* from KFD */
__u32 event_slot_index; /* from KFD */
__u64 event_page_offset; /* from KFD */
__u32 event_trigger_data; /* from KFD - signal events only */
__u32 event_type; /* to KFD */
__u32 auto_reset; /* to KFD */
__u32 node_id; /* to KFD - only valid for certain event types */
__u32 event_id; /* from KFD */
__u32 event_slot_index; /* from KFD */
};
struct kfd_ioctl_destroy_event_args {
__u32 event_id; /* to KFD */
__u32 pad;
__u32 event_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_set_event_args {
__u32 event_id; /* to KFD */
__u32 pad;
__u32 event_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_reset_event_args {
__u32 event_id; /* to KFD */
__u32 pad;
__u32 event_id; /* to KFD */
__u32 pad;
};
struct kfd_memory_exception_failure {
__u32 NotPresent; /* Page not present or supervisor privilege */
__u32 ReadOnly; /* Write access to a read-only page */
__u32 NoExecute; /* Execute access to a page marked NX */
__u32 imprecise; /* Can't determine the exact fault address */
__u32 NotPresent; /* Page not present or supervisor privilege */
__u32 ReadOnly; /* Write access to a read-only page */
__u32 NoExecute; /* Execute access to a page marked NX */
__u32 imprecise; /* Can't determine the exact fault address */
};
/* memory exception data */
struct kfd_hsa_memory_exception_data {
struct kfd_memory_exception_failure failure;
__u64 va;
__u32 gpu_id;
__u32 ErrorType; /* 0 = no RAS error,
* 1 = ECC_SRAM,
* 2 = Link_SYNFLOOD (poison),
* 3 = GPU hang (not attributable to a specific cause),
* other values reserved
*/
struct kfd_memory_exception_failure failure;
__u64 va;
__u32 gpu_id;
__u32 ErrorType; // 0 = no RAS error,
// 1 = ECC_SRAM,
// 2 = Link_SYNFLOOD (poison),
// 3 = GPU hang (not attributable to a specific cause),
// other values reserved
};
/* hw exception data */
struct kfd_hsa_hw_exception_data {
__u32 reset_type;
__u32 reset_cause;
__u32 memory_lost;
__u32 gpu_id;
__u32 reset_type;
__u32 reset_cause;
__u32 memory_lost;
__u32 gpu_id;
};
/* Event data */
struct kfd_event_data {
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
}; /* From KFD */
__u64 kfd_event_data_ext; /* pointer to an extension structure
for future exception types */
__u32 event_id; /* to KFD */
__u32 pad;
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
}; /* From KFD */
__u64 kfd_event_data_ext; // pointer to an extension structure
// for future exception types
__u32 event_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_wait_events_args {
__u64 events_ptr; /* pointed to struct
kfd_event_data array, to KFD */
__u32 num_events; /* to KFD */
__u32 wait_for_all; /* to KFD */
__u32 timeout; /* to KFD */
__u32 wait_result; /* from KFD */
__u64 events_ptr; // pointed to struct
// kfd_event_data array, to KFD
__u32 num_events; /* to KFD */
__u32 wait_for_all; /* to KFD */
__u32 timeout; /* to KFD */
__u32 wait_result; /* from KFD */
};
struct kfd_ioctl_set_scratch_backing_va_args {
__u64 va_addr; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
__u64 va_addr; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_get_tile_config_args {
/* to KFD: pointer to tile array */
__u64 tile_config_ptr;
/* to KFD: pointer to macro tile array */
__u64 macro_tile_config_ptr;
/* to KFD: array size allocated by user mode
* from KFD: array size filled by kernel
*/
__u32 num_tile_configs;
/* to KFD: array size allocated by user mode
* from KFD: array size filled by kernel
*/
__u32 num_macro_tile_configs;
/* to KFD: pointer to tile array */
__u64 tile_config_ptr;
/* to KFD: pointer to macro tile array */
__u64 macro_tile_config_ptr;
/* to KFD: array size allocated by user mode
* from KFD: array size filled by kernel
*/
__u32 num_tile_configs;
/* to KFD: array size allocated by user mode
* from KFD: array size filled by kernel
*/
__u32 num_macro_tile_configs;
__u32 gpu_id; /* to KFD */
__u32 gb_addr_config; /* from KFD */
__u32 num_banks; /* from KFD */
__u32 num_ranks; /* from KFD */
/* struct size can be extended later if needed
* without breaking ABI compatibility
*/
__u32 gpu_id; /* to KFD */
__u32 gb_addr_config; /* from KFD */
__u32 num_banks; /* from KFD */
__u32 num_ranks; /* from KFD */
/* struct size can be extended later if needed
* without breaking ABI compatibility
*/
};
struct kfd_ioctl_set_trap_handler_args {
__u64 tba_addr; /* to KFD */
__u64 tma_addr; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
__u64 tba_addr; /* to KFD */
__u64 tma_addr; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_acquire_vm_args {
__u32 drm_fd; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 drm_fd; /* to KFD */
__u32 gpu_id; /* to KFD */
};
/* Allocation flags: memory types */
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
#define KFD_IOC_ALLOC_MEM_FLAGS_VRAM (1 << 0)
#define KFD_IOC_ALLOC_MEM_FLAGS_GTT (1 << 1)
#define KFD_IOC_ALLOC_MEM_FLAGS_USERPTR (1 << 2)
#define KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL (1 << 3)
#define KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP (1 << 4)
/* Allocation flags: attributes/access options */
#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
#define KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE (1 << 31)
#define KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30)
#define KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC (1 << 29)
#define KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28)
#define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27)
#define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
/* Allocate memory for later SVM (shared virtual memory) mapping.
*
@@ -460,12 +458,12 @@ struct kfd_ioctl_acquire_vm_args {
* @flags: memory type and attributes. See KFD_IOC_ALLOC_MEM_FLAGS above
*/
struct kfd_ioctl_alloc_memory_of_gpu_args {
__u64 va_addr; /* to KFD */
__u64 size; /* to KFD */
__u64 handle; /* from KFD */
__u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
__u32 gpu_id; /* to KFD */
__u32 flags;
__u64 va_addr; /* to KFD */
__u64 size; /* to KFD */
__u64 handle; /* from KFD */
__u64 mmap_offset; /* to KFD (userptr), from KFD (mmap offset) */
__u32 gpu_id; /* to KFD */
__u32 flags;
};
/* Free memory allocated with kfd_ioctl_alloc_memory_of_gpu
@@ -473,7 +471,7 @@ struct kfd_ioctl_alloc_memory_of_gpu_args {
* @handle: memory handle returned by alloc
*/
struct kfd_ioctl_free_memory_of_gpu_args {
__u64 handle; /* to KFD */
__u64 handle; /* to KFD */
};
/* Map memory to one or more GPUs
@@ -492,10 +490,10 @@ struct kfd_ioctl_free_memory_of_gpu_args {
* n_devices.
*/
struct kfd_ioctl_map_memory_to_gpu_args {
__u64 handle; /* to KFD */
__u64 device_ids_array_ptr; /* to KFD */
__u32 n_devices; /* to KFD */
__u32 n_success; /* to/from KFD */
__u64 handle; /* to KFD */
__u64 device_ids_array_ptr; /* to KFD */
__u32 n_devices; /* to KFD */
__u32 n_success; /* to/from KFD */
};
/* Unmap memory from one or more GPUs
@@ -503,10 +501,10 @@ struct kfd_ioctl_map_memory_to_gpu_args {
* same arguments as for mapping
*/
struct kfd_ioctl_unmap_memory_from_gpu_args {
__u64 handle; /* to KFD */
__u64 device_ids_array_ptr; /* to KFD */
__u32 n_devices; /* to KFD */
__u32 n_success; /* to/from KFD */
__u64 handle; /* to KFD */
__u64 device_ids_array_ptr; /* to KFD */
__u32 n_devices; /* to KFD */
__u32 n_success; /* to/from KFD */
};
/* Allocate GWS for specific queue
@@ -517,28 +515,27 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
* only support contiguous GWS allocation
*/
struct kfd_ioctl_alloc_queue_gws_args {
__u32 queue_id; /* to KFD */
__u32 num_gws; /* to KFD */
__u32 first_gws; /* from KFD */
__u32 pad; /* to KFD */
__u32 queue_id; /* to KFD */
__u32 num_gws; /* to KFD */
__u32 first_gws; /* from KFD */
__u32 pad; /* to KFD */
};
struct kfd_ioctl_get_dmabuf_info_args {
__u64 size; /* from KFD */
__u64 metadata_ptr; /* to KFD */
__u32 metadata_size; /* to KFD (space allocated by user)
* from KFD (actual metadata size)
*/
__u32 gpu_id; /* from KFD */
__u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
__u32 dmabuf_fd; /* to KFD */
__u64 size; /* from KFD */
__u64 metadata_ptr; /* to KFD */
__u32 metadata_size; // to KFD (space allocated by user)
// from KFD (actual metadata size)
__u32 gpu_id; /* from KFD */
__u32 flags; /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
__u32 dmabuf_fd; /* to KFD */
};
struct kfd_ioctl_import_dmabuf_args {
__u64 va_addr; /* to KFD */
__u64 handle; /* from KFD */
__u32 gpu_id; /* to KFD */
__u32 dmabuf_fd; /* to KFD */
__u64 va_addr; /* to KFD */
__u64 handle; /* from KFD */
__u32 gpu_id; /* to KFD */
__u32 dmabuf_fd; /* to KFD */
};
/*
@@ -548,36 +545,36 @@ struct kfd_ioctl_import_dmabuf_args {
#define KFD_SMI_EVENT_VMFAULT 0x0000000000000001
struct kfd_ioctl_smi_events_args {
__u32 gpuid; /* to KFD */
__u32 anon_fd; /* from KFD */
__u32 gpuid; /* to KFD */
__u32 anon_fd; /* from KFD */
};
/* Register offset inside the remapped mmio page
*/
enum kfd_mmio_remap {
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL = 0,
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
};
struct kfd_ioctl_ipc_export_handle_args {
__u64 handle; /* to KFD */
__u32 share_handle[4]; /* from KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
__u64 handle; /* to KFD */
__u32 share_handle[4]; /* from KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_ipc_import_handle_args {
__u64 handle; /* from KFD */
__u64 va_addr; /* to KFD */
__u64 mmap_offset; /* from KFD */
__u32 share_handle[4]; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
__u64 handle; /* from KFD */
__u64 va_addr; /* to KFD */
__u64 mmap_offset; /* from KFD */
__u32 share_handle[4]; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 pad;
};
struct kfd_memory_range {
__u64 va_addr;
__u64 size;
__u64 va_addr;
__u64 size;
};
/* flags definitions
@@ -587,143 +584,143 @@ struct kfd_memory_range {
#define KFD_CROSS_MEMORY_RW_BIT (1 << 0)
#define KFD_SET_CROSS_MEMORY_READ(flags) (flags &= ~KFD_CROSS_MEMORY_RW_BIT)
#define KFD_SET_CROSS_MEMORY_WRITE(flags) (flags |= KFD_CROSS_MEMORY_RW_BIT)
#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT)
#define KFD_IS_CROSS_MEMORY_WRITE(flags) (flags & KFD_CROSS_MEMORY_RW_BIT) // NOLINT
struct kfd_ioctl_cross_memory_copy_args {
/* to KFD: Process ID of the remote process */
__u32 pid;
/* to KFD: See above definition */
__u32 flags;
/* to KFD: Source GPU VM range */
__u64 src_mem_range_array;
/* to KFD: Size of above array */
__u64 src_mem_array_size;
/* to KFD: Destination GPU VM range */
__u64 dst_mem_range_array;
/* to KFD: Size of above array */
__u64 dst_mem_array_size;
/* from KFD: Total amount of bytes copied */
__u64 bytes_copied;
/* to KFD: Process ID of the remote process */
__u32 pid;
/* to KFD: See above definition */
__u32 flags;
/* to KFD: Source GPU VM range */
__u64 src_mem_range_array;
/* to KFD: Size of above array */
__u64 src_mem_array_size;
/* to KFD: Destination GPU VM range */
__u64 dst_mem_range_array;
/* to KFD: Size of above array */
__u64 dst_mem_array_size;
/* from KFD: Total amount of bytes copied */
__u64 bytes_copied;
};
#define AMDKFD_IOCTL_BASE 'K'
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
#define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOW(nr, type) _IOW(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOWR(nr, type) _IOWR(AMDKFD_IOCTL_BASE, nr, type)
#define AMDKFD_IOC_GET_VERSION \
AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)
#define AMDKFD_IOC_GET_VERSION \
AMDKFD_IOR(0x01, struct kfd_ioctl_get_version_args)
#define AMDKFD_IOC_CREATE_QUEUE \
AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)
#define AMDKFD_IOC_CREATE_QUEUE \
AMDKFD_IOWR(0x02, struct kfd_ioctl_create_queue_args)
#define AMDKFD_IOC_DESTROY_QUEUE \
AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)
#define AMDKFD_IOC_DESTROY_QUEUE \
AMDKFD_IOWR(0x03, struct kfd_ioctl_destroy_queue_args)
#define AMDKFD_IOC_SET_MEMORY_POLICY \
AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)
#define AMDKFD_IOC_SET_MEMORY_POLICY \
AMDKFD_IOW(0x04, struct kfd_ioctl_set_memory_policy_args)
#define AMDKFD_IOC_GET_CLOCK_COUNTERS \
AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)
#define AMDKFD_IOC_GET_CLOCK_COUNTERS \
AMDKFD_IOWR(0x05, struct kfd_ioctl_get_clock_counters_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES \
AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES \
AMDKFD_IOR(0x06, struct kfd_ioctl_get_process_apertures_args)
#define AMDKFD_IOC_UPDATE_QUEUE \
AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
#define AMDKFD_IOC_UPDATE_QUEUE \
AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
#define AMDKFD_IOC_CREATE_EVENT \
AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
#define AMDKFD_IOC_CREATE_EVENT \
AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
#define AMDKFD_IOC_DESTROY_EVENT \
AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
#define AMDKFD_IOC_DESTROY_EVENT \
AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
#define AMDKFD_IOC_SET_EVENT \
AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
#define AMDKFD_IOC_SET_EVENT \
AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
#define AMDKFD_IOC_RESET_EVENT \
AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
#define AMDKFD_IOC_RESET_EVENT \
AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
#define AMDKFD_IOC_WAIT_EVENTS \
AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
#define AMDKFD_IOC_WAIT_EVENTS \
AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
#define AMDKFD_IOC_DBG_REGISTER \
AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
#define AMDKFD_IOC_DBG_REGISTER \
AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
#define AMDKFD_IOC_DBG_UNREGISTER \
AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
#define AMDKFD_IOC_DBG_UNREGISTER \
AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
#define AMDKFD_IOC_DBG_ADDRESS_WATCH \
AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
#define AMDKFD_IOC_DBG_ADDRESS_WATCH \
AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
#define AMDKFD_IOC_DBG_WAVE_CONTROL \
AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
#define AMDKFD_IOC_DBG_WAVE_CONTROL \
AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \
AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA \
AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
#define AMDKFD_IOC_GET_TILE_CONFIG \
AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
#define AMDKFD_IOC_GET_TILE_CONFIG \
AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
#define AMDKFD_IOC_SET_TRAP_HANDLER \
AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
#define AMDKFD_IOC_SET_TRAP_HANDLER \
AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
AMDKFD_IOWR(0x14, \
struct kfd_ioctl_get_process_apertures_new_args)
#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW \
AMDKFD_IOWR(0x14, \
struct kfd_ioctl_get_process_apertures_new_args)
#define AMDKFD_IOC_ACQUIRE_VM \
AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
#define AMDKFD_IOC_ACQUIRE_VM \
AMDKFD_IOW(0x15, struct kfd_ioctl_acquire_vm_args)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
#define AMDKFD_IOC_ALLOC_MEMORY_OF_GPU \
AMDKFD_IOWR(0x16, struct kfd_ioctl_alloc_memory_of_gpu_args)
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
#define AMDKFD_IOC_FREE_MEMORY_OF_GPU \
AMDKFD_IOW(0x17, struct kfd_ioctl_free_memory_of_gpu_args)
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
#define AMDKFD_IOC_MAP_MEMORY_TO_GPU \
AMDKFD_IOWR(0x18, struct kfd_ioctl_map_memory_to_gpu_args)
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
#define AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU \
AMDKFD_IOWR(0x19, struct kfd_ioctl_unmap_memory_from_gpu_args)
#define AMDKFD_IOC_SET_CU_MASK \
AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
#define AMDKFD_IOC_SET_CU_MASK \
AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \
AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \
AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
#define AMDKFD_IOC_GET_DMABUF_INFO \
AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
#define AMDKFD_IOC_GET_DMABUF_INFO \
AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
#define AMDKFD_IOC_IMPORT_DMABUF \
AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
#define AMDKFD_IOC_IMPORT_DMABUF \
AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
#define AMDKFD_IOC_ALLOC_QUEUE_GWS \
AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
#define AMDKFD_IOC_SMI_EVENTS \
AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
#define AMDKFD_IOC_SMI_EVENTS \
AMDKFD_IOWR(0x1F, struct kfd_ioctl_smi_events_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x20
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END 0x20
/* non-upstream ioctls */
#define AMDKFD_IOC_IPC_IMPORT_HANDLE \
AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args)
AMDKFD_IOWR(0x1F, struct kfd_ioctl_ipc_import_handle_args)
#define AMDKFD_IOC_IPC_EXPORT_HANDLE \
AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args)
#define AMDKFD_IOC_IPC_EXPORT_HANDLE \
AMDKFD_IOWR(0x20, struct kfd_ioctl_ipc_export_handle_args)
#define AMDKFD_IOC_DBG_TRAP \
AMDKFD_IOWR(0x21, struct kfd_ioctl_dbg_trap_args)
#define AMDKFD_IOC_DBG_TRAP \
AMDKFD_IOWR(0x21, struct kfd_ioctl_dbg_trap_args)
#define AMDKFD_IOC_CROSS_MEMORY_COPY \
AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args)
#define AMDKFD_IOC_CROSS_MEMORY_COPY \
AMDKFD_IOWR(0x22, struct kfd_ioctl_cross_memory_copy_args)
#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_START 0x01
#undef AMDKFD_COMMAND_END
#define AMDKFD_COMMAND_END 0x22
#define AMDKFD_COMMAND_END 0x22
#endif // INCLUDE_ROCM_SMI_KFD_IOCTL_H_
+2
Dosyayı Görüntüle
@@ -123,6 +123,8 @@ typedef enum {
RSMI_STATUS_BUSY, //!< A resource or mutex could not be
//!< acquired because it is already
//!< being used
RSMI_STATUS_REFCOUNT_OVERFLOW, //!< An internal reference counter
//!< exceeded INT32_MAX
RSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF, //!< An unknown error occurred
} rsmi_status_t;
+17 -3
Dosyayı Görüntüle
@@ -94,8 +94,18 @@ class RocmSMI {
int kfd_notif_evt_fh(void) const {return kfd_notif_evt_fh_;}
void set_kfd_notif_evt_fh(int fd) {kfd_notif_evt_fh_ = fd;}
std::mutex *kfd_notif_evt_fh_mutex(void) {return &kfd_notif_evt_fh_mutex_;}
int kfd_notif_evt_fh_refcnt_inc() {return ++kfd_notif_evt_fh_refcnt_;}
int kfd_notif_evt_fh_refcnt_dec() {return --kfd_notif_evt_fh_refcnt_;}
std::mutex *bootstrap_mutex(void) {return &bootstrap_mutex_;}
uint32_t ref_count(void) const {return ref_count_;}
uint32_t ref_count_inc(void) {return ++ref_count_;}
uint32_t ref_count_dec(void) {return --ref_count_;}
uint32_t kfd_notif_evt_fh_refcnt(void) const {
return kfd_notif_evt_fh_refcnt_;}
uint32_t kfd_notif_evt_fh_refcnt_inc(void) {
return ++kfd_notif_evt_fh_refcnt_;}
uint32_t kfd_notif_evt_fh_refcnt_dec(void) {
return --kfd_notif_evt_fh_refcnt_;}
private:
std::vector<std::shared_ptr<Device>> devices_;
@@ -114,8 +124,12 @@ class RocmSMI {
uint32_t euid_;
int kfd_notif_evt_fh_;
int kfd_notif_evt_fh_refcnt_;
std::mutex kfd_notif_evt_fh_mutex_;
uint32_t kfd_notif_evt_fh_refcnt_; // Access to this should be protected
// by kfd_notif_evt_fh_mutex_
std::mutex bootstrap_mutex_;
uint32_t ref_count_; // Access to this should be protected
// by bootstrap_mutex_
};
} // namespace smi
+79
Dosyayı Görüntüle
@@ -105,6 +105,85 @@ struct ScopedPthread {
pthread_wrap& pthrd_ref_;
bool mutex_not_acquired_; // Use for AcquireNB (not for Aquire())
};
// Two-level token pasting: going through PASTE lets arguments that are
// themselves macros (e.g. __COUNTER__) expand before ## is applied.
#define PASTE2(x, y) x##y
#define PASTE(x, y) PASTE2(x, y)

// GCC/Clang spelling of an "always inline" function specifier.
#define __forceinline __inline__ __attribute__((always_inline))

/// @brief Invokes a callable when the guard is destroyed, unless the guard
/// has been dismissed first.
///
/// Copying a guard transfers responsibility for the callable: the source
/// guard is marked dismissed so the callable runs at most once per chain of
/// copies.
template <typename lambda>
class ScopeGuard {
 public:
  /// @brief Arms the guard with a copy of @p release.
  /// @param release Callable taking no arguments, run from the destructor.
  explicit __forceinline ScopeGuard(const lambda& release)
      : release_(release), dismiss_(false) {}

  /// @brief Takes over the callable and armed/dismissed state of @p rhs.
  ///
  /// Members are copy-constructed in the initializer list rather than
  /// default-constructed and then assigned, so the constructor also works
  /// for callable types that cannot be default-constructed or assigned
  /// (lambda closure types). @p rhs is dismissed afterwards.
  ScopeGuard(const ScopeGuard& rhs)
      : release_(rhs.release_), dismiss_(rhs.dismiss_) {
    rhs.dismiss_ = true;
  }

  /// @brief Runs the callable unless Dismiss() was called or ownership was
  /// transferred to another guard.
  __forceinline ~ScopeGuard() {
    if (!dismiss_) release_();
  }

  /// @brief Replaces this guard's callable and state with those of @p rhs
  /// and dismisses @p rhs.
  ///
  /// The callable previously held by this guard is discarded without being
  /// run. Self-assignment is a no-op. Requires a copy-assignable callable
  /// type.
  /// @return Reference to this guard.
  __forceinline ScopeGuard& operator=(const ScopeGuard& rhs) {
    if (this != &rhs) {
      dismiss_ = rhs.dismiss_;
      release_ = rhs.release_;
      rhs.dismiss_ = true;
    }
    return *this;
  }

  /// @brief Disarms the guard; the destructor will not run the callable.
  __forceinline void Dismiss() { dismiss_ = true; }

 private:
  lambda release_;
  // mutable: copy operations take the source by const reference but must
  // still mark it dismissed to hand over ownership.
  mutable bool dismiss_;
};
// Convenience factory: deduces the callable's type from the argument and
// returns a ScopeGuard holding a copy of rel. The returned guard invokes that
// copy when it is destroyed unless Dismiss() is called on it first.
template <typename lambda>
static __forceinline ScopeGuard<lambda> MakeScopeGuard(lambda rel) {
return ScopeGuard<lambda>(rel);
}
// Declares two variables in the current scope: `lname`, holding the callable
// given as the variadic argument, and `sname`, an amd::smi::ScopeGuard that
// runs `lname` when the scope is left. The expansion already ends with a
// semicolon.
#define MAKE_SCOPE_GUARD_HELPER(lname, sname, ...) \
auto lname = __VA_ARGS__; \
amd::smi::ScopeGuard<decltype(lname)> sname(lname);
// Anonymous scope guard: both variable names are made unique with
// __COUNTER__, so the guard cannot be referred to (and thus not dismissed).
#define MAKE_SCOPE_GUARD(...) \
MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), \
PASTE(scopeGuard, __COUNTER__), __VA_ARGS__)
// Named scope guard: the guard variable is called `name`, so the caller can
// later invoke name.Dismiss() to cancel the action.
#define MAKE_NAMED_SCOPE_GUARD(name, ...) \
MAKE_SCOPE_GUARD_HELPER(PASTE(scopeGuardLambda, __COUNTER__), name, \
__VA_ARGS__)
// A macro to disallow the copy and move constructor and operator= functions.
// Place it inside the class definition of TypeName; it expands to four
// deleted member declarations (copy ctor, move ctor, copy assignment, move
// assignment). The expansion already ends with a semicolon.
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
TypeName(TypeName&&) = delete; \
void operator=(const TypeName&) = delete; \
void operator=(TypeName&&) = delete;
/// @brief: Scope-bound lock holder. LockType must provide Acquire() and
/// Release() member functions.
template <class LockType>
class ScopedAcquire {
 public:
  /// @brief: Take the lock for the lifetime of this object.
  /// @param: lock(Input), pointer to an existing lock.
  explicit ScopedAcquire(LockType* lock)
      : lock_(lock), release_on_destroy_(true) {
    lock_->Acquire();
  }

  /// @brief: Give the lock back, unless Release() already did so.
  ~ScopedAcquire() {
    if (!release_on_destroy_) {
      return;
    }
    lock_->Release();
  }

  /// @brief: Give the lock back before the end of the scope. Avoid using
  /// when possible.
  void Release() {
    lock_->Release();
    release_on_destroy_ = false;
  }

 private:
  LockType* lock_;           // Lock being held; not owned
  bool release_on_destroy_;  // false once Release() has been called

  /// @brief: Instances can be neither copied nor assigned.
  DISALLOW_COPY_AND_ASSIGN(ScopedAcquire);
};
} // namespace smi
} // namespace amd
+61 -10
Dosyayı Görüntüle
@@ -165,20 +165,20 @@ static rsmi_status_t handleException() {
return RSMI_STATUS_NOT_SUPPORTED; \
} \
return RSMI_STATUS_INVALID_ARGS; \
} \
}
#define CHK_SUPPORT(RT_PTR, VR, SUB_VR) \
GET_DEV_FROM_INDX \
CHK_API_SUPPORT_ONLY((RT_PTR), (VR), (SUB_VR))
#define CHK_SUPPORT_NAME_ONLY(RT_PTR) \
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT) \
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, RSMI_DEFAULT_VARIANT)
#define CHK_SUPPORT_VAR(RT_PTR, VR) \
CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT) \
CHK_SUPPORT((RT_PTR), (VR), RSMI_DEFAULT_VARIANT)
#define CHK_SUPPORT_SUBVAR_ONLY(RT_PTR, SUB_VR) \
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR)) \
CHK_SUPPORT((RT_PTR), RSMI_DEFAULT_VARIANT, (SUB_VR))
static pthread_mutex_t *get_mutex(uint32_t dv_ind) {
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
@@ -540,9 +540,29 @@ static bool is_power_of_2(uint64_t n) {
rsmi_status_t
rsmi_init(uint64_t flags) {
TRY
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
smi.Initialize(flags);
std::lock_guard<std::mutex> guard(*smi.bootstrap_mutex());
if (smi.ref_count() == INT32_MAX) {
return RSMI_STATUS_REFCOUNT_OVERFLOW;
}
(void)smi.ref_count_inc();
// If smi.Initialize() throws, we should clean up and dec. ref_count_.
// Otherwise, if no issues, the Dismiss() will prevent the ref_count_
// decrement.
MAKE_NAMED_SCOPE_GUARD(refGuard, [&]() { (void)smi.ref_count_dec(); });
if (smi.ref_count() == 1) {
try {
smi.Initialize(flags);
} catch(...) {
smi.Cleanup();
throw;
}
}
refGuard.Dismiss();
return RSMI_STATUS_SUCCESS;
CATCH
@@ -555,9 +575,17 @@ rsmi_shut_down(void) {
TRY
amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
std::lock_guard<std::mutex> guard(*smi.bootstrap_mutex());
smi.Cleanup();
if (smi.ref_count() == 0) {
return RSMI_STATUS_INIT_ERROR;
}
(void)smi.ref_count_dec();
if (smi.ref_count() == 0) {
smi.Cleanup();
}
return RSMI_STATUS_SUCCESS;
CATCH
}
@@ -2371,6 +2399,15 @@ rsmi_status_string(rsmi_status_t status, const char **status_string) {
"type that was expected";
break;
case RSMI_STATUS_BUSY:
*status_string = "A resource or mutex could not be acquired "
"because it is already being used";
break;
case RSMI_STATUS_REFCOUNT_OVERFLOW:
*status_string = "An internal reference counter exceeded INT32_MAX";
break;
case RSMI_STATUS_UNKNOWN_ERROR:
*status_string = "An unknown error prevented the call from completing"
" successfully";
@@ -3186,6 +3223,7 @@ rsmi_event_notification_init(uint32_t dv_ind) {
std::lock_guard<std::mutex> guard(*smi.kfd_notif_evt_fh_mutex());
if (smi.kfd_notif_evt_fh() == -1) {
assert(smi.kfd_notif_evt_fh_refcnt() == 0);
int kfd_fd = open(kPathKFDIoctl, O_RDWR | O_CLOEXEC);
if (kfd_fd <= 0) {
@@ -3199,8 +3237,7 @@ rsmi_event_notification_init(uint32_t dv_ind) {
smi.set_kfd_notif_evt_fh(kfd_fd);
}
smi.kfd_notif_evt_fh_refcnt_inc();
(void)smi.kfd_notif_evt_fh_refcnt_inc();
struct kfd_ioctl_smi_events_args args;
assert(dev->kfd_gpu_id() <= UINT32_MAX);
@@ -3354,7 +3391,7 @@ rsmi_status_t rsmi_event_notification_stop(uint32_t dv_ind) {
dev->set_evt_notif_anon_file_ptr(nullptr);
dev->set_evt_notif_anon_fd(-1);
if (!smi.kfd_notif_evt_fh_refcnt_dec()) {
if (smi.kfd_notif_evt_fh_refcnt_dec() == 0) {
int ret = close(smi.kfd_notif_evt_fh());
smi.set_kfd_notif_evt_fh(-1);
if (ret < 0) {
@@ -3385,3 +3422,17 @@ rsmi_test_sleep(uint32_t dv_ind, uint32_t seconds) {
sleep(seconds);
return RSMI_STATUS_SUCCESS;
}
// Test hook: report the current rsmi_init()/rsmi_shut_down() reference count.
// The count is read while holding the bootstrap mutex. Returns -1 when the
// count is 0 but monitor_devices() is still non-empty (presumably meaning
// that clean-up did not happen -- confirm against RocmSMI::Cleanup()).
// refcnt_type is currently ignored.
int32_t
rsmi_test_refcount(uint64_t refcnt_type) {
  (void)refcnt_type;

  amd::smi::RocmSMI& smi = amd::smi::RocmSMI::getInstance();
  std::lock_guard<std::mutex> guard(*smi.bootstrap_mutex());

  const uint32_t current = smi.ref_count();
  if (current == 0) {
    // Only inspect the device list when nothing should be initialized.
    if (smi.monitor_devices().size() != 0) {
      return -1;
    }
  }
  return current;
}
+10 -3
Dosyayı Görüntüle
@@ -244,6 +244,12 @@ RocmSMI::Initialize(uint64_t flags) {
auto i = 0;
uint32_t ret;
assert(ref_count_ == 1);
if (ref_count_ != 1) {
throw amd::smi::rsmi_exception(RSMI_INITIALIZATION_ERROR,
"Unexpected: RocmSMI ref_count_ != 1");
}
init_options_ = flags;
euid_ = geteuid();
@@ -299,6 +305,10 @@ RocmSMI::Initialize(uint64_t flags) {
void
RocmSMI::Cleanup() {
s_monitor_devices.clear();
devices_.clear();
monitors_.clear();
if (kfd_notif_evt_fh() >= 0) {
int ret = close(kfd_notif_evt_fh());
if (ret < 0) {
@@ -306,9 +316,6 @@ RocmSMI::Cleanup() {
"Failed to close kfd file handle on shutdown.");
}
}
s_monitor_devices.clear();
devices_.clear();
monitors_.clear();
}
RocmSMI::RocmSMI(uint64_t flags) : init_options_(flags),
+226
Dosyayı Görüntüle
@@ -0,0 +1,226 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2020, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#include <pthread.h>
#include <algorithm>
#include <iostream>
#include <thread> // NOLINT
#include <random>
#include <chrono> // NOLINT
#include "rocm_smi_test/functional/init_shutdown_refcount.h"
#include "gtest/gtest.h"
#include "rocm_smi/rocm_smi.h"
#include "rocm_smi_test/test_common.h"
// Declared locally rather than taken from an included header. This file uses
// the returned value to check that the library's init reference count has
// dropped back to 0 after each test phase.
extern int32_t
rsmi_test_refcount(uint64_t refcnt_type);
// Sleep the calling thread for a pseudo-random number of milliseconds so that
// concurrently started threads reach the RSMI calls at different times.
//
// msec: upper bound of the delay. For msec >= 10 the delay is drawn uniformly
//       from [10, msec]. Smaller bounds shrink the range to [msec, msec], and
//       negative values are treated as 0, so the distribution's precondition
//       (low <= high) always holds.
static void rand_sleep_mod(int msec) {
  const int high = std::max(msec, 0);
  const int low = std::min(10, high);

  // A wall-clock seed with one-second resolution gives every thread started
  // within the same second the same delay. Mix entropy from random_device
  // with the thread id so that each thread draws its own value.
  std::random_device entropy;
  std::mt19937_64::result_type seed = entropy();
  seed = (seed << 32) ^
      static_cast<std::mt19937_64::result_type>(
          std::hash<std::thread::id>()(std::this_thread::get_id()));

  std::mt19937_64 eng{seed};
  std::uniform_int_distribution<> dist{low, high};
  std::this_thread::sleep_for(std::chrono::milliseconds{dist(eng)});
}
static void* RSMIInitFunction(void* args) {
rsmi_status_t status;
(void)args;
rand_sleep_mod(100);
status = rsmi_init(0);
EXPECT_EQ(RSMI_STATUS_SUCCESS, status);
pthread_exit(nullptr);
return nullptr;
}
static void* RSMIShutDownFunction(void* args) {
rsmi_status_t status;
(void)args;
rand_sleep_mod(100);
status = rsmi_shut_down();
EXPECT_EQ(RSMI_STATUS_SUCCESS, status);
pthread_exit(nullptr);
return nullptr;
}
static void *RSMIInitShutDownFunction(void* args) {
rsmi_status_t status;
(void)args;
rand_sleep_mod(100);
status = rsmi_init(0);
EXPECT_EQ(RSMI_STATUS_SUCCESS, status);
rand_sleep_mod(100);
status = rsmi_shut_down();
EXPECT_EQ(RSMI_STATUS_SUCCESS, status);
pthread_exit(nullptr);
return nullptr;
}
// Number of threads started in each phase of TestConcurrentInit::Run(); also
// the number of sequential rsmi_init()/rsmi_shut_down() calls Run() makes to
// balance what those threads did.
static const int NumOfThreads = 100;
// Register the title and description shown for this test.
TestConcurrentInit::TestConcurrentInit() : TestBase() {
  set_title("RSMI Concurrent Init Test");
  set_description(
      "This test initializes RSMI concurrently to verify "
      "reference counting functionality.");
}
// Nothing to release.
TestConcurrentInit::~TestConcurrentInit() {}
// Intentionally empty: TestBase::SetUp() is not called here so that the usual
// rsmi_init is skipped.
void TestConcurrentInit::SetUp() {
}
// Print the information about this test; simply forwards to the base class.
void TestConcurrentInit::DisplayTestInfo() {
  TestBase::DisplayTestInfo();
}
// Print the results of this test; simply forwards to the base class.
void TestConcurrentInit::DisplayResults() const {
  TestBase::DisplayResults();
}
// Forwards to TestBase::Close(). That call closes handles opened within
// rsmitst utility calls and invokes rsmi_shut_down(), so any other clean-up
// has to happen before it.
void TestConcurrentInit::Close() {
  TestBase::Close();
}
// Compare required profile for this test case with what we're actually
// running on
void TestConcurrentInit::Run(void) {
if (setup_failed_) {
std::cout << "** SetUp Failed for this test. Skipping.**" << std::endl;
return;
}
pthread_t ThreadId[NumOfThreads];
pthread_attr_t attr;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
std::cout << "Testing concurrent rsmi_init()..." << std::endl;
for (int Id = 0; Id < NumOfThreads; ++Id) {
int ThreadStatus = pthread_create(&ThreadId[Id], &attr,
RSMIInitFunction, nullptr);
ASSERT_EQ(0, ThreadStatus) << "pthead_create failed.";
}
for (int Id = 0; Id < NumOfThreads; ++Id) {
int err = pthread_join(ThreadId[Id], nullptr);
ASSERT_EQ(0, err) << "pthread_join failed.";
}
// Invoke hsa_shut_down and verify that all the hsa_init's were counted.
// HSA should be exactly closed after NumOfThreads calls.
for (int Id = 0; Id < NumOfThreads; ++Id) {
rsmi_status_t err = rsmi_shut_down();
ASSERT_EQ(RSMI_STATUS_SUCCESS, err) << "An rsmi_init was missed.";
}
rsmi_status_t err = rsmi_shut_down();
ASSERT_EQ(RSMI_INITIALIZATION_ERROR, err) <<
"rsmi_init reference count was too high.";
int32_t refcnt = rsmi_test_refcount(0);
ASSERT_EQ(0, refcnt);
std::cout << "Concurrent rsmi_init() test passed." << std::endl << std::endl;
std::cout << "Testing concurrent rsmi_shut_down()..." << std::endl;
// Invoke hsa_shut_down and verify that all the hsa_init's were counted.
// HSA should be exactly closed after NumOfThreads calls.
for (int Id = 0; Id < NumOfThreads; ++Id) {
rsmi_status_t err = rsmi_init(0);
ASSERT_EQ(RSMI_STATUS_SUCCESS, err);
}
for (int Id = 0; Id < NumOfThreads; ++Id) {
int ThreadStatus =
pthread_create(&ThreadId[Id], &attr, RSMIShutDownFunction, nullptr);
ASSERT_EQ(0, ThreadStatus) << "pthead_create failed.";
}
for (int Id = 0; Id < NumOfThreads; ++Id) {
int err = pthread_join(ThreadId[Id], nullptr);
ASSERT_EQ(0, err) << "pthread_join failed.";
}
refcnt = rsmi_test_refcount(0);
ASSERT_EQ(0, refcnt);
std::cout << "Concurrent rsmi_shut_down() passed." << std::endl;
std::cout <<
"Testing concurrent rsmi_init() followed by rsmi_shut_down()..." <<
std::endl;
for (int Id = 0; Id < NumOfThreads; ++Id) {
int ThreadStatus =
pthread_create(&ThreadId[Id], &attr, RSMIInitShutDownFunction, nullptr);
ASSERT_EQ(0, ThreadStatus) << "pthead_create failed.";
}
for (int Id = 0; Id < NumOfThreads; ++Id) {
int err = pthread_join(ThreadId[Id], nullptr);
ASSERT_EQ(0, err) << "pthread_join failed.";
}
refcnt = rsmi_test_refcount(0);
ASSERT_EQ(0, refcnt);
std::cout <<
"Concurrent rsmi_init() followed by rsmi_shut_down() passed." <<
std::endl;
}
+74
Dosyayı Görüntüle
@@ -0,0 +1,74 @@
/*
* =============================================================================
* ROC Runtime Conformance Release License
* =============================================================================
* The University of Illinois/NCSA
* Open Source License (NCSA)
*
* Copyright (c) 2020, Advanced Micro Devices, Inc.
* All rights reserved.
*
* Developed by:
*
* AMD Research and AMD ROC Software Development
*
* Advanced Micro Devices, Inc.
*
* www.amd.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal with the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimers.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimers in
* the documentation and/or other materials provided with the distribution.
* - Neither the names of <Name of Development Group, Name of Institution>,
* nor the names of its contributors may be used to endorse or promote
* products derived from this Software without specific prior written
* permission.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS WITH THE SOFTWARE.
*
*/
#ifndef TESTS_ROCM_SMI_TEST_FUNCTIONAL_INIT_SHUTDOWN_REFCOUNT_H_
#define TESTS_ROCM_SMI_TEST_FUNCTIONAL_INIT_SHUTDOWN_REFCOUNT_H_
#include "rocm_smi_test/test_base.h"
// Functional test that calls rsmi_init()/rsmi_shut_down() from many threads
// to verify the library's init reference counting.
class TestConcurrentInit : public TestBase {
 public:
  // @Brief: Constructor; sets the test title and description
  TestConcurrentInit();

  // @Brief: Destructor for the TestConcurrentInit class
  virtual ~TestConcurrentInit();

  // @Brief: Prepare the environment before Run()
  virtual void SetUp();

  // @Brief: Execute the test itself
  virtual void Run();

  // @Brief: Clean up and release the resources
  virtual void Close();

  // @Brief: Display results
  virtual void DisplayResults() const;

  // @Brief: Display information about what this test does
  virtual void DisplayTestInfo();
};
#endif // TESTS_ROCM_SMI_TEST_FUNCTIONAL_INIT_SHUTDOWN_REFCOUNT_H_
+14 -5
Dosyayı Görüntüle
@@ -79,6 +79,7 @@
#include "functional/api_support_read.h"
#include "functional/mutual_exclusion.h"
#include "functional/evt_notif_read_write.h"
#include "functional/init_shutdown_refcount.h"
static RSMITstGlobals *sRSMIGlvalues = nullptr;
@@ -226,17 +227,25 @@ TEST(rsmitstReadOnly, TestAPISupportRead) {
RunGenericTest(&tst);
}
TEST(rsmitstReadOnly, TestMutualExclusion) {
TestMutualExclusion test;
TestMutualExclusion tst;
test.DisplayTestInfo();
test.SetUp();
test.Run();
RunCustomTestEpilog(&test);
tst.DisplayTestInfo();
tst.SetUp();
tst.Run();
RunCustomTestEpilog(&tst);
}
// Event-notification read/write test, driven through the generic
// RunGenericTest() sequence.
TEST(rsmitstReadWrite, TestEvtNotifReadWrite) {
TestEvtNotifReadWrite tst;
RunGenericTest(&tst);
}
// Init/shut-down reference-count test. It is driven by hand instead of via
// RunGenericTest(): SetUp() and the custom epilog are deliberately left out
// because they would add an extra rsmi_init / rsmi_shut_down around Run(),
// which performs its own balanced init and shut-down calls.
TEST(rsmitstReadOnly, TestConcurrentInit) {
TestConcurrentInit tst;
tst.DisplayTestInfo();
// tst.SetUp(); // Avoid extra rsmi_init
tst.Run();
// RunCustomTestEpilog(&tst); // Avoid extra rsmi_shut_down
tst.DisplayResults();
}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);