Updated docs with new KFD events (#382)
* Updated docs with new KFD events
---------
Signed-off-by: Pham, Gabriel <Gabriel.Pham@amd.com>
[ROCm/amdsmi commit: c40d4291f6]
This commit is contained in:
committad av
GitHub
förälder
004c51d909
incheckning
24f25b4133
@@ -1341,9 +1341,18 @@ Event Type | Description
|
||||
---|------
|
||||
`VMFAULT` | VM page fault
|
||||
`THERMAL_THROTTLE` | thermal throttle
|
||||
`GPU_PRE_RESET` | gpu pre reset
|
||||
`GPU_PRE_RESET` | gpu pre reset; this event includes a message indicating the cause of the reset, which is one of: `job hang`, `RAS error`, `MES hang`, `HWS hang`, `user trigger`, or `unknown`
|
||||
`GPU_POST_RESET` | gpu post reset
|
||||
`RING_HANG` | ring hang event
|
||||
`RING_HANG` | ring hang event; this event will be deprecated in ROCm 7.0
|
||||
`MIGRATE_START` | migrate start
|
||||
`MIGRATE_END` | migrate end
|
||||
`PAGE_FAULT_START` | page fault start
|
||||
`PAGE_FAULT_END` | page fault end
|
||||
`QUEUE_EVICTION` | queue eviction
|
||||
`QUEUE_RESTORE` | queue restore
|
||||
`UNMAP_FROM_GPU` | unmap from GPU
|
||||
`PROCESS_START` | KFD process start
|
||||
`PROCESS_END` | KFD process end
|
||||
|
||||
#### read
|
||||
|
||||
|
||||
@@ -1234,23 +1234,25 @@ typedef struct {
|
||||
* @cond @tag{gpu_bm_linux} @endcond
|
||||
*/
|
||||
typedef enum {
|
||||
AMDSMI_EVT_NOTIF_NONE = 0, //!< Not used
|
||||
AMDSMI_EVT_NOTIF_VMFAULT = 1, //!< VM page fault
|
||||
AMDSMI_EVT_NOTIF_NONE = 0, //!< Not used
|
||||
AMDSMI_EVT_NOTIF_VMFAULT = 1, //!< VM page fault
|
||||
AMDSMI_EVT_NOTIF_FIRST = AMDSMI_EVT_NOTIF_VMFAULT,
|
||||
AMDSMI_EVT_NOTIF_THERMAL_THROTTLE = 2,
|
||||
AMDSMI_EVT_NOTIF_GPU_PRE_RESET = 3,
|
||||
AMDSMI_EVT_NOTIF_GPU_POST_RESET = 4,
|
||||
AMDSMI_EVT_NOTIF_RING_HANG = 5, // Ringhang now maps to AMDSMI_EVT_NOTIF_MIGRATE_START.
|
||||
// Will be depreciated in 7.0
|
||||
AMDSMI_EVT_NOTIF_MIGRATE_START = AMDSMI_EVT_NOTIF_RING_HANG,
|
||||
AMDSMI_EVT_NOTIF_MIGRATE_END = 6,
|
||||
AMDSMI_EVT_NOTIF_PAGE_FAULT_START = 7,
|
||||
AMDSMI_EVT_NOTIF_PAGE_FAULT_END = 8,
|
||||
AMDSMI_EVT_NOTIF_QUEUE_EVICTION = 9,
|
||||
AMDSMI_EVT_NOTIF_QUEUE_RESTORE = 10,
|
||||
AMDSMI_EVT_NOTIF_UNMAP_FROM_GPU = 11,
|
||||
AMDSMI_EVT_NOTIF_PROCESS_START = 12,
|
||||
AMDSMI_EVT_NOTIF_PROCESS_END = 13,
|
||||
AMDSMI_EVT_NOTIF_THERMAL_THROTTLE = 2, //!< thermal throttle
|
||||
AMDSMI_EVT_NOTIF_GPU_PRE_RESET = 3, //!< pre reset; event includes message indicating cause
|
||||
//!< causes include job hang, RAS error,
|
||||
//!< MES hang, HWS hang, user trigger, and unknown
|
||||
AMDSMI_EVT_NOTIF_GPU_POST_RESET = 4, //!< post reset
|
||||
AMDSMI_EVT_NOTIF_RING_HANG = 5, //!< Ringhang now maps to AMDSMI_EVT_NOTIF_MIGRATE_START.
|
||||
//!< Will be deprecated in ROCm 7.0
|
||||
AMDSMI_EVT_NOTIF_MIGRATE_START = AMDSMI_EVT_NOTIF_RING_HANG, //!< migrate start
|
||||
AMDSMI_EVT_NOTIF_MIGRATE_END = 6, //!< migrate end
|
||||
AMDSMI_EVT_NOTIF_PAGE_FAULT_START = 7, //!< page fault start
|
||||
AMDSMI_EVT_NOTIF_PAGE_FAULT_END = 8, //!< page fault end
|
||||
AMDSMI_EVT_NOTIF_QUEUE_EVICTION = 9, //!< queue eviction
|
||||
AMDSMI_EVT_NOTIF_QUEUE_RESTORE = 10, //!< queue restore
|
||||
AMDSMI_EVT_NOTIF_UNMAP_FROM_GPU = 11, //!< unmap from GPU
|
||||
AMDSMI_EVT_NOTIF_PROCESS_START = 12, //!< KFD process start
|
||||
AMDSMI_EVT_NOTIF_PROCESS_END = 13, //!< KFD process end
|
||||
|
||||
AMDSMI_EVT_NOTIF_LAST = AMDSMI_EVT_NOTIF_PROCESS_END
|
||||
} amdsmi_evt_notification_type_t;
|
||||
|
||||
@@ -338,21 +338,23 @@ typedef struct {
|
||||
* Event notification event types
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_EVT_NOTIF_NONE = KFD_SMI_EVENT_NONE, //!< Unused
|
||||
RSMI_EVT_NOTIF_VMFAULT = KFD_SMI_EVENT_VMFAULT, //!< VM page fault
|
||||
RSMI_EVT_NOTIF_NONE = KFD_SMI_EVENT_NONE, //!< Unused
|
||||
RSMI_EVT_NOTIF_VMFAULT = KFD_SMI_EVENT_VMFAULT, //!< VM page fault
|
||||
RSMI_EVT_NOTIF_FIRST = RSMI_EVT_NOTIF_VMFAULT,
|
||||
RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE,
|
||||
RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET,
|
||||
RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET,
|
||||
RSMI_EVT_NOTIF_EVENT_MIGRATE_START = KFD_SMI_EVENT_MIGRATE_START,
|
||||
RSMI_EVT_NOTIF_EVENT_MIGRATE_END = KFD_SMI_EVENT_MIGRATE_END,
|
||||
RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_START = KFD_SMI_EVENT_PAGE_FAULT_START,
|
||||
RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_END = KFD_SMI_EVENT_PAGE_FAULT_END,
|
||||
RSMI_EVT_NOTIF_EVENT_QUEUE_EVICTION = KFD_SMI_EVENT_QUEUE_EVICTION,
|
||||
RSMI_EVT_NOTIF_EVENT_QUEUE_RESTORE = KFD_SMI_EVENT_QUEUE_RESTORE,
|
||||
RSMI_EVT_NOTIF_EVENT_UNMAP_FROM_GPU = KFD_SMI_EVENT_UNMAP_FROM_GPU,
|
||||
RSMI_EVT_NOTIF_EVENT_PROCESS_START = KFD_SMI_EVENT_PROCESS_START,
|
||||
RSMI_EVT_NOTIF_EVENT_PROCESS_END = KFD_SMI_EVENT_PROCESS_END,
|
||||
RSMI_EVT_NOTIF_THERMAL_THROTTLE = KFD_SMI_EVENT_THERMAL_THROTTLE, //!< thermal throttle
|
||||
RSMI_EVT_NOTIF_GPU_PRE_RESET = KFD_SMI_EVENT_GPU_PRE_RESET, //!< pre reset; event includes message indicating cause
|
||||
//!< causes include job hang, RAS error,
|
||||
//!< MES hang, HWS hang, user trigger, and unknown
|
||||
RSMI_EVT_NOTIF_GPU_POST_RESET = KFD_SMI_EVENT_GPU_POST_RESET, //!< post reset
|
||||
RSMI_EVT_NOTIF_EVENT_MIGRATE_START = KFD_SMI_EVENT_MIGRATE_START, //!< migrate start
|
||||
RSMI_EVT_NOTIF_EVENT_MIGRATE_END = KFD_SMI_EVENT_MIGRATE_END, //!< migrate end
|
||||
RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_START = KFD_SMI_EVENT_PAGE_FAULT_START, //!< page fault start
|
||||
RSMI_EVT_NOTIF_EVENT_PAGE_FAULT_END = KFD_SMI_EVENT_PAGE_FAULT_END, //!< page fault end
|
||||
RSMI_EVT_NOTIF_EVENT_QUEUE_EVICTION = KFD_SMI_EVENT_QUEUE_EVICTION, //!< queue eviction
|
||||
RSMI_EVT_NOTIF_EVENT_QUEUE_RESTORE = KFD_SMI_EVENT_QUEUE_RESTORE, //!< queue restore
|
||||
RSMI_EVT_NOTIF_EVENT_UNMAP_FROM_GPU = KFD_SMI_EVENT_UNMAP_FROM_GPU, //!< unmap from GPU
|
||||
RSMI_EVT_NOTIF_EVENT_PROCESS_START = KFD_SMI_EVENT_PROCESS_START, //!< KFD process start
|
||||
RSMI_EVT_NOTIF_EVENT_PROCESS_END = KFD_SMI_EVENT_PROCESS_END, //!< KFD process end
|
||||
RSMI_EVT_NOTIF_EVENT_ALL_PROCESS = KFD_SMI_EVENT_ALL_PROCESS,
|
||||
RSMI_EVT_NOTIF_LAST = KFD_SMI_EVENT_ALL_PROCESS
|
||||
} rsmi_evt_notification_type_t;
|
||||
|
||||
Referens i nytt ärende
Block a user