From c2c1385e290d811beede3cb094bfa6ea6e606dcf Mon Sep 17 00:00:00 2001 From: Philip Cox Date: Fri, 29 Mar 2019 14:42:53 -0400 Subject: [PATCH] libhsakmt: Update wave suspend/resume API This is updating to the new suspend and resume API for the KFD and the thunk. We now support passing in a list of queues to suspend, and not just all of the queues for the process. The kfdtest testcase was also updated so it still compiles. Change-Id: I71d1b178476bd9df0c311bdedaa6a891528cebcf Signed-off-by: Philip Cox --- include/hsakmt.h | 86 ++++++++++---- include/hsakmttypes.h | 7 +- include/linux/kfd_ioctl.h | 41 +++---- src/debug.c | 197 +++++++++++++++++++++++-------- src/libhsakmt.ver | 4 +- tests/kfdtest/src/KFDDBGTest.cpp | 17 ++- 6 files changed, 244 insertions(+), 108 deletions(-) diff --git a/include/hsakmt.h b/include/hsakmt.h index 031ab1dc57..6d4b1f3a26 100644 --- a/include/hsakmt.h +++ b/include/hsakmt.h @@ -624,54 +624,88 @@ hsaKmtDbgAddressWatch( /** Suspend the execution of a set of queues. A queue that is suspended - allows the context save state to be inspected and modified. If a + allows the wave context save state to be inspected and modified. If a queue is already suspended it remains suspended. A suspended queue can be resumed by hsaKmtDbgQueueResume(). - If NoGracePeriod is false then the default grace period used for - waiting for waves to complete before context switching is used. If - NoGracePeriod is true then no grace period us used and waves are - context saved as soon as possible. - - If MemFence is true all queues being suspended will perform a - sequentially consistent system scope release that synchronizes with - a sequentially consistent system scope acquire performed by this + For each node that has a queue suspended, a sequentially consistent + system scope release will be performed that synchronizes with a + sequentially consistent system scope acquire performed by this call. 
This ensures any memory updates performed by the suspended queues are visible to the thread calling this operation. + Pid is the process that owns the queues that are to be suspended or + resumed. If the value is -1 then the Pid of the process calling + hsaKmtQueueSuspend or hsaKmtQueueResume is used. + + NumQueues is the number of queues that are being requested to + suspend or resume. + + Queues is a pointer to an array with NumQueues entries of + HSA_QUEUEID. The queues in the list must be for queues that exist + for Pid, and can be a mixture of queues for different nodes. + + GracePeriod is the time to wait after initiating context save before forcing + waves to context save. A value of 0 indicates no grace period. + It is ignored by hsaKmtQueueResume. + + Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL. Returns: - HSAKMT_STATUS_SUCCESS if successful. - - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid. + - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid for Pid. */ + HSAKMT_STATUS - HSAKMTAPI - hsaKmtNodeSuspend( - HSAuint32 Pid, - HSAuint32 NodeId, - HSAuint32 Flags); +HSAKMTAPI +hsaKmtQueueSuspend( + HSAuint32 Pid, // IN + HSAuint32 NumQueues, // IN + HSA_QUEUEID *Queues, // IN + HSAuint32 GracePeriod, // IN + HSAuint32 Flags); // IN + /** Resume the execution of a set of queues. If a queue is not - suspended by hsaKmtDbgQueueSuspend() then it remains executing. + suspended by hsaKmtDbgQueueSuspend() then it remains executing. Any + changes to the wave state data will be used when the waves are + restored. Changes to the control stack data will have no effect. - If MemFence is true this call will perform a sequentially - consistent system scope release that synchronizes with a + For each node that has a queue resumed, a sequentially consistent + system scope release will be performed that synchronizes with a sequentially consistent system scope acquire performed by all queues being resumed. 
This ensures any memory updates performed by the thread calling this operation are visible to the resumed queues. - Returns: - - HSAKMT_STATUS_SUCCESS if successful. + For each node that has a queue resumed, the instruction cache will + be invalidated. This ensures any instruction code updates performed + by the thread calling this operation are visible to the resumed + queues. + Pid is the process that owns the queues that are to be suspended or + resumed. If the value is -1 then the Pid of the process calling + hsaKmtQueueSuspend or hsaKmtQueueResume is used. + + NumQueues is the number of queues that are being requested to + suspend or resume. + + Queues is a pointer to an array with NumQueues entries of + HSA_QUEUEID. The queues in the list must be for queues that exist + for Pid, and can be a mixture of queues for different nodes. + + Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL. + Returns: + - HSAKMT_STATUS_SUCCESS if successful. - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid. */ + HSAKMT_STATUS - HSAKMTAPI - hsaKmtNodeResume( - HSAuint32 Pid, - HSAuint32 NodeId, - HSAuint32 Flags); +HSAKMTAPI +hsaKmtQueueResume( + HSAuint32 Pid, // IN + HSAuint32 NumQueues, // IN + HSA_QUEUEID *Queues, // IN + HSAuint32 Flags); // IN /** Enable debug trap for NodeId. If QueueId is INVALID_QUEUEID then diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h index 8b0d232597..84292a70f0 100644 --- a/include/hsakmttypes.h +++ b/include/hsakmttypes.h @@ -804,10 +804,11 @@ typedef enum _HSA_DBG_WAVE_LAUNCH_MODE HSA_DBG_WAVE_LAUNCH_MODE_NUM } HSA_DBG_WAVE_LAUNCH_MODE; +/** + * There are no flags currently defined. 
+ */ typedef enum HSA_DBG_NODE_CONTROL { - HSA_DBG_NODE_CONTROL_NO_GRACE_PERIOD = 0x01, - HSA_DBG_NODE_CONTROL_MEMORY_FENCE = 0x02, - HSA_DBG_NODE_CONTROL_UPDATE_CONTEXT = 0x04, + HSA_DBG_NODE_CONTROL_FLAG_MAX = 0x01 } HSA_DBG_NODE_CONTROL; diff --git a/include/linux/kfd_ioctl.h b/include/linux/kfd_ioctl.h index 2eb86860da..177365d17b 100644 --- a/include/linux/kfd_ioctl.h +++ b/include/linux/kfd_ioctl.h @@ -188,64 +188,61 @@ struct kfd_ioctl_dbg_wave_control_args { }; /* KFD_IOC_DBG_TRAP_ENABLE: + * ptr: unused * data1: 0=disable, 1=enable * data2: queue ID (for future use) * data3: unused - * data4: unused */ #define KFD_IOC_DBG_TRAP_ENABLE 0 /* KFD_IOC_DBG_TRAP_SET_TRAP_DATA: + * ptr: unused * data1: SPI_GDBG_TRAP_DATA0 * data2: SPI_GDBG_TRAP_DATA1 * data3: unused - * data4: unused */ #define KFD_IOC_DBG_TRAP_SET_TRAP_DATA 1 /* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: + * ptr: unused * data1: override mode: 0=OR, 1=REPLACE * data2: mask * data3: unused - * data4: unused */ #define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE 2 /* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: + * ptr: unused * data1: 0=normal, 1=halt, 2=kill, 3=singlestep, 4=disable * data2: unused * data3: unused - * data4: unused */ #define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE 3 - -#define KFD__DBG_NODE_SUSPEND_NO_GRACE 0x01 -#define KFD__DBG_NODE_SUSPEND_MEMORY_FENCE 0x02 -#define KFD__DBG_NODE_SUSPEND_UPDATE_CONTEXT 0x04 /* KFD_IOC_DBG_TRAP_NODE_SUSPEND: - * data1: pid - * data2: nodeid - * data3: flags no_grace=0x01 memory_fence=0x02 update_context=0x04 - * data4: unused + * ptr: pointer to an array of Queues IDs + * data1: flags + * data2: number of queues + * data3: grace period */ #define KFD_IOC_DBG_TRAP_NODE_SUSPEND 4 /* KFD_IOC_DBG_TRAP_NODE_RESUME: - * data1: pid - * data2: nodeid - * data3: flags no_grace=0x01 memory_fence=0x02 update_context=0x04 - * data4: unused + * ptr: pointer to an array of Queues IDs + * data1: flags + * data2: number of queues + * data3: unused */ #define 
KFD_IOC_DBG_TRAP_NODE_RESUME 5 struct kfd_ioctl_dbg_trap_args { - __u32 gpu_id; /* to KFD */ - __u32 op; /* to KFD */ - __u32 data1; /* to KFD */ - __u32 data2; /* to KFD */ - __u32 data3; /* to KFD */ - __u32 data4; /* to KFD */ + __u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */ + __u32 pid; /* to KFD */ + __u32 gpu_id; /* to KFD */ + __u32 op; /* to KFD */ + __u32 data1; /* to KFD */ + __u32 data2; /* to KFD */ + __u32 data3; /* to KFD */ }; /* Matching HSA_EVENTTYPE */ diff --git a/src/debug.c b/src/debug.c index 521aaaaf14..64e68891c6 100644 --- a/src/debug.c +++ b/src/debug.c @@ -273,7 +273,10 @@ static HSAKMT_STATUS debug_trap(HSAuint32 NodeId, HSAuint32 op, HSAuint32 data1, HSAuint32 data2, - HSAuint32 data3) + HSAuint32 data3, + HSAuint32 pid, + HSAuint64 pointer + ) { uint32_t gpu_id; HSAKMT_STATUS result; @@ -282,16 +285,29 @@ static HSAKMT_STATUS debug_trap(HSAuint32 NodeId, CHECK_KFD_OPEN(); - if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) - return HSAKMT_STATUS_INVALID_HANDLE; + if (op == KFD_IOC_DBG_TRAP_NODE_SUSPEND || + op == KFD_IOC_DBG_TRAP_NODE_RESUME) { + if (NodeId != INVALID_NODEID) + return HSAKMT_STATUS_INVALID_HANDLE; - result = hsaKmtGetNodeProperties(NodeId, &NodeProperties); + // gpu_id is ignored for suspend/resume queues. 
+ gpu_id = INVALID_NODEID; + } else { + if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS) + return HSAKMT_STATUS_INVALID_HANDLE; - if (result != HSAKMT_STATUS_SUCCESS) - return result; + result = hsaKmtGetNodeProperties(NodeId, &NodeProperties); - if (!NodeProperties.Capability.ui32.DebugTrapSupported) - return HSAKMT_STATUS_NOT_SUPPORTED; + if (result != HSAKMT_STATUS_SUCCESS) + return result; + + if (!NodeProperties.Capability.ui32.DebugTrapSupported) + return HSAKMT_STATUS_NOT_SUPPORTED; + } + + if (pid == INVALID_PID) { + pid = (HSAuint32) getpid(); + } memset(&args, 0x00, sizeof(args)); args.gpu_id = gpu_id; @@ -299,7 +315,8 @@ static HSAKMT_STATUS debug_trap(HSAuint32 NodeId, args.data1 = data1; args.data2 = data2; args.data3 = data3; - args.data4 = 0; + args.pid = pid; + args.ptr = pointer; long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args); @@ -317,12 +334,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtEnableDebugTrap(HSAuint32 NodeId, if (QueueId != INVALID_QUEUEID) return HSAKMT_STATUS_NOT_SUPPORTED; - return debug_trap(NodeId, KFD_IOC_DBG_TRAP_ENABLE, 1, QueueId, 0); + return debug_trap(NodeId, + KFD_IOC_DBG_TRAP_ENABLE, + 1, + QueueId, + 0, + INVALID_PID, + 0); } HSAKMT_STATUS HSAKMTAPI hsaKmtDisableDebugTrap(HSAuint32 NodeId) { - return debug_trap(NodeId, KFD_IOC_DBG_TRAP_ENABLE, 0, 0, 0); + return debug_trap(NodeId, + KFD_IOC_DBG_TRAP_ENABLE, + 0, + 0, + 0, + INVALID_PID, + 0); } HSAKMT_STATUS HSAKMTAPI hsaKmtSetDebugTrapData2(HSAuint32 NodeId, @@ -333,6 +362,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetDebugTrapData2(HSAuint32 NodeId, KFD_IOC_DBG_TRAP_SET_TRAP_DATA, TrapData0, TrapData1, + 0, + INVALID_PID, 0); } @@ -348,6 +379,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetWaveLaunchTrapOverride( KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE, TrapOverride, TrapMask, + 0, + INVALID_PID, 0); } @@ -359,51 +392,115 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetWaveLaunchMode( KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE, WaveLaunchMode, 0, + 0, + INVALID_PID, 0); } -HSAKMT_STATUS 
HSAKMTAPI hsaKmtNodeSuspend( - HSAuint32 Pid, - HSAuint32 NodeId, - HSAuint32 Flags) -{ - pid_t current_pid = INVALID_PID; +/** + * Suspend the execution of a set of queues. A queue that is suspended + * allows the wave context save state to be inspected and modified. If a + * queue is already suspended it remains suspended. A suspended queue + * can be resumed by hsaKmtDbgQueueResume(). + * + * For each node that has a queue suspended, a sequentially consistent + * system scope release will be performed that synchronizes with a + * sequentially consistent system scope acquire performed by this + * call. This ensures any memory updates performed by the suspended + * queues are visible to the thread calling this operation. + * + * Pid is the process that owns the queues that are to be suspended or + * resumed. If the value is -1 then the Pid of the process calling + * hsaKmtQueueSuspend or hsaKmtQueueResume is used. + * + * NumQueues is the number of queues that are being requested to + * suspend or resume. + * + * Queues is a pointer to an array with NumQueues entries of + * HSA_QUEUEID. The queues in the list must be for queues that exist + * for Pid, and can be a mixture of queues for different nodes. + * + * GracePeriod is the number of milliseconds to wait after + * initiating context save before forcing waves to context save. A + * value of 0 indicates no grace period. It is ignored by + * hsaKmtQueueResume. + * + * Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL. + * Returns: + * - HSAKMT_STATUS_SUCCESS if successful. + * - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid for Pid. 
+ */ +HSAKMT_STATUS +HSAKMTAPI +hsaKmtQueueSuspend( + HSAuint32 Pid, // IN + HSAuint32 NumQueues, // IN + HSA_QUEUEID *Queues, // IN + HSAuint32 GracePeriod, // IN + HSAuint32 Flags) // IN +{ CHECK_KFD_OPEN(); - if (Pid == INVALID_PID) { - current_pid = getpid(); - if (current_pid == 0) - return HSAKMT_STATUS_INVALID_HANDLE; - Pid = (HSAuint32) current_pid; - } - - return debug_trap(NodeId, - KFD_IOC_DBG_TRAP_NODE_SUSPEND, - Pid, - NodeId, - Flags); -} - -HSAKMT_STATUS HSAKMTAPI hsaKmtNodeResume( - HSAuint32 Pid, - HSAuint32 NodeId, - HSAuint32 Flags) -{ - pid_t current_pid = INVALID_PID; - - CHECK_KFD_OPEN(); - - if (Pid == INVALID_PID) { - current_pid = getpid(); - if (current_pid == 0) - return HSAKMT_STATUS_INVALID_HANDLE; - Pid = (HSAuint32) current_pid; - } - - return debug_trap(NodeId, - KFD_IOC_DBG_TRAP_NODE_RESUME, + return debug_trap(INVALID_NODEID, + KFD_IOC_DBG_TRAP_NODE_SUSPEND, + Flags, + NumQueues, + GracePeriod, Pid, - NodeId, - Flags); + (HSAuint64)Queues); +} +/** + * Resume the execution of a set of queues. If a queue is not + * suspended by hsaKmtDbgQueueSuspend() then it remains executing. Any + * changes to the wave state data will be used when the waves are + * restored. Changes to the control stack data will have no effect. + * + * For each node that has a queue resumed, a sequentially consistent + * system scope release will be performed that synchronizes with a + * sequentially consistent system scope acquire performed by all + * queues being resumed. This ensures any memory updates performed by + * the thread calling this operation are visible to the resumed + * queues. + * + * For each node that has a queue resumed, the instruction cache will + * be invalidated. This ensures any instruction code updates performed + * by the thread calling this operation are visible to the resumed + * queues. + * + * Pid is the process that owns the queues that are to be suspended or + * resumed. 
If the value is -1 then the Pid of the process calling + * hsaKmtQueueSuspend or hsaKmtQueueResume is used. + * + * NumQueues is the number of queues that are being requested to + * suspend or resume. + * + * Queues is a pointer to an array with NumQueues entries of + * HSA_QUEUEID. The queues in the list must be for queues that exist + * for Pid, and can be a mixture of queues for different nodes. + * + * Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL. + * Returns: + * - HSAKMT_STATUS_SUCCESS if successful. + * - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid. + */ + +HSAKMT_STATUS +HSAKMTAPI +hsaKmtQueueResume( + HSAuint32 Pid, // IN + HSAuint32 NumQueues, // IN + HSA_QUEUEID *Queues, // IN + HSAuint32 Flags) // IN +{ + + CHECK_KFD_OPEN(); + + return debug_trap(INVALID_NODEID, + KFD_IOC_DBG_TRAP_NODE_RESUME, + Flags, + NumQueues, + 0, + Pid, + (HSAuint64) Queues); } diff --git a/src/libhsakmt.ver b/src/libhsakmt.ver index 12bb825f7f..8e9b745042 100644 --- a/src/libhsakmt.ver +++ b/src/libhsakmt.ver @@ -62,8 +62,8 @@ hsaKmtDisableDebugTrap; hsaKmtSetDebugTrapData2; hsaKmtSetWaveLaunchTrapOverride; hsaKmtSetWaveLaunchMode; -hsaKmtNodeSuspend; -hsaKmtNodeResume; +hsaKmtQueueSuspend; +hsaKmtQueueResume; local: *; }; diff --git a/tests/kfdtest/src/KFDDBGTest.cpp b/tests/kfdtest/src/KFDDBGTest.cpp index 98a0e3db0d..e27868856b 100644 --- a/tests/kfdtest/src/KFDDBGTest.cpp +++ b/tests/kfdtest/src/KFDDBGTest.cpp @@ -208,7 +208,7 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) { ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node"; - HSAuint32 Flags = HSA_DBG_NODE_CONTROL_NO_GRACE_PERIOD; + HSAuint32 Flags = 0; HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); HsaMemoryBuffer iterateBuf(PAGE_SIZE, defaultGPUNode, true, false, false); HsaMemoryBuffer resultBuf(PAGE_SIZE, defaultGPUNode, true, false, false); @@ -222,6 +222,7 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) { 
m_pIsaGen->CompileShader(iterate_isa_gfx9, "iterate_isa", isaBuffer); PM4Queue queue1; + HSA_QUEUEID queue_ids[2]; ASSERT_SUCCESS(queue1.Create(defaultGPUNode)); @@ -242,9 +243,13 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) { // Submit the shader, queue1 dispatch1->Submit(queue1); + queue_ids[0] = 0; - ASSERT_SUCCESS(hsaKmtNodeSuspend(INVALID_PID, - defaultGPUNode, + ASSERT_SUCCESS(hsaKmtQueueSuspend( + INVALID_PID, + 1, // one queue + queue_ids, + 10, // grace period Flags)); syncStatus = dispatch1->SyncWithStatus(suspendTimeout); @@ -263,8 +268,10 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) { ASSERT_NE(iter[0], result[0]); - ASSERT_SUCCESS(hsaKmtNodeResume(INVALID_PID, - defaultGPUNode, + ASSERT_SUCCESS(hsaKmtQueueResume( + INVALID_PID, + 1, // Num queues + queue_ids, Flags)); dispatch1->Sync();