libhsakmt: Update wave suspend/resume API

Update the thunk to the new KFD suspend and resume API. Callers can
now pass in a list of queues to suspend, rather than only being able
to suspend all of the queues for the process.

The kfdtest testcase was also updated so it still compiles.

Change-Id: I71d1b178476bd9df0c311bdedaa6a891528cebcf
Signed-off-by: Philip Cox <Philip.Cox@amd.com>
Este commit está contenido en:
Philip Cox
2019-03-29 14:42:53 -04:00
padre d21e9d5bbd
commit c2c1385e29
Se han modificado 6 ficheros con 244 adiciones y 108 borrados
+60 -26
Ver fichero
@@ -624,54 +624,88 @@ hsaKmtDbgAddressWatch(
/**
Suspend the execution of a set of queues. A queue that is suspended
allows the context save state to be inspected and modified. If a
allows the wave context save state to be inspected and modified. If a
queue is already suspended it remains suspended. A suspended queue
can be resumed by hsaKmtQueueResume().
If NoGracePeriod is false then the default grace period used for
waiting for waves to complete before context switching is used. If
NoGracePeriod is true then no grace period us used and waves are
context saved as soon as possible.
If MemFence is true all queues being suspended will perform a
sequentially consistent system scope release that synchronizes with
a sequentially consistent system scope acquire performed by this
For each node that has a queue suspended, a sequentially consistent
system scope release will be performed that synchronizes with a
sequentially consistent system scope acquire performed by this
call. This ensures any memory updates performed by the suspended
queues are visible to the thread calling this operation.
Pid is the process that owns the queues that are to be suspended or
resumed. If the value is -1 then the Pid of the process calling
hsaKmtQueueSuspend or hsaKmtQueueResume is used.
NumQueues is the number of queues that are being requested to
suspend or resume.
Queues is a pointer to an array with NumQueues entries of
HSA_QUEUEID. The queues in the list must be for queues that exist
for Pid, and can be a mixture of queues for different nodes.
GracePeriod is the number of milliseconds to wait after initiating
context save before forcing waves to context save. A value of 0
indicates no grace period. It is ignored by hsaKmtQueueResume.
Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL.
Returns:
- HSAKMT_STATUS_SUCCESS if successful.
- HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid.
- HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid for Pid.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtNodeSuspend(
HSAuint32 Pid,
HSAuint32 NodeId,
HSAuint32 Flags);
HSAKMTAPI
hsaKmtQueueSuspend(
HSAuint32 Pid, // IN
HSAuint32 NumQueues, // IN
HSA_QUEUEID *Queues, // IN
HSAuint32 GracePeriod, // IN
HSAuint32 Flags); // IN
/**
Resume the execution of a set of queues. If a queue is not
suspended by hsaKmtDbgQueueSuspend() then it remains executing.
suspended by hsaKmtQueueSuspend() then it remains executing. Any
changes to the wave state data will be used when the waves are
restored. Changes to the control stack data will have no effect.
If MemFence is true this call will perform a sequentially
consistent system scope release that synchronizes with a
For each node that has a queue resumed, a sequentially consistent
system scope release will be performed that synchronizes with a
sequentially consistent system scope acquire performed by all
queues being resumed. This ensures any memory updates performed by
the thread calling this operation are visible to the resumed
queues.
Returns:
- HSAKMT_STATUS_SUCCESS if successful.
For each node that has a queue resumed, the instruction cache will
be invalidated. This ensures any instruction code updates performed
by the thread calling this operation are visible to the resumed
queues.
Pid is the process that owns the queues that are to be suspended or
resumed. If the value is -1 then the Pid of the process calling
hsaKmtQueueSuspend or hsaKmtQueueResume is used.
NumQueues is the number of queues that are being requested to
suspend or resume.
Queues is a pointer to an array with NumQueues entries of
HSA_QUEUEID. The queues in the list must be for queues that exist
for Pid, and can be a mixture of queues for different nodes.
Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL.
Returns:
- HSAKMT_STATUS_SUCCESS if successful
- HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtNodeResume(
HSAuint32 Pid,
HSAuint32 NodeId,
HSAuint32 Flags);
HSAKMTAPI
hsaKmtQueueResume(
HSAuint32 Pid, // IN
HSAuint32 NumQueues, // IN
HSA_QUEUEID *Queues, // IN
HSAuint32 Flags); // IN
/**
Enable debug trap for NodeId. If QueueId is INVALID_QUEUEID then
+4 -3
Ver fichero
@@ -804,10 +804,11 @@ typedef enum _HSA_DBG_WAVE_LAUNCH_MODE
HSA_DBG_WAVE_LAUNCH_MODE_NUM
} HSA_DBG_WAVE_LAUNCH_MODE;
/**
* There are no flags currently defined.
*/
typedef enum HSA_DBG_NODE_CONTROL {
HSA_DBG_NODE_CONTROL_NO_GRACE_PERIOD = 0x01,
HSA_DBG_NODE_CONTROL_MEMORY_FENCE = 0x02,
HSA_DBG_NODE_CONTROL_UPDATE_CONTEXT = 0x04,
HSA_DBG_NODE_CONTROL_FLAG_MAX = 0x01
} HSA_DBG_NODE_CONTROL;
+19 -22
Ver fichero
@@ -188,64 +188,61 @@ struct kfd_ioctl_dbg_wave_control_args {
};
/* KFD_IOC_DBG_TRAP_ENABLE:
* ptr: unused
* data1: 0=disable, 1=enable
* data2: queue ID (for future use)
* data3: unused
* data4: unused
*/
#define KFD_IOC_DBG_TRAP_ENABLE 0
/* KFD_IOC_DBG_TRAP_SET_TRAP_DATA:
* ptr: unused
* data1: SPI_GDBG_TRAP_DATA0
* data2: SPI_GDBG_TRAP_DATA1
* data3: unused
* data4: unused
*/
#define KFD_IOC_DBG_TRAP_SET_TRAP_DATA 1
/* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
* ptr: unused
* data1: override mode: 0=OR, 1=REPLACE
* data2: mask
* data3: unused
* data4: unused
*/
#define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE 2
/* KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
* ptr: unused
* data1: 0=normal, 1=halt, 2=kill, 3=singlestep, 4=disable
* data2: unused
* data3: unused
* data4: unused
*/
#define KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE 3
#define KFD__DBG_NODE_SUSPEND_NO_GRACE 0x01
#define KFD__DBG_NODE_SUSPEND_MEMORY_FENCE 0x02
#define KFD__DBG_NODE_SUSPEND_UPDATE_CONTEXT 0x04
/* KFD_IOC_DBG_TRAP_NODE_SUSPEND:
* data1: pid
* data2: nodeid
* data3: flags no_grace=0x01 memory_fence=0x02 update_context=0x04
* data4: unused
* ptr: pointer to an array of Queues IDs
* data1: flags
* data2: number of queues
* data3: grace period
*/
#define KFD_IOC_DBG_TRAP_NODE_SUSPEND 4
/* KFD_IOC_DBG_TRAP_NODE_RESUME:
* data1: pid
* data2: nodeid
* data3: flags no_grace=0x01 memory_fence=0x02 update_context=0x04
* data4: unused
* ptr: pointer to an array of Queues IDs
* data1: flags
* data2: number of queues
* data3: unused
*/
#define KFD_IOC_DBG_TRAP_NODE_RESUME 5
struct kfd_ioctl_dbg_trap_args {
__u32 gpu_id; /* to KFD */
__u32 op; /* to KFD */
__u32 data1; /* to KFD */
__u32 data2; /* to KFD */
__u32 data3; /* to KFD */
__u32 data4; /* to KFD */
__u64 ptr; /* to KFD -- used for pointer arguments: queue arrays */
__u32 pid; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 op; /* to KFD */
__u32 data1; /* to KFD */
__u32 data2; /* to KFD */
__u32 data3; /* to KFD */
};
/* Matching HSA_EVENTTYPE */
+147 -50
Ver fichero
@@ -273,7 +273,10 @@ static HSAKMT_STATUS debug_trap(HSAuint32 NodeId,
HSAuint32 op,
HSAuint32 data1,
HSAuint32 data2,
HSAuint32 data3)
HSAuint32 data3,
HSAuint32 pid,
HSAuint64 pointer
)
{
uint32_t gpu_id;
HSAKMT_STATUS result;
@@ -282,16 +285,29 @@ static HSAKMT_STATUS debug_trap(HSAuint32 NodeId,
CHECK_KFD_OPEN();
if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_HANDLE;
// Queue suspend and queue resume both address queues by ID rather than by
// node, so both ops take this branch and must be called with INVALID_NODEID.
// (The original condition tested KFD_IOC_DBG_TRAP_NODE_RESUME twice, so the
// suspend op could never reach this branch.)
if (op == KFD_IOC_DBG_TRAP_NODE_SUSPEND ||
op == KFD_IOC_DBG_TRAP_NODE_RESUME) {
if (NodeId != INVALID_NODEID)
return HSAKMT_STATUS_INVALID_HANDLE;
result = hsaKmtGetNodeProperties(NodeId, &NodeProperties);
// gpu_id is ignored for suspend/resume queues.
gpu_id = INVALID_NODEID;
} else {
if (validate_nodeid(NodeId, &gpu_id) != HSAKMT_STATUS_SUCCESS)
return HSAKMT_STATUS_INVALID_HANDLE;
if (result != HSAKMT_STATUS_SUCCESS)
return result;
result = hsaKmtGetNodeProperties(NodeId, &NodeProperties);
if (!NodeProperties.Capability.ui32.DebugTrapSupported)
return HSAKMT_STATUS_NOT_SUPPORTED;
if (result != HSAKMT_STATUS_SUCCESS)
return result;
if (!NodeProperties.Capability.ui32.DebugTrapSupported)
return HSAKMT_STATUS_NOT_SUPPORTED;
}
if (pid == INVALID_PID) {
pid = (HSAuint32) getpid();
}
memset(&args, 0x00, sizeof(args));
args.gpu_id = gpu_id;
@@ -299,7 +315,8 @@ static HSAKMT_STATUS debug_trap(HSAuint32 NodeId,
args.data1 = data1;
args.data2 = data2;
args.data3 = data3;
args.data4 = 0;
args.pid = pid;
args.ptr = pointer;
long err = kmtIoctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
@@ -317,12 +334,24 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtEnableDebugTrap(HSAuint32 NodeId,
if (QueueId != INVALID_QUEUEID)
return HSAKMT_STATUS_NOT_SUPPORTED;
return debug_trap(NodeId, KFD_IOC_DBG_TRAP_ENABLE, 1, QueueId, 0);
return debug_trap(NodeId,
KFD_IOC_DBG_TRAP_ENABLE,
1,
QueueId,
0,
INVALID_PID,
0);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtDisableDebugTrap(HSAuint32 NodeId)
{
return debug_trap(NodeId, KFD_IOC_DBG_TRAP_ENABLE, 0, 0, 0);
return debug_trap(NodeId,
KFD_IOC_DBG_TRAP_ENABLE,
0,
0,
0,
INVALID_PID,
0);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtSetDebugTrapData2(HSAuint32 NodeId,
@@ -333,6 +362,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetDebugTrapData2(HSAuint32 NodeId,
KFD_IOC_DBG_TRAP_SET_TRAP_DATA,
TrapData0,
TrapData1,
0,
INVALID_PID,
0);
}
@@ -348,6 +379,8 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetWaveLaunchTrapOverride(
KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE,
TrapOverride,
TrapMask,
0,
INVALID_PID,
0);
}
@@ -359,51 +392,115 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtSetWaveLaunchMode(
KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE,
WaveLaunchMode,
0,
0,
INVALID_PID,
0);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtNodeSuspend(
HSAuint32 Pid,
HSAuint32 NodeId,
HSAuint32 Flags)
{
pid_t current_pid = INVALID_PID;
/**
* Suspend the execution of a set of queues. A queue that is suspended
* allows the wave context save state to be inspected and modified. If a
* queue is already suspended it remains suspended. A suspended queue
* can be resumed by hsaKmtQueueResume().
*
* For each node that has a queue suspended, a sequentially consistent
* system scope release will be performed that synchronizes with a
* sequentially consistent system scope acquire performed by this
* call. This ensures any memory updates performed by the suspended
* queues are visible to the thread calling this operation.
*
* Pid is the process that owns the queues that are to be suspended or
* resumed. If the value is -1 then the Pid of the process calling
* hsaKmtQueueSuspend or hsaKmtQueueResume is used.
*
* NumQueues is the number of queues that are being requested to
* suspend or resume.
*
* Queues is a pointer to an array with NumQueues entries of
* HSA_QUEUEID. The queues in the list must be for queues that exist
* for Pid, and can be a mixture of queues for different nodes.
*
* GracePeriod is the number of milliseconds to wait after
* initiating context save before forcing waves to context save. A
* value of 0 indicates no grace period. It is ignored by
* hsaKmtQueueResume.
*
* Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL.
* Returns:
* - HSAKMT_STATUS_SUCCESS if successful.
* - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid for Pid.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueueSuspend(
HSAuint32 Pid, // IN
HSAuint32 NumQueues, // IN
HSA_QUEUEID *Queues, // IN
HSAuint32 GracePeriod, // IN
HSAuint32 Flags) // IN
{
CHECK_KFD_OPEN();
if (Pid == INVALID_PID) {
current_pid = getpid();
if (current_pid == 0)
return HSAKMT_STATUS_INVALID_HANDLE;
Pid = (HSAuint32) current_pid;
}
return debug_trap(NodeId,
KFD_IOC_DBG_TRAP_NODE_SUSPEND,
Pid,
NodeId,
Flags);
}
HSAKMT_STATUS HSAKMTAPI hsaKmtNodeResume(
HSAuint32 Pid,
HSAuint32 NodeId,
HSAuint32 Flags)
{
pid_t current_pid = INVALID_PID;
CHECK_KFD_OPEN();
if (Pid == INVALID_PID) {
current_pid = getpid();
if (current_pid == 0)
return HSAKMT_STATUS_INVALID_HANDLE;
Pid = (HSAuint32) current_pid;
}
return debug_trap(NodeId,
// NOTE(review): this call is in the hsaKmtQueueSuspend() body, yet the op
// passed is KFD_IOC_DBG_TRAP_NODE_RESUME. KFD_IOC_DBG_TRAP_NODE_SUSPEND looks
// intended (GracePeriod is documented as ignored on resume) -- confirm, and
// make sure debug_trap() accepts that op together with INVALID_NODEID.
return debug_trap(INVALID_NODEID,
KFD_IOC_DBG_TRAP_NODE_RESUME,
Flags,
NumQueues,
GracePeriod,
Pid,
NodeId,
Flags);
(HSAuint64)Queues);
}
/**
* Resume the execution of a set of queues. If a queue is not
* suspended by hsaKmtQueueSuspend() then it remains executing. Any
* changes to the wave state data will be used when the waves are
* restored. Changes to the control stack data will have no effect.
*
* For each node that has a queue resumed, a sequentially consistent
* system scope release will be performed that synchronizes with a
* sequentially consistent system scope acquire performed by all
* queues being resumed. This ensures any memory updates performed by
* the thread calling this operation are visible to the resumed
* queues.
*
* For each node that has a queue resumed, the instruction cache will
* be invalidated. This ensures any instruction code updates performed
* by the thread calling this operation are visible to the resumed
* queues.
*
* Pid is the process that owns the queues that are to be suspended or
* resumed. If the value is -1 then the Pid of the process calling
* hsaKmtQueueSuspend or hsaKmtQueueResume is used.
*
* NumQueues is the number of queues that are being requested to
* suspend or resume.
*
* Queues is a pointer to an array with NumQueues entries of
* HSA_QUEUEID. The queues in the list must be for queues that exist
* for Pid, and can be a mixture of queues for different nodes.
*
* Flags is a bit set of the values defined by HSA_DBG_NODE_CONTROL.
* Returns:
* - HSAKMT_STATUS_SUCCESS if successful
* - HSAKMT_STATUS_INVALID_HANDLE if any QueueId is invalid.
*/
HSAKMT_STATUS
HSAKMTAPI
hsaKmtQueueResume(
HSAuint32 Pid, // IN
HSAuint32 NumQueues, // IN
HSA_QUEUEID *Queues, // IN
HSAuint32 Flags) // IN
{
CHECK_KFD_OPEN();
return debug_trap(INVALID_NODEID,
KFD_IOC_DBG_TRAP_NODE_RESUME,
Flags,
NumQueues,
0,
Pid,
(HSAuint64) Queues);
}
+2 -2
Ver fichero
@@ -62,8 +62,8 @@ hsaKmtDisableDebugTrap;
hsaKmtSetDebugTrapData2;
hsaKmtSetWaveLaunchTrapOverride;
hsaKmtSetWaveLaunchMode;
hsaKmtNodeSuspend;
hsaKmtNodeResume;
hsaKmtQueueSuspend;
hsaKmtQueueResume;
local: *;
};
+12 -5
Ver fichero
@@ -208,7 +208,7 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) {
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
HSAuint32 Flags = HSA_DBG_NODE_CONTROL_NO_GRACE_PERIOD;
HSAuint32 Flags = 0;
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
HsaMemoryBuffer iterateBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
HsaMemoryBuffer resultBuf(PAGE_SIZE, defaultGPUNode, true, false, false);
@@ -222,6 +222,7 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) {
m_pIsaGen->CompileShader(iterate_isa_gfx9, "iterate_isa", isaBuffer);
PM4Queue queue1;
HSA_QUEUEID queue_ids[2];
ASSERT_SUCCESS(queue1.Create(defaultGPUNode));
@@ -242,9 +243,13 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) {
// Submit the shader, queue1
dispatch1->Submit(queue1);
queue_ids[0] = 0;
ASSERT_SUCCESS(hsaKmtNodeSuspend(INVALID_PID,
defaultGPUNode,
ASSERT_SUCCESS(hsaKmtQueueSuspend(
INVALID_PID,
1, // one queue
queue_ids,
10, // grace period
Flags));
syncStatus = dispatch1->SyncWithStatus(suspendTimeout);
@@ -263,8 +268,10 @@ TEST_F(KFDDBGTest, BasicDebuggerSuspendResume) {
ASSERT_NE(iter[0], result[0]);
ASSERT_SUCCESS(hsaKmtNodeResume(INVALID_PID,
defaultGPUNode,
ASSERT_SUCCESS(hsaKmtQueueResume(
INVALID_PID,
1, // Num queues
queue_ids,
Flags));
dispatch1->Sync();