kfdtest: Add KFD SDMA queue reset testing
The KFD can reset individual SDMA queues, similar to compute queue reset. Add a test for it.
This commit is contained in:
committed by
Kim, Jonathan
parent
d047708317
commit
ee890e7d2b
@@ -139,3 +139,127 @@ TEST_F(KFDNegativeTest, BasicPipeReset) {
|
||||
TEST_END
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic SDMA Reset
|
||||
*
|
||||
* To check SDMA queue reset, launch a healthy SDMA queue and a bad SDMA queue with
|
||||
* dispatches per SDMA engine.
|
||||
* Similar to compute queue reset, only processes that have bad SDMA queues should
|
||||
* be reset, leaving healthy SDMA queue unaffected.
|
||||
*
|
||||
*/
|
||||
TEST_F(KFDNegativeTest, BasicSDMAReset) {
|
||||
TEST_START(TESTPROFILE_RUNALL);
|
||||
|
||||
int gpuNode = m_NodeInfo.HsaDefaultGPUNode();
|
||||
ASSERT_GE(gpuNode, 0) << "failed to get default GPU Node";
|
||||
|
||||
const HsaNodeProperties *nodeProps = m_NodeInfo.GetNodeProperties(gpuNode);
|
||||
int totalEngines = nodeProps->NumSdmaEngines + nodeProps->NumSdmaXgmiEngines;
|
||||
bool perSDMAQueueResetSupported = nodeProps->Capability2.ui32.PerSDMAQueueResetSupported;
|
||||
|
||||
if (perSDMAQueueResetSupported) {
|
||||
int pipe1[2];
|
||||
int pipe2[2];
|
||||
pipe(pipe1);
|
||||
pipe(pipe2);
|
||||
|
||||
LOG() << std::dec << "Running SDMA queue reset on " << totalEngines
|
||||
<<" SDMA engines" << std::endl;
|
||||
|
||||
pid_t childPid = fork();
|
||||
|
||||
if (childPid == 0) {
|
||||
KFDBaseComponentTest::TearDown();
|
||||
KFDBaseComponentTest::SetUp();
|
||||
close(pipe1[1]); // Close write end of pipe1
|
||||
close(pipe2[0]); // Close read end of pipe2
|
||||
HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);
|
||||
unsigned int *dest = destBuf.As<unsigned int*>();
|
||||
for (int i = 0; i < totalEngines; i++) {
|
||||
HsaEvent *resetEvent;
|
||||
ASSERT_SUCCESS(CreateHWExceptionEvent(false, false, gpuNode, &resetEvent));
|
||||
|
||||
// wait for parent to schedule healthy queue on engine
|
||||
char buf1, buf2 ='x';
|
||||
read(pipe1[0], &buf1, 1);
|
||||
|
||||
// submit bad queue and destroy to trigger reset
|
||||
SDMAQueueByEngId queue(i);
|
||||
ASSERT_SUCCESS(queue.Create(gpuNode));
|
||||
queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), &dest[0], 0, 6));
|
||||
Delay(50);
|
||||
LOG() << std::dec << "Reset SDMA queue on engine " << i << std::endl;
|
||||
queue.Destroy();
|
||||
|
||||
// child expects hw exception event
|
||||
EXPECT_SUCCESS(hsaKmtWaitOnEvent(resetEvent, g_TestTimeOut));
|
||||
EXPECT_EQ(resetEvent->EventData.EventType, HSA_EVENTTYPE_HW_EXCEPTION);
|
||||
hsaKmtDestroyEvent(resetEvent);
|
||||
|
||||
// ack reset to parent and wait for parent to check healthy queue
|
||||
write(pipe2[1], &buf2, 1);
|
||||
read(pipe1[0], &buf1, 1);
|
||||
}
|
||||
|
||||
close(pipe1[0]);
|
||||
close(pipe2[1]);
|
||||
LOG() << "Child ==> Complete" << std::endl;
|
||||
exit(0);
|
||||
} else {
|
||||
int childStatus = 0;
|
||||
close(pipe1[0]); // Close read end of pipe1
|
||||
close(pipe2[1]); // Close write end of pipe2
|
||||
|
||||
// parent process should not intercept reset event on child queue reset
|
||||
HsaMemoryBuffer pollBuf(PAGE_SIZE, gpuNode, false);
|
||||
HsaMemoryBuffer destBuf(PAGE_SIZE, gpuNode, false);
|
||||
unsigned int *poll = pollBuf.As<unsigned int*>();
|
||||
unsigned int *dest = destBuf.As<unsigned int*>();
|
||||
uint32_t targetDestValue = 0x12345678;
|
||||
|
||||
for (int i = 0; i < totalEngines; i++) {
|
||||
poll[0] = 0;
|
||||
dest[0] = 0;
|
||||
HsaEvent *event;
|
||||
HsaEvent *resetEvent;
|
||||
ASSERT_SUCCESS(CreateHWExceptionEvent(false, false, gpuNode, &resetEvent));
|
||||
ASSERT_SUCCESS(CreateQueueTypeEvent(false, false, gpuNode, &event));
|
||||
|
||||
SDMAQueueByEngId queue(i);
|
||||
ASSERT_SUCCESS(queue.Create(gpuNode));
|
||||
|
||||
// submit write on poll to maintain non-zero read/write pointer
|
||||
// in engine during reset
|
||||
queue.PlaceAndSubmitPacket(SDMAPollRegMemPacket(&poll[0], 1));
|
||||
queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(), &dest[0], targetDestValue));
|
||||
|
||||
// wait for for child to trigger reset on engine
|
||||
char buf1 = 'x', buf2;
|
||||
write(pipe1[1], &buf1, 1);
|
||||
read(pipe2[0], &buf2, 1);
|
||||
|
||||
// expect no reset event, then update poll to trigger write completion check
|
||||
EXPECT_NE(HSAKMT_STATUS_SUCCESS, hsaKmtWaitOnEvent(resetEvent, 100));
|
||||
poll[0] = 1;
|
||||
queue.Wait4PacketConsumption();
|
||||
EXPECT_TRUE(WaitOnValue(&dest[0], targetDestValue));
|
||||
hsaKmtDestroyEvent(event);
|
||||
hsaKmtDestroyEvent(resetEvent);
|
||||
EXPECT_SUCCESS(queue.Destroy());
|
||||
write(pipe1[1], &buf1, 1);
|
||||
}
|
||||
|
||||
waitpid(childPid, &childStatus, 0);
|
||||
close(pipe1[1]);
|
||||
close(pipe2[0]);
|
||||
LOG() << "Parent ==> Complete" << std::endl;
|
||||
}
|
||||
} else {
|
||||
LOG() << "Skipping test: Family ID 0x" << m_FamilyId
|
||||
<< " with per-sdma queue reset support = "
|
||||
<< perSDMAQueueResetSupported << std::endl;
|
||||
}
|
||||
|
||||
TEST_END
|
||||
}
|
||||
|
||||
@@ -28,6 +28,8 @@
|
||||
|
||||
#include "PM4Queue.hpp"
|
||||
#include "KFDBaseComponentTest.hpp"
|
||||
#include "SDMAQueueByEngId.hpp"
|
||||
#include "SDMAPacket.hpp"
|
||||
|
||||
class KFDNegativeTest : public KFDBaseComponentTest {
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user