Create KFDTest for SDMA Fault

The purpose of this KFDTest is to investigate the behaviour of an
SDMA queue when an invalid memory address is used.

v2: Don't wait for SDMA queue to finish - it won't finish because of
the gpuvm fault.

v3: Create the kfd event before SDMA queue submission. This fixes the
issue where the gpuvm fault happens before the kfd event is created, so
the KFD exception handler can't find the kfd event (to wake up the kfd test).

v4: Instead of using 0x12345678 as the invalid VA, map one page of
FB to the GPU and then unmap it. Use the previously mapped GPUVA as
the invalid address.

Change-Id: I58af1511f75d869adddede302b238c2725f3fe5a
Signed-off-by: Ori Messinger <Ori.Messinger@amd.com>
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Este commit está contenido en:
Ori Messinger
2019-11-18 10:40:47 -05:00
cometido por Oak Zeng
padre dc784ba482
commit 46f5e83066
Se han modificado 2 ficheros con 105 adiciones y 0 borrados
+104
Ver fichero
@@ -113,6 +113,59 @@ queuefail:
queue.Destroy();
}
void KFDExceptionTest::TestSdmaException(int defaultGPUNode, void *pDst) {
SDMAQueue queue;
HsaEvent *vmFaultEvent;
HSAuint64 faultAddress, page_mask = ~((HSAuint64)PAGE_SIZE - 1);
HsaEventDescriptor eventDesc;
eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
eventDesc.NodeId = defaultGPUNode;
eventDesc.SyncVar.SyncVar.UserData = NULL;
eventDesc.SyncVar.SyncVarSize = 0;
m_ChildStatus = queue.Create(defaultGPUNode);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Queue create failed" << std::endl;
return;
}
m_ChildStatus = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Event create failed" << std::endl;
goto queuefail;
}
queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(),
reinterpret_cast<void *>(pDst),
0x02020202));
m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
WARN() << "Wait failed. No Exception triggered" << std::endl;
goto eventfail;
}
if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
WARN() << "Unexpected Event Received " << vmFaultEvent->EventData.EventType
<< std::endl;
m_ChildStatus = HSAKMT_STATUS_ERROR;
goto eventfail;
}
faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
if (faultAddress != ((HSAuint64)pDst & page_mask) ) {
WARN() << "Unexpected Fault Address " << faultAddress
<< " expected " << ((HSAuint64)pDst & page_mask) << std::endl;
m_ChildStatus = HSAKMT_STATUS_ERROR;
}
eventfail:
hsaKmtDestroyEvent(vmFaultEvent);
queuefail:
queue.Destroy();
}
/* Test Bad Address access in a child process */
TEST_F(KFDExceptionTest, AddressFault) {
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
@@ -244,3 +297,54 @@ TEST_F(KFDExceptionTest, FaultStorm) {
TEST_END
}
/* Test SDMA queue behaviour when it writes to an unmapped GPU virtual address.
 *
 * A child process allocates one page, maps it to the GPU and unmaps it again,
 * then passes that address to TestSdmaException(). The parent waits for the
 * child and checks how it ended: a normal exit with HSAKMT_STATUS_SUCCESS on
 * dGPU, termination by SIGSEGV otherwise.
 */
TEST_F(KFDExceptionTest, SdmaQueueException) {
    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (m_FamilyId == FAMILY_RV) {
        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
        return;
    }

    m_ChildPid = fork();
    // fork() returns -1 on failure. Without this check the parent branch would
    // call waitpid(-1, ...) and then inspect a status that was never written.
    ASSERT_GE(m_ChildPid, 0) << "fork failed";

    if (m_ChildPid == 0) {
        unsigned int *pDb = NULL;
        unsigned int *nullPtr = NULL;

        m_ChildStatus = hsaKmtOpenKFD();
        if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
            WARN() << "KFD open failed in child process" << std::endl;
            return;
        }

        m_MemoryFlags.ui32.NonPaged = 1;

        // Record each setup result in m_ChildStatus before asserting on it, so
        // an early return caused by ASSERT_* does not leave the successful
        // status of hsaKmtOpenKFD() behind.
        // NOTE(review): assumes the child's exit code is taken from
        // m_ChildStatus (done outside this block) - confirm.
        m_ChildStatus = hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE, m_MemoryFlags,
                                          reinterpret_cast<void**>(&pDb));
        ASSERT_SUCCESS(m_ChildStatus);

        // verify that pDb is not null before it's being used
        if (pDb == NULL)
            m_ChildStatus = HSAKMT_STATUS_ERROR;
        ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";

        // Map and immediately unmap the page: the resulting address is used
        // as the invalid destination for the SDMA write.
        m_ChildStatus = hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL);
        ASSERT_SUCCESS(m_ChildStatus);
        EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));

        // Sets m_ChildStatus according to whether the expected fault was seen.
        TestSdmaException(defaultGPUNode, pDb);

        EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, PAGE_SIZE));
    } else {
        int childStatus = 0;

        // Only trust childStatus if waitpid actually reaped our child.
        ASSERT_EQ(waitpid(m_ChildPid, &childStatus, 0), m_ChildPid)
            << "waitpid failed";

        if (is_dgpu()) {
            // WEXITSTATUS is only meaningful when WIFEXITED is true, so stop
            // here if the child did not exit normally.
            ASSERT_EQ(WIFEXITED(childStatus), true);
            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
        } else {
            // WTERMSIG is only meaningful when WIFSIGNALED is true.
            ASSERT_EQ(WIFSIGNALED(childStatus), true);
            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
        }
    }

    TEST_END
}
+1
Ver fichero
@@ -54,6 +54,7 @@ class KFDExceptionTest : public KFDBaseComponentTest {
void TestMemoryException(int defaultGPUNode, HSAuint64 pSrc, HSAuint64 pDst,
unsigned int dimX = 1, unsigned int dimY = 1,
unsigned int dimZ = 1);
void TestSdmaException(int defaultGPUNode, void *pDst);
protected: // Members
pid_t m_ChildPid;