From 46f5e830660ea88a54e29f75e7e4ece06f5ae681 Mon Sep 17 00:00:00 2001
From: Ori Messinger
Date: Mon, 18 Nov 2019 10:40:47 -0500
Subject: [PATCH] Create KFDTest for SDMA Fault

The purpose of this KFDTest is to investigate the behaviour of an SDMA
queue when an invalid memory address is used.

v2: Don't wait for the SDMA queue to finish - it won't finish because of
    the gpuvm fault.
v3: Create the kfd event before SDMA queue submission. This fixes the
    issue where the gpuvm fault happens before the kfd event is created,
    so the KFD exception handler can't find the kfd event (to wake up
    the kfd test).
v4: Instead of using 0x12345678 as the invalid VA, map one page of FB to
    the gpu and unmap it. Use the mapped GPUVA as the invalid address.

Change-Id: I58af1511f75d869adddede302b238c2725f3fe5a
Signed-off-by: Ori Messinger
Signed-off-by: Oak Zeng
---
 tests/kfdtest/src/KFDExceptionTest.cpp | 124 +++++++++++++++++++++++++
 tests/kfdtest/src/KFDExceptionTest.hpp |   1 +
 2 files changed, 125 insertions(+)

diff --git a/tests/kfdtest/src/KFDExceptionTest.cpp b/tests/kfdtest/src/KFDExceptionTest.cpp
index 6656317304..4976d0e319 100644
--- a/tests/kfdtest/src/KFDExceptionTest.cpp
+++ b/tests/kfdtest/src/KFDExceptionTest.cpp
@@ -113,6 +113,72 @@ queuefail:
     queue.Destroy();
 }
 
+/*
+ * Submit one SDMA write to pDst and check the memory fault it raises.
+ *
+ * Creates an SDMA queue on defaultGPUNode and an HSA_EVENTTYPE_MEMORY event,
+ * submits a single SDMAWriteDataPacket that targets pDst, then waits up to
+ * g_TestTimeOut on the event. The result is left in m_ChildStatus: it is
+ * HSAKMT_STATUS_SUCCESS only when a memory event arrives whose virtual
+ * address equals pDst rounded down to a PAGE_SIZE boundary.
+ */
+void KFDExceptionTest::TestSdmaException(int defaultGPUNode, void *pDst) {
+    SDMAQueue queue;
+    HsaEvent *vmFaultEvent;
+    HSAuint64 faultAddress;
+    const HSAuint64 pageMask = ~(static_cast<HSAuint64>(PAGE_SIZE) - 1);
+    /* The reported fault address is compared against the page base of pDst */
+    const HSAuint64 expectedAddress = reinterpret_cast<HSAuint64>(pDst) & pageMask;
+
+    HsaEventDescriptor eventDesc;
+    eventDesc.EventType = HSA_EVENTTYPE_MEMORY;
+    eventDesc.NodeId = defaultGPUNode;
+    eventDesc.SyncVar.SyncVar.UserData = NULL;
+    eventDesc.SyncVar.SyncVarSize = 0;
+
+    m_ChildStatus = queue.Create(defaultGPUNode);
+    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
+        WARN() << "Queue create failed" << std::endl;
+        return;
+    }
+
+    /* Create the event before submitting, so it exists when the fault fires */
+    m_ChildStatus = hsaKmtCreateEvent(&eventDesc, true, false, &vmFaultEvent);
+    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
+        WARN() << "Event create failed" << std::endl;
+        goto queuefail;
+    }
+
+    /* No wait on the queue itself: the write faults and never completes */
+    queue.PlaceAndSubmitPacket(SDMAWriteDataPacket(queue.GetFamilyId(),
+                            reinterpret_cast<void *>(pDst),
+                            0x02020202));
+
+    m_ChildStatus = hsaKmtWaitOnEvent(vmFaultEvent, g_TestTimeOut);
+    if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
+        WARN() << "Wait failed. No Exception triggered" << std::endl;
+        goto eventfail;
+    }
+
+    if (vmFaultEvent->EventData.EventType != HSA_EVENTTYPE_MEMORY) {
+        WARN() << "Unexpected Event Received " << vmFaultEvent->EventData.EventType
+               << std::endl;
+        m_ChildStatus = HSAKMT_STATUS_ERROR;
+        goto eventfail;
+    }
+    faultAddress = vmFaultEvent->EventData.EventData.MemoryAccessFault.VirtualAddress;
+    if (faultAddress != expectedAddress) {
+        WARN() << "Unexpected Fault Address " << faultAddress
+               << " expected " << expectedAddress << std::endl;
+        m_ChildStatus = HSAKMT_STATUS_ERROR;
+    }
+
+eventfail:
+    hsaKmtDestroyEvent(vmFaultEvent);
+queuefail:
+    queue.Destroy();
+}
+
 /* Test Bad Address access in a child process */
 TEST_F(KFDExceptionTest, AddressFault) {
     TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
@@ -244,3 +310,61 @@ TEST_F(KFDExceptionTest, FaultStorm) {
 
     TEST_END
 }
+
+/*
+ * Test an SDMA write to an unmapped GPU address in a child process.
+ *
+ * The child allocates one page, maps it to the GPU and unmaps it again, then
+ * uses that address as the destination of an SDMA write through
+ * TestSdmaException. On dGPU the parent expects the child to exit normally
+ * with HSAKMT_STATUS_SUCCESS; otherwise it expects the child to be killed
+ * by SIGSEGV. The test is skipped on FAMILY_RV.
+ */
+TEST_F(KFDExceptionTest, SdmaQueueException) {
+    TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
+    TEST_START(TESTPROFILE_RUNALL)
+
+    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
+    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
+
+    if (m_FamilyId == FAMILY_RV) {
+        LOG() << "Skipping test: IOMMU issues on Raven." << std::endl;
+        return;
+    }
+
+    m_ChildPid = fork();
+    ASSERT_GE(m_ChildPid, 0) << "fork failed";
+    if (m_ChildPid == 0) {
+        unsigned int* pDb = NULL;
+        unsigned int *nullPtr = NULL;
+        m_ChildStatus = hsaKmtOpenKFD();
+        if (m_ChildStatus != HSAKMT_STATUS_SUCCESS) {
+            WARN() << "KFD open failed in child process" << std::endl;
+            return;
+        }
+        m_MemoryFlags.ui32.NonPaged = 1;
+        ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, PAGE_SIZE, m_MemoryFlags,
+                    reinterpret_cast<void**>(&pDb)));
+        // verify that pDb is not null before it's being used
+        ASSERT_NE(nullPtr, pDb) << "hsaKmtAllocMemory returned a null pointer";
+        // Map and immediately unmap: the unmapped GPU VA is the invalid address
+        ASSERT_SUCCESS(hsaKmtMapMemoryToGPU(pDb, PAGE_SIZE, NULL));
+        EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(pDb));
+
+        TestSdmaException(defaultGPUNode, pDb);
+        EXPECT_SUCCESS(hsaKmtFreeMemory(pDb, PAGE_SIZE));
+    } else {
+        int childStatus;
+
+        // childStatus is only meaningful when waitpid reaped our child
+        ASSERT_EQ(m_ChildPid, waitpid(m_ChildPid, &childStatus, 0));
+        if (is_dgpu()) {
+            EXPECT_EQ(WIFEXITED(childStatus), true);
+            EXPECT_EQ(WEXITSTATUS(childStatus), HSAKMT_STATUS_SUCCESS);
+        } else {
+            EXPECT_EQ(WIFSIGNALED(childStatus), true);
+            EXPECT_EQ(WTERMSIG(childStatus), SIGSEGV);
+        }
+    }
+
+    TEST_END
+}
diff --git a/tests/kfdtest/src/KFDExceptionTest.hpp b/tests/kfdtest/src/KFDExceptionTest.hpp
index bf6bd7812b..00b45fe5db 100644
--- a/tests/kfdtest/src/KFDExceptionTest.hpp
+++ b/tests/kfdtest/src/KFDExceptionTest.hpp
@@ -54,6 +54,7 @@ class KFDExceptionTest : public KFDBaseComponentTest {
     void TestMemoryException(int defaultGPUNode, HSAuint64 pSrc, HSAuint64 pDst,
                              unsigned int dimX = 1, unsigned int dimY = 1,
                              unsigned int dimZ = 1);
+    void TestSdmaException(int defaultGPUNode, void *pDst);
 
  protected:  // Members
     pid_t m_ChildPid;