/* * Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * */ #include "KFDIPCTest.hpp" #include #include #include #include #include #include #include #include #include "PM4Queue.hpp" #include "PM4Packet.hpp" #include "SDMAQueue.hpp" #include "SDMAPacket.hpp" /* Fixture set-up: runs KFDBaseComponentTest::SetUp() between the ROUTINE_START and ROUTINE_END macros. */ void KFDIPCTest::SetUp() { ROUTINE_START KFDBaseComponentTest::SetUp(); ROUTINE_END } /* Fixture tear-down: runs KFDBaseComponentTest::TearDown() between the ROUTINE_START and ROUTINE_END macros. */ void KFDIPCTest::TearDown() { ROUTINE_START KFDBaseComponentTest::TearDown(); ROUTINE_END } /* Destructor: when m_ChildPid == 0 (the value fork() yields in the child) the process is terminated here, with the current test's Failed() flag as its exit status. */ KFDIPCTest::~KFDIPCTest(void) { /* exit() is necessary for the child process. Otherwise when the * child process finishes, gtest assumes the test has finished and * starts the next test while the parent is still active. */ if (m_ChildPid == 0) exit(::testing::UnitTest::GetInstance()->current_test_info()->result()->Failed()); } /* Import shared Local Memory from parent process. Check for the pattern * filled in by the parent process. Then fill a new pattern. 
* * Check import handle has same HsaMemFlags as export handle to verify thunk and KFD * import export handle ioctl pass HsaMemFlags correctly. */ void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd, HsaMemFlags mflags) { /* Open KFD device for child process. This needs to called before * any memory definitions */ if (HSAKMT_STATUS_SUCCESS != hsaKmtOpenKFD()) exit(1); SDMAQueue sdmaQueue; HsaSharedMemoryHandle sharedHandleLM; HSAuint64 size = PAGE_SIZE, sharedSize; HsaMemoryBuffer tempSysBuffer(size, defaultGPUNode, false); HSAuint32 *sharedLocalBuffer = NULL; HsaMemMapFlags mapFlags = {0}; /* Read from Pipe the shared Handle. Import shared Local Memory */ ASSERT_GE(read(pipefd[0], reinterpret_cast(&sharedHandleLM), sizeof(sharedHandleLM)), 0); ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM, reinterpret_cast(&sharedLocalBuffer), &sharedSize)); ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sharedLocalBuffer, sharedSize, NULL, mapFlags, 1, reinterpret_cast(&defaultGPUNode))); /* Check for pattern in the shared Local Memory */ ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode)); size = size < sharedSize ? 
size : sharedSize; sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), tempSysBuffer.As(), sharedLocalBuffer, size)); sdmaQueue.Wait4PacketConsumption(); EXPECT_TRUE(WaitOnValue(tempSysBuffer.As(), 0xAAAAAAAA)); /* Fill in the Local Memory with different pattern */ sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sdmaQueue.GetFamilyId(), sharedLocalBuffer, 0xBBBBBBBB)); sdmaQueue.Wait4PacketConsumption(); HsaPointerInfo ptrInfo; EXPECT_SUCCESS(hsaKmtQueryPointerInfo(sharedLocalBuffer, &ptrInfo)); EXPECT_EQ(ptrInfo.Type, HSA_POINTER_REGISTERED_SHARED); EXPECT_EQ(ptrInfo.Node, (HSAuint32)defaultGPUNode); EXPECT_EQ(ptrInfo.GPUAddress, (HSAuint64)sharedLocalBuffer); EXPECT_EQ(ptrInfo.SizeInBytes, sharedSize); EXPECT_EQ(ptrInfo.MemFlags.Value, mflags.Value); /* Clean up */ EXPECT_SUCCESS(sdmaQueue.Destroy()); EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(sharedLocalBuffer)); EXPECT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer)); } /* Fill a pattern into Local Memory and share with the child process. * Then wait until Child process to exit and check for the new pattern * filled in by the child process. 
*/ void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd, HsaMemFlags mflags) { HSAuint64 size = PAGE_SIZE, sharedSize; int status; HSAuint64 AlternateVAGPU; void *toShareLocalBuffer; HsaMemoryBuffer tempSysBuffer(PAGE_SIZE, defaultGPUNode, false); SDMAQueue sdmaQueue; HsaSharedMemoryHandle sharedHandleLM; HsaMemMapFlags mapFlags = {0}; ASSERT_SUCCESS(hsaKmtAllocMemory(defaultGPUNode, size, mflags, &toShareLocalBuffer)); /* Fill a Local Buffer with a pattern */ ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(toShareLocalBuffer, size, &AlternateVAGPU, mapFlags, 1, reinterpret_cast(&defaultGPUNode))); tempSysBuffer.Fill(0xAAAAAAAA); /* Copy pattern in Local Memory before sharing it */ ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode)); sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), toShareLocalBuffer, tempSysBuffer.As(), size)); sdmaQueue.Wait4PacketConsumption(); /* Share it with the child process */ ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer, size, &sharedHandleLM)); ASSERT_GE(write(pipefd[1], reinterpret_cast(&sharedHandleLM), sizeof(sharedHandleLM)), 0); /* Wait for the child to finish */ waitpid(cpid, &status, 0); EXPECT_EQ(WIFEXITED(status), 1); EXPECT_EQ(WEXITSTATUS(status), 0); /* Check for the new pattern filled in by child process */ sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(sdmaQueue.GetFamilyId(), tempSysBuffer.As(), toShareLocalBuffer, size)); sdmaQueue.Wait4PacketConsumption(); EXPECT_TRUE(WaitOnValue(tempSysBuffer.As(), 0xBBBBBBBB)); /* Clean up */ EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(toShareLocalBuffer)); EXPECT_SUCCESS(sdmaQueue.Destroy()); } /* Test IPC memory. * 1. Parent Process [Create/Fill] LocalMemory (LM) --share--> Child Process * 2. Child Process import LM and check for the pattern. * 3. Child Process fill in a new pattern and quit. * 4. Parent Process wait for the Child process to finish and then check for * the new pattern in LM * * IPC support is limited to Local Memory. 
*/
TEST_F(KFDIPCTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    int pipefd[2];
    HsaMemFlags mflags = {0};

    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    /* Test libhsakmt fork() clean up by defining some buffers. These
     * buffers get duplicated in the child process but are not valid
     * as it doesn't have proper mapping in GPU. The clean up code in libhsakmt
     * should handle it
     */
    volatile HSAuint32 stackData[1];
    HsaMemoryBuffer tmpSysBuffer(PAGE_SIZE, defaultGPUNode, false);
    HsaMemoryBuffer tmpUserptrBuffer((void *)&stackData[0], sizeof(HSAuint32));

    /* Create Pipes for communicating shared handles */
    ASSERT_EQ(pipe(pipefd), 0);

    /* Create a child process and share the above Local Memory with it */
    mflags.ui32.NonPaged = 1;
    mflags.ui32.CoarseGrain = 1;

    m_ChildPid = fork();
    /* fork() returns -1 on failure; without this check the parent path would
     * run with an invalid child pid.
     */
    ASSERT_GE(m_ChildPid, 0) << "fork() failed";
    if (m_ChildPid == 0)
        BasicTestChildProcess(defaultGPUNode, pipefd, mflags); /* Child Process */
    else
        BasicTestParentProcess(defaultGPUNode, m_ChildPid, pipefd, mflags); /* Parent process */

    /* Code path executed by both parent and child with respective fds */
    close(pipefd[1]);
    close(pipefd[0]);

    TEST_END
}

/* Cross Memory Attach Test. Memory Descriptor Array.
 * The following 2 2D-arrays describe the source and destination memory arrays used
 * by CMA test. The entry is only valid if Size != 0. Each of these buffers will be
 * filled initially with "FillPattern". After the test the srcRange is still expected
 * to have the same pattern. The dstRange is expected to have srcRange pattern.
 *
 * For e.g. for TEST_COUNT = 1,
 *  srcRange has 2 buffers of size 0x1800. Buf1 filled with 0xA5A5A5A5 and Buf2
 *  filled with 0xAAAAAAAA
 *  dstRange has 3 buffers of size 0x1000. All of them filled 0xFFFFFFFF.
* After Copy: dstBuf1[0-0x1000] is expected to be 0xA5A5A5A5 * dstBuf2[0-0x800] is expected to be 0xA5A5A5A5 * dstBuf3[0x800-0x1000] is expected to be 0xAAAAAAAA * and dstBuf4[0x0-0x1000] is expected to be 0xAAAAAAAA * * For this CMA test, after copying only the first and the last of dstBuf is checked */ static testMemoryDescriptor srcRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = { { /* Memory Type Size FillPattern FirstItem Last item */ { CMA_MEM_TYPE_USERPTR, 0x801800, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_USERPTR, 0x1800, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA }, { CMA_MEM_TYPE_USERPTR, 0x0, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_USERPTR, 0x0, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, }, { { CMA_MEM_TYPE_SYSTEM, 0x208000, 0xDEADBEEF, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_SYSTEM, 0x4000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_SYSTEM, 0x6000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_SYSTEM, 0x2000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, }, { { CMA_MEM_TYPE_LOCAL_MEM, 0x800000, 0xDEADBEEF, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 }, } }; static testMemoryDescriptor dstRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = { { /* Memory Type Size FillPattern FirstItem Last item */ { CMA_MEM_TYPE_USERPTR, 0x801000, 0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_USERPTR, 0x1000, 0xFFFFFFFF, 0xA5A5A5A5, 0xAAAAAAAA }, { CMA_MEM_TYPE_USERPTR, 0x1000, 0xFFFFFFFF, 0xAAAAAAAA, 0xAAAAAAAA }, { CMA_MEM_TYPE_USERPTR, 0x0, 0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 }, }, { { CMA_MEM_TYPE_SYSTEM, 0x202000, 0xFFFFFFFF, 0xDEADBEEF, 0xDEADBEEF }, { CMA_MEM_TYPE_SYSTEM, 0x4000, 0xFFFFFFFF, 0xDEADBEEF, 0xDEADBEEF }, { CMA_MEM_TYPE_SYSTEM, 0x8000, 0xFFFFFFFF, 0xDEADBEEF, 0xA5A5A5A5 }, { CMA_MEM_TYPE_SYSTEM, 0x6000, 0xFFFFFFFF, 0xA5A5A5A5, 
0xA5A5A5A5 }, }, { { CMA_MEM_TYPE_LOCAL_MEM, 0x800000, 0xFFFFFFFF, 0xDEADBEEF, 0xDEADBEEF }, { CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 }, { CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 }, } }; KFDCMAArray::KFDCMAArray() : m_ValidCount(0), m_QueueArray(HSA_QUEUE_SDMA) { memset(m_MemArray, 0, sizeof(m_MemArray)); memset(m_HsaMemoryRange, 0, sizeof(m_HsaMemoryRange)); } CMA_TEST_STATUS KFDCMAArray::Destroy() { for (int i = 0; i < m_ValidCount; i++) { if (m_MemArray[i]) { void *userPtr; userPtr = m_MemArray[i]->GetUserPtr(); delete m_MemArray[i]; if (userPtr) free(userPtr); } } memset(m_MemArray, 0, sizeof(m_MemArray)); memset(m_HsaMemoryRange, 0, sizeof(m_HsaMemoryRange)); m_ValidCount = 0; return CMA_TEST_SUCCESS; } /* Initialize KFDCMAArray based on array of testMemoryDescriptor. Usually testMemoryDescriptor[] is * statically defined array by the user. Only items with non-zero size are considered valid */ CMA_TEST_STATUS KFDCMAArray::Init(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE], int node) { CMA_TEST_STATUS err = CMA_TEST_SUCCESS; memset(m_MemArray, 0, sizeof(m_MemArray)); memset(m_HsaMemoryRange, 0, sizeof(m_HsaMemoryRange)); m_ValidCount = 0; for (int i = 0; i < CMA_MEMORY_TEST_ARRAY_SIZE; i++) { if ((*memDescriptor)[i].m_MemSize == 0) continue; switch ((*memDescriptor)[i].m_MemType) { case CMA_MEM_TYPE_SYSTEM: m_MemArray[i] = new HsaMemoryBuffer((*memDescriptor)[i].m_MemSize, node); break; case CMA_MEM_TYPE_USERPTR: { void *userPtr = malloc((*memDescriptor)[i].m_MemSize); m_MemArray[i] = new HsaMemoryBuffer(userPtr, (*memDescriptor)[i].m_MemSize); break; } case CMA_MEM_TYPE_LOCAL_MEM: m_MemArray[i] = new HsaMemoryBuffer((*memDescriptor)[i].m_MemSize, node, false, true); break; } if (m_MemArray[i]) { m_HsaMemoryRange[i].MemoryAddress = m_MemArray[i]->As(); m_HsaMemoryRange[i].SizeInBytes = m_MemArray[i]->Size(); 
m_ValidCount++; } else { err = CMA_TEST_NOMEM; break; } } return err; } /* Fill each buffer of KFDCMAArray with the pattern described by testMemoryDescriptor[] */ void KFDCMAArray::FillPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]) { SDMAQueue sdmaQueue; bool queueCreated = false; unsigned int queueNode; for (int i = 0; i < m_ValidCount; i++) { if (m_MemArray[i]->isLocal()) m_MemArray[i]->Fill((*memDescriptor)[i].m_FillPattern, *m_QueueArray.GetQueue(m_MemArray[i]->Node())); else m_MemArray[i]->Fill((*memDescriptor)[i].m_FillPattern); } } /* Check the first and last item of each buffer in KFDCMAArray with the pattern described by * testMemoryDescriptor[]. Return 0 on success. */ CMA_TEST_STATUS KFDCMAArray::checkPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]) { HSAuint64 lastItem; CMA_TEST_STATUS ret = CMA_TEST_SUCCESS; unsigned int queueNode = 0; bool queueCreated = false; HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */); volatile HSAuint32 *tmp = tmpBuffer.As(); for (int i = 0; i < m_ValidCount; i++) { lastItem = m_MemArray[i]->Size(); lastItem -= sizeof(HSAuint32); if (m_MemArray[i]->isLocal()) { BaseQueue *sdmaQueue = m_QueueArray.GetQueue(m_MemArray[i]->Node()); if (!m_MemArray[i]->IsPattern(0, (*memDescriptor)[i].m_CheckFirstWordPattern, *sdmaQueue, tmp) || !m_MemArray[i]->IsPattern(lastItem, (*memDescriptor)[i].m_CheckLastWordPattern, *sdmaQueue, tmp)) { ret = CMA_CHECK_PATTERN_ERROR; break; } } else { if (!m_MemArray[i]->IsPattern(0, (*memDescriptor)[i].m_CheckFirstWordPattern) || !m_MemArray[i]->IsPattern(lastItem, (*memDescriptor)[i].m_CheckLastWordPattern)) { ret = CMA_CHECK_PATTERN_ERROR; break; } } } return ret; } /* Non-blocking read and write to avoid Test from hanging (block indefinitely) * if either server or client process exits due to assert failure */ static int write_non_block(int fd, const void *buf, int size) { int total_bytes = 0, cur_bytes = 0; int retries = 5; struct timespec tm 
= { 0, 10000000ULL }; const char *ptr = (const char *)buf; do { cur_bytes = write(fd, ptr, (size - total_bytes)); if (cur_bytes < 0 && errno != EAGAIN) return cur_bytes; if (cur_bytes > 0) { total_bytes += cur_bytes; ptr += cur_bytes; } if (total_bytes < size) nanosleep(&tm, NULL); } while (total_bytes < size && retries--); /* Check for overflow */ if (total_bytes > size) return -1; return total_bytes; } static int read_non_block(int fd, void *buf, int size) { int total_bytes = 0, cur_bytes = 0; int retries = 5; struct timespec tm = { 0, 100000000ULL }; char *ptr = reinterpret_cast(buf); do { cur_bytes = read(fd, ptr, (size - total_bytes)); if (cur_bytes < 0 && errno != EAGAIN) return cur_bytes; if (cur_bytes > 0) { total_bytes += cur_bytes; ptr += cur_bytes; } if (total_bytes < size) nanosleep(&tm, NULL); } while (total_bytes < size && retries--); if (total_bytes > size) return -1; return total_bytes; } /* Send HsaMemoryRange to another process that is connected via writePipe */ CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) { if (write_non_block(writePipe, reinterpret_cast(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) != sizeof(m_HsaMemoryRange)) return CMA_IPC_PIPE_ERROR; return CMA_TEST_SUCCESS; } /* Send HsaMemoryRange from another process and initialize KFDCMAArray */ CMA_TEST_STATUS KFDCMAArray::recvCMAArray(int readPipe) { int i; if (read_non_block(readPipe, reinterpret_cast(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) != sizeof(m_HsaMemoryRange)) return CMA_IPC_PIPE_ERROR; for (i = 0; i < CMA_MEMORY_TEST_ARRAY_SIZE; i++) { if (m_HsaMemoryRange[i].SizeInBytes) m_ValidCount++; } return CMA_TEST_SUCCESS; } CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachChildProcess(int defaultGPUNode, int writePipe, int readPipe, CMA_TEST_TYPE testType) { KFDCMAArray cmaLocalArray; char msg[16]; int testNo; CMA_TEST_STATUS status; /* Initialize and fill Local Buffer Array with a pattern. * READ_TEST: Send the Array to parent process. 
Wait for the parent * to finish reading and checking. Then move to next text case or * quit if last one. * WRITE_TEST: Send Local Buffer Array to parent process and and wait * for parent to write to it. Check for new pattern. Then move to next * case or quit if last one. */ for (testNo = 0; testNo < CMA_TEST_COUNT; testNo++) { if (testType == CMA_READ_TEST) { cmaLocalArray.Init(&srcRange[testNo], defaultGPUNode); cmaLocalArray.FillPattern(&srcRange[testNo]); } else { cmaLocalArray.Init(&dstRange[testNo], defaultGPUNode); cmaLocalArray.FillPattern(&dstRange[testNo]); } if (cmaLocalArray.sendCMAArray(writePipe) < 0) { status = CMA_IPC_PIPE_ERROR; break; } /* Wait until the test is over */ memset(msg, 0, sizeof(msg)); if (read_non_block(readPipe, msg, 4) < 0) { status = CMA_IPC_PIPE_ERROR; break; } if (!strcmp(msg, "CHCK")) status = cmaLocalArray.checkPattern(&dstRange[testNo]); else if (!strcmp(msg, "NEXT")) status = CMA_TEST_SUCCESS; else if (!strcmp(msg, "EXIT")) status = CMA_TEST_ABORT; else status = CMA_PARENT_FAIL; cmaLocalArray.Destroy(); if (status != CMA_TEST_SUCCESS) break; } return status; } CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachParentProcess(int defaultGPUNode, pid_t cid, int writePipe, int readPipe, CMA_TEST_TYPE testType) { KFDCMAArray cmaLocalArray, cmaRemoteArray; HSAuint64 copied = 0; int testNo; CMA_TEST_STATUS status; /* Receive buffer array from child and then initialize and fill in Local Buffer Array. * READ_TEST: Copy remote buffer array into Local Buffer Array and then check * for the new pattern. * WRITE_TEST: Write Local Buffer Array into remote buffer array. Notify child to * to check for the new pattern. 
*/ for (testNo = 0; testNo < CMA_TEST_COUNT; testNo++) { status = cmaRemoteArray.recvCMAArray(readPipe); if (status != CMA_TEST_SUCCESS) break; if (testType == CMA_READ_TEST) { status = cmaLocalArray.Init(&dstRange[testNo], defaultGPUNode); if (status != CMA_TEST_SUCCESS) break; cmaLocalArray.FillPattern(&dstRange[testNo]); if (hsaKmtProcessVMRead(cid, cmaLocalArray.getMemoryRange(), cmaLocalArray.getValidRangeCount(), cmaRemoteArray.getMemoryRange(), cmaRemoteArray.getValidRangeCount(), &copied) != HSAKMT_STATUS_SUCCESS) { status = CMA_TEST_HSA_READ_FAIL; break; } status = cmaLocalArray.checkPattern(&dstRange[testNo]); if (status != CMA_TEST_SUCCESS) break; cmaLocalArray.Destroy(); cmaRemoteArray.Destroy(); if (write_non_block(writePipe, "NEXT", 4) < 0) { status = CMA_IPC_PIPE_ERROR; break; } } else { status = cmaLocalArray.Init(&srcRange[testNo], defaultGPUNode); if (status != CMA_TEST_SUCCESS) break; cmaLocalArray.FillPattern(&srcRange[testNo]); if (hsaKmtProcessVMWrite(cid, cmaLocalArray.getMemoryRange(), cmaLocalArray.getValidRangeCount(), cmaRemoteArray.getMemoryRange(), cmaRemoteArray.getValidRangeCount(), &copied) != HSAKMT_STATUS_SUCCESS) { status = CMA_TEST_HSA_WRITE_FAIL; break; } cmaLocalArray.Destroy(); cmaRemoteArray.Destroy(); if (write_non_block(writePipe, "CHCK", 4) < 0) { status = CMA_IPC_PIPE_ERROR; break; } } } /* for loop */ return status; } /* Test Cross Memory Attach * hsaKmtProcessVMRead and hsaKmtProcessVMWrite are GPU address equivalent to * process_vm_readv and process_vm_writev. These calls transfer data between * the address space of the calling process ("the local process") and the process * identified by pid ("the remote process"). * * In the tests parent process will be the local process and child will be * the remote. 
*/
TEST_F(KFDIPCTest, CrossMemoryAttachTest) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    int pipeCtoP[2], pipePtoC[2];
    int status;

    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    /* Create Pipes for communicating shared handles */
    ASSERT_EQ(pipe2(pipeCtoP, O_NONBLOCK), 0);
    ASSERT_EQ(pipe2(pipePtoC, O_NONBLOCK), 0);

    /* Create a child process and share the above Local Memory with it */
    m_ChildPid = fork();
    /* fork() returns -1 on failure; the parent path must not run without a child */
    ASSERT_GE(m_ChildPid, 0) << "fork() failed";

    if (m_ChildPid == 0) {
        /* Child Process. A child that cannot open KFD must quit here; falling
         * through to the parent branch would make it act as a second parent.
         */
        if (hsaKmtOpenKFD() != HSAKMT_STATUS_SUCCESS)
            exit(1);

        status = CrossMemoryAttachChildProcess(defaultGPUNode, pipeCtoP[1],
                                               pipePtoC[0], CMA_READ_TEST);
        EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Child: Read Test Fail";

        status = CrossMemoryAttachChildProcess(defaultGPUNode, pipeCtoP[1],
                                               pipePtoC[0], CMA_WRITE_TEST);
        EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Child: Write Test Fail";
    } else {
        /* Parent process */
        int childStatus;

        status = CrossMemoryAttachParentProcess(defaultGPUNode, m_ChildPid,
                                                pipePtoC[1], pipeCtoP[0], CMA_READ_TEST);
        EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Parent: Read Test Fail";

        status = CrossMemoryAttachParentProcess(defaultGPUNode, m_ChildPid,
                                                pipePtoC[1], pipeCtoP[0], CMA_WRITE_TEST);
        EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Parent: Write Test Fail";

        /* childStatus is only meaningful when waitpid() actually reaped the child */
        ASSERT_EQ(waitpid(m_ChildPid, &childStatus, 0), m_ChildPid);
        EXPECT_TRUE(WIFEXITED(childStatus));
        EXPECT_EQ(WEXITSTATUS(childStatus), 0);
    }

    /* Code path executed by both parent and child with respective fds */
    close(pipeCtoP[1]);
    close(pipeCtoP[0]);
    close(pipePtoC[1]);
    close(pipePtoC[0]);

    TEST_END
}

/* Test Cross Memory Attach
 *
 * hsaKmtProcessVMRead and hsaKmtProcessVMWrite are GPU address equivalent to
 * process_vm_readv and process_vm_writev. These calls are used to transfer data
 * between the address space of the calling process ("the local process") and the process
 * identified by pid ("the remote process").
However, these functions should also work
 * with a single process and single BO.
 */
TEST_F(KFDIPCTest, CMABasicTest) {
    TEST_START(TESTPROFILE_RUNALL)

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
    HSAuint64 size = PAGE_SIZE;
    SDMAQueue sdmaQueue;
    HsaMemoryRange srcRange, dstRange;
    /* Initialized because it is inspected even after a call that is expected to fail */
    HSAuint64 copied = 0;
    /* 32-bit patterns are kept unsigned: neither value fits in a signed int */
    const HSAuint32 PATTERN1 = 0xA5A5A5A5, PATTERN2 = 0xFFFFFFFF;
    HSAKMT_STATUS status;

    ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";

    if (!GetVramSize(defaultGPUNode)) {
        LOG() << "Skipping test: No VRAM found." << std::endl;
        return;
    }

    ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
    /* Scratch buffer handed to the queue-based IsPattern() checks */
    HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
    volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();

    /* Initialize test buffer. Fill first half and second half with
     * different pattern
     */
    HsaMemoryBuffer testLocalBuffer(size, defaultGPUNode, false, true);
    testLocalBuffer.Fill(PATTERN1, sdmaQueue, 0, size/2);
    testLocalBuffer.Fill(PATTERN2, sdmaQueue, size/2, size/2);

    /* Test1. Copy (or overwrite) buffer onto itself */
    srcRange.MemoryAddress = testLocalBuffer.As<void *>();
    srcRange.SizeInBytes = size;
    dstRange.MemoryAddress = testLocalBuffer.As<void *>();
    dstRange.SizeInBytes = size;
    ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied));
    EXPECT_EQ(copied, size);

    EXPECT_TRUE(testLocalBuffer.IsPattern(0, PATTERN1, sdmaQueue, tmp));
    EXPECT_TRUE(testLocalBuffer.IsPattern(size - 4, PATTERN2, sdmaQueue, tmp));

    /* Test2. Test unaligned byte copy. Write 3 bytes to an unaligned destination address */
    const unsigned int unaligned_offset = 1;
    const unsigned int unaligned_size = 3;
    /* Bits of the 32-bit word at size/2 covered by the 3 copied bytes (0xFFFFFF00).
     * Computed in unsigned arithmetic so the shift never reaches a sign bit.
     */
    const HSAuint32 unaligned_mask = ((1u << (unaligned_size * 8)) - 1u) << (unaligned_offset * 8);
    HSAuint32 expected_pattern;

    srcRange.MemoryAddress = testLocalBuffer.As<void *>();
    /* Deliberately set to value > unaligned_size. Only unaligned_size
     * should be copied since dstRange.SizeInBytes == unaligned_size
     */
    srcRange.SizeInBytes = size;
    dstRange.MemoryAddress = static_cast<void *>(testLocalBuffer.As<char *>() + (size / 2) + unaligned_offset);
    dstRange.SizeInBytes = unaligned_size;
    ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied));
    EXPECT_EQ(copied, static_cast<HSAuint64>(unaligned_size));

    /* Untouched bits keep PATTERN2, the copied bits now hold PATTERN1 */
    expected_pattern = (PATTERN2 & ~unaligned_mask) | (PATTERN1 & unaligned_mask);
    EXPECT_TRUE(testLocalBuffer.IsPattern(size/2, expected_pattern, sdmaQueue, tmp));

    /* Test3. Test overflow and expect failure */
    srcRange.MemoryAddress = testLocalBuffer.As<void *>();
    srcRange.SizeInBytes = size;
    dstRange.MemoryAddress = static_cast<void *>(testLocalBuffer.As<char *>() + 4);
    dstRange.SizeInBytes = size; /* This should overflow since offset is VA + 4 */
    status = hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied);
    EXPECT_NE(status, HSAKMT_STATUS_SUCCESS);
    EXPECT_LE(copied, (size - 4));

    EXPECT_SUCCESS(sdmaQueue.Destroy());

    TEST_END
}