d8009b4fd3
On small-BAR multi-GPU systems, hsaKmtMapMemoryToGPU will fail due to the latest kernel P2P sanity check. Switch to hsaKmtMapMemoryToGPUNodes to fix the failure. Change-Id: Id8b6329d1243df0e908cc9a171b5c7f9156f4a8b Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
738 строки
27 KiB
C++
738 строки
27 KiB
C++
/*
|
|
* Copyright (C) 2017-2018 Advanced Micro Devices, Inc. All Rights Reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include "KFDIPCTest.hpp"
|
|
#include <sys/types.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/stat.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <errno.h>
|
|
#include <vector>
|
|
#include "PM4Queue.hpp"
|
|
#include "PM4Packet.hpp"
|
|
#include "SDMAQueue.hpp"
|
|
#include "SDMAPacket.hpp"
|
|
|
|
/* Fixture set-up hook. It only forwards to the base component test's
 * SetUp(), wrapped in the ROUTINE_START / ROUTINE_END macros.
 */
void KFDIPCTest::SetUp() {
    ROUTINE_START

    KFDBaseComponentTest::SetUp();

    ROUTINE_END
}
|
|
|
|
/* Fixture tear-down hook. It only forwards to the base component test's
 * TearDown(), wrapped in the ROUTINE_START / ROUTINE_END macros.
 */
void KFDIPCTest::TearDown() {
    ROUTINE_START

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
|
|
|
|
/* Destructor. The tests in this file store the result of fork() in
 * m_ChildPid, so a value of 0 identifies the forked child.
 */
KFDIPCTest::~KFDIPCTest(void) {
    /* The child must terminate here with exit(). If it simply returned,
     * gtest would treat the test as finished and start the next test in
     * the child while the parent is still active.
     */
    const bool runningInChild = (m_ChildPid == 0);

    if (runningInChild) {
        exit(0);
    }
}
|
|
|
|
/* Import shared Local Memory from parent process. Check for the pattern
|
|
* filled in by the parent process. Then fill a new pattern.
|
|
*/
|
|
void KFDIPCTest::BasicTestChildProcess(int defaultGPUNode, int *pipefd) {
|
|
/* Open KFD device for child process. This needs to called before
|
|
* any memory definitions
|
|
*/
|
|
if (HSAKMT_STATUS_SUCCESS != hsaKmtOpenKFD())
|
|
exit(1);
|
|
|
|
SDMAQueue sdmaQueue;
|
|
HsaSharedMemoryHandle sharedHandleLM;
|
|
HSAuint64 size = PAGE_SIZE, sharedSize;
|
|
HsaMemoryBuffer tempSysBuffer(size, defaultGPUNode, false);
|
|
HSAuint32 *sharedLocalBuffer = NULL;
|
|
HsaMemMapFlags mapFlags = {0};
|
|
|
|
/* Read from Pipe the shared Handle. Import shared Local Memory */
|
|
ASSERT_GE(read(pipefd[0], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);
|
|
|
|
ASSERT_SUCCESS(hsaKmtRegisterSharedHandle(&sharedHandleLM,
|
|
reinterpret_cast<void**>(&sharedLocalBuffer), &sharedSize));
|
|
ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(sharedLocalBuffer, sharedSize, NULL,
|
|
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
|
|
|
|
/* Check for pattern in the shared Local Memory */
|
|
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
|
|
size = size < sharedSize ? size : sharedSize;
|
|
sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(tempSysBuffer.As<HSAuint32*>(),
|
|
sharedLocalBuffer, size));
|
|
sdmaQueue.Wait4PacketConsumption();
|
|
EXPECT_TRUE(WaitOnValue(tempSysBuffer.As<HSAuint32*>(), 0xAAAAAAAA));
|
|
|
|
/* Fill in the Local Memory with different pattern */
|
|
sdmaQueue.PlaceAndSubmitPacket(SDMAWriteDataPacket(sharedLocalBuffer, 0xBBBBBBBB));
|
|
sdmaQueue.Wait4PacketConsumption();
|
|
|
|
/* Clean up */
|
|
EXPECT_SUCCESS(sdmaQueue.Destroy());
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(sharedLocalBuffer));
|
|
EXPECT_SUCCESS(hsaKmtDeregisterMemory(sharedLocalBuffer));
|
|
}
|
|
|
|
/* Fill a pattern into Local Memory and share with the child process.
|
|
* Then wait until Child process to exit and check for the new pattern
|
|
* filled in by the child process.
|
|
*/
|
|
|
|
void KFDIPCTest::BasicTestParentProcess(int defaultGPUNode, pid_t cpid, int *pipefd) {
|
|
HSAuint64 size = PAGE_SIZE, sharedSize;
|
|
int status;
|
|
HSAuint64 AlternateVAGPU;
|
|
HsaMemoryBuffer toShareLocalBuffer(size, defaultGPUNode, false, true);
|
|
HsaMemoryBuffer tempSysBuffer(PAGE_SIZE, defaultGPUNode, false);
|
|
SDMAQueue sdmaQueue;
|
|
HsaSharedMemoryHandle sharedHandleLM;
|
|
HsaMemMapFlags mapFlags = {0};
|
|
|
|
/* Fill a Local Buffer with a pattern */
|
|
ASSERT_SUCCESS(hsaKmtMapMemoryToGPUNodes(toShareLocalBuffer.As<void*>(), toShareLocalBuffer.Size(), &AlternateVAGPU,
|
|
mapFlags, 1, reinterpret_cast<HSAuint32 *>(&defaultGPUNode)));
|
|
tempSysBuffer.Fill(0xAAAAAAAA);
|
|
|
|
/* Copy pattern in Local Memory before sharing it */
|
|
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
|
|
sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(toShareLocalBuffer.As<HSAuint32*>(),
|
|
tempSysBuffer.As<HSAuint32*>(), size));
|
|
sdmaQueue.Wait4PacketConsumption();
|
|
|
|
/* Share it with the child process */
|
|
ASSERT_SUCCESS(hsaKmtShareMemory(toShareLocalBuffer.As<void*>(), size, &sharedHandleLM));
|
|
|
|
ASSERT_GE(write(pipefd[1], reinterpret_cast<void*>(&sharedHandleLM), sizeof(sharedHandleLM)), 0);
|
|
|
|
/* Wait for the child to finish */
|
|
waitpid(cpid, &status, 0);
|
|
|
|
EXPECT_EQ(WIFEXITED(status), 1);
|
|
EXPECT_EQ(WEXITSTATUS(status), 0);
|
|
|
|
/* Check for the new pattern filled in by child process */
|
|
sdmaQueue.PlaceAndSubmitPacket(SDMACopyDataPacket(tempSysBuffer.As<HSAuint32*>(),
|
|
toShareLocalBuffer.As<HSAuint32*>(), size));
|
|
sdmaQueue.Wait4PacketConsumption();
|
|
EXPECT_TRUE(WaitOnValue(tempSysBuffer.As<HSAuint32*>(), 0xBBBBBBBB));
|
|
|
|
/* Clean up */
|
|
EXPECT_SUCCESS(hsaKmtUnmapMemoryToGPU(toShareLocalBuffer.As<void*>()));
|
|
EXPECT_SUCCESS(sdmaQueue.Destroy());
|
|
}
|
|
|
|
/* Test IPC memory.
|
|
* 1. Parent Process [Create/Fill] LocalMemory (LM) --share--> Child Process
|
|
* 2. Child Process import LM and check for the pattern.
|
|
* 3. Child Process fill in a new pattern and quit.
|
|
* 4. Parent Process wait for the Child process to finish and then check for
|
|
* the new pattern in LM
|
|
*
|
|
* IPC support is limited to Local Memory.
|
|
*/
|
|
|
|
TEST_F(KFDIPCTest, BasicTest) {
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
const std::vector<int>& GpuNodes = m_NodeInfo.GetNodesWithGPU();
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
int pipefd[2];
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
if (!GetVramSize(defaultGPUNode)) {
|
|
LOG() << "Skipping test: No VRAM found." << std::endl;
|
|
return;
|
|
}
|
|
|
|
/* Test libhsakmt fork() clean up by defining some buffers. These
|
|
* buffers gets duplicated in the child process but not are not valid
|
|
* as it doesn't have proper mapping in GPU. The clean up code in libhsakmt
|
|
* should handle it
|
|
*/
|
|
volatile HSAuint32 stackData[1];
|
|
HsaMemoryBuffer tmpSysBuffer(PAGE_SIZE, defaultGPUNode, false);
|
|
HsaMemoryBuffer tmpUserptrBuffer((void *)&stackData[0], sizeof(HSAuint32));
|
|
|
|
/* Create Pipes for communicating shared handles */
|
|
ASSERT_EQ(pipe(pipefd), 0);
|
|
|
|
/* Create a child process and share the above Local Memory with it */
|
|
|
|
m_ChildPid = fork();
|
|
if (m_ChildPid == 0)
|
|
BasicTestChildProcess(defaultGPUNode, pipefd); /* Child Process */
|
|
else
|
|
BasicTestParentProcess(defaultGPUNode, m_ChildPid, pipefd); /* Parent proces */
|
|
|
|
/* Code path executed by both parent and child with respective fds */
|
|
close(pipefd[1]);
|
|
close(pipefd[0]);
|
|
|
|
TEST_END
|
|
}
|
|
|
|
/* Cross Memory Attach Test. Memory Descriptor Array.
|
|
* The following 2 2D-arrays describe the source and destination memory arrays used
|
|
* by CMA test. The entry is only valid if Size != 0. Each of these buffers will be
|
|
* filled intially with "FillPattern". After the test the srcRange is still expected
|
|
* to have the same pattern. The dstRange is expected to have srcRange pattern.
|
|
*
|
|
* For e.g. for TEST_COUNT = 1,
|
|
* srcRange has 2 buffers of size 0x1800. Buf1 filled with 0xA5A5A5A5 and Buf2
|
|
* filled with 0xAAAAAAAA
|
|
* dstRange has 3 buffers of size 0x1000. All of them filled 0xFFFFFFFF.
|
|
* After Copy: dstBuf1[0-0x1000] is expected to be 0xA5A5A5A5
|
|
* dstBuf2[0-0x800] is expected to be 0xA5A5A5A5
|
|
* dstBuf3[0x800-0x1000] is expected to be 0xAAAAAAAA
|
|
* and dstBuf4[0x0-0x1000] is expected to be 0xAAAAAAAA
|
|
*
|
|
* For this CMA test, after copying only the first and the last of dstBuf is checked
|
|
*/
|
|
|
|
static testMemoryDescriptor srcRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = {
|
|
{ /* Memory Type Size FillPattern FirstItem Last item */
|
|
{ CMA_MEM_TYPE_USERPTR, 0x801800, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_USERPTR, 0x1800, 0xAAAAAAAA, 0xAAAAAAAA, 0xAAAAAAAA },
|
|
{ CMA_MEM_TYPE_USERPTR, 0x0, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_USERPTR, 0x0, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
},
|
|
{
|
|
{ CMA_MEM_TYPE_SYSTEM, 0x208000, 0xDEADBEEF, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_SYSTEM, 0x4000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_SYSTEM, 0x6000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_SYSTEM, 0x2000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
},
|
|
{
|
|
{ CMA_MEM_TYPE_LOCAL_MEM, 0x800000, 0xDEADBEEF, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
{ CMA_MEM_TYPE_LOCAL_MEM, 0x1000, 0xA5A5A5A5, 0xA5A5A5A5, 0xA5A5A5A5 },
|
|
}
|
|
};
|
|
|
|
/* Destination buffers of the CMA test, one row group per test case. All
 * buffers start out filled with 0xFFFFFFFF; FirstItem / LastItem are the
 * values checkPattern() expects in the first and last 32-bit word of each
 * buffer once the matching srcRange test case has been copied into it.
 */
static testMemoryDescriptor dstRange[CMA_TEST_COUNT][CMA_MEMORY_TEST_ARRAY_SIZE] = {
    /* Test case 0: userptr buffers */
    {
        /* Memory Type            Size      FillPattern FirstItem   LastItem */
        { CMA_MEM_TYPE_USERPTR,   0x801000, 0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 },
        { CMA_MEM_TYPE_USERPTR,   0x1000,   0xFFFFFFFF, 0xA5A5A5A5, 0xAAAAAAAA },
        { CMA_MEM_TYPE_USERPTR,   0x1000,   0xFFFFFFFF, 0xAAAAAAAA, 0xAAAAAAAA },
        { CMA_MEM_TYPE_USERPTR,   0x0,      0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 },
    },
    /* Test case 1: system memory buffers */
    {
        { CMA_MEM_TYPE_SYSTEM,    0x202000, 0xFFFFFFFF, 0xDEADBEEF, 0xDEADBEEF },
        { CMA_MEM_TYPE_SYSTEM,    0x4000,   0xFFFFFFFF, 0xDEADBEEF, 0xDEADBEEF },
        { CMA_MEM_TYPE_SYSTEM,    0x8000,   0xFFFFFFFF, 0xDEADBEEF, 0xA5A5A5A5 },
        { CMA_MEM_TYPE_SYSTEM,    0x6000,   0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 },
    },
    /* Test case 2: local memory buffers */
    {
        { CMA_MEM_TYPE_LOCAL_MEM, 0x800000, 0xFFFFFFFF, 0xDEADBEEF, 0xDEADBEEF },
        { CMA_MEM_TYPE_LOCAL_MEM, 0x1000,   0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 },
        { CMA_MEM_TYPE_LOCAL_MEM, 0x1000,   0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 },
        { CMA_MEM_TYPE_LOCAL_MEM, 0x1000,   0xFFFFFFFF, 0xA5A5A5A5, 0xA5A5A5A5 },
    },
};
|
|
|
|
/* Construct an empty array: no valid entries, an SDMA queue array, and
 * both the buffer pointer table and the memory range table zeroed.
 */
KFDCMAArray::KFDCMAArray()
    : m_ValidCount(0),
      m_QueueArray(HSA_QUEUE_SDMA) {
    memset(m_HsaMemoryRange, 0, sizeof(m_HsaMemoryRange));
    memset(m_MemArray, 0, sizeof(m_MemArray));
}
|
|
|
|
/* Release every buffer owned by this array and reset it to the empty state.
 *
 * Init() stores a buffer at the index of its descriptor, so a valid buffer
 * may live behind a zero-size (unused) slot. All CMA_MEMORY_TEST_ARRAY_SIZE
 * slots are therefore visited instead of only the first m_ValidCount ones;
 * unused slots are NULL and skipped. Calling Destroy() on an empty array
 * (or on one that was filled by recvCMAArray() and owns no buffers) only
 * resets the bookkeeping.
 *
 * Always returns CMA_TEST_SUCCESS.
 */
CMA_TEST_STATUS KFDCMAArray::Destroy() {
    for (int i = 0; i < CMA_MEMORY_TEST_ARRAY_SIZE; i++) {
        if (!m_MemArray[i])
            continue;

        /* Fetch the user pointer first: it cannot be queried once the
         * HsaMemoryBuffer is gone, and the memory must stay allocated
         * until the buffer object has been deleted.
         */
        void *userPtr = m_MemArray[i]->GetUserPtr();
        delete m_MemArray[i];

        if (userPtr)
            free(userPtr);
    }

    memset(m_MemArray, 0, sizeof(m_MemArray));
    memset(m_HsaMemoryRange, 0, sizeof(m_HsaMemoryRange));
    m_ValidCount = 0;

    return CMA_TEST_SUCCESS;
}
|
|
|
|
/* Initialize KFDCMAArray based on array of testMemoryDescriptor. Usually testMemoryDescriptor[] is
|
|
* statically defined array by the user. Only items with non-zero size are considered valid
|
|
*/
|
|
CMA_TEST_STATUS KFDCMAArray::Init(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE], int node) {
|
|
CMA_TEST_STATUS err = CMA_TEST_SUCCESS;
|
|
memset(m_MemArray, 0, sizeof(m_MemArray));
|
|
memset(m_HsaMemoryRange, 0, sizeof(m_HsaMemoryRange));
|
|
|
|
m_ValidCount = 0;
|
|
for (int i = 0; i < CMA_MEMORY_TEST_ARRAY_SIZE; i++) {
|
|
if ((*memDescriptor)[i].m_MemSize == 0)
|
|
continue;
|
|
|
|
switch ((*memDescriptor)[i].m_MemType) {
|
|
case CMA_MEM_TYPE_SYSTEM:
|
|
m_MemArray[i] = new HsaMemoryBuffer((*memDescriptor)[i].m_MemSize, node);
|
|
break;
|
|
|
|
case CMA_MEM_TYPE_USERPTR:
|
|
{
|
|
void *userPtr = malloc((*memDescriptor)[i].m_MemSize);
|
|
m_MemArray[i] = new HsaMemoryBuffer(userPtr, (*memDescriptor)[i].m_MemSize);
|
|
break;
|
|
}
|
|
|
|
case CMA_MEM_TYPE_LOCAL_MEM:
|
|
m_MemArray[i] = new HsaMemoryBuffer((*memDescriptor)[i].m_MemSize, node, false, true);
|
|
break;
|
|
}
|
|
|
|
if (m_MemArray[i]) {
|
|
m_HsaMemoryRange[i].MemoryAddress = m_MemArray[i]->As<void*>();
|
|
m_HsaMemoryRange[i].SizeInBytes = m_MemArray[i]->Size();
|
|
m_ValidCount++;
|
|
} else {
|
|
err = CMA_TEST_NOMEM;
|
|
break;
|
|
}
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* Fill each buffer of KFDCMAArray with the pattern described by testMemoryDescriptor[] */
|
|
void KFDCMAArray::FillPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]) {
|
|
SDMAQueue sdmaQueue;
|
|
bool queueCreated = false;
|
|
unsigned int queueNode;
|
|
|
|
for (int i = 0; i < m_ValidCount; i++) {
|
|
if (m_MemArray[i]->isLocal())
|
|
m_MemArray[i]->Fill((*memDescriptor)[i].m_FillPattern, *m_QueueArray.GetQueue(m_MemArray[i]->Node()));
|
|
else
|
|
m_MemArray[i]->Fill((*memDescriptor)[i].m_FillPattern);
|
|
}
|
|
}
|
|
|
|
/* Check the first and last item of each buffer in KFDCMAArray with the pattern described by
|
|
* testMemoryDescriptor[]. Return 0 on success.
|
|
*/
|
|
CMA_TEST_STATUS KFDCMAArray::checkPattern(testMemoryDescriptor(*memDescriptor)[CMA_MEMORY_TEST_ARRAY_SIZE]) {
|
|
HSAuint64 lastItem;
|
|
CMA_TEST_STATUS ret = CMA_TEST_SUCCESS;
|
|
unsigned int queueNode = 0;
|
|
bool queueCreated = false;
|
|
HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
|
|
volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();
|
|
|
|
for (int i = 0; i < m_ValidCount; i++) {
|
|
lastItem = m_MemArray[i]->Size();
|
|
lastItem -= sizeof(HSAuint32);
|
|
|
|
if (m_MemArray[i]->isLocal()) {
|
|
BaseQueue *sdmaQueue = m_QueueArray.GetQueue(m_MemArray[i]->Node());
|
|
|
|
if (!m_MemArray[i]->IsPattern(0, (*memDescriptor)[i].m_CheckFirstWordPattern, *sdmaQueue, tmp) ||
|
|
!m_MemArray[i]->IsPattern(lastItem, (*memDescriptor)[i].m_CheckLastWordPattern, *sdmaQueue, tmp)) {
|
|
ret = CMA_CHECK_PATTERN_ERROR;
|
|
break;
|
|
}
|
|
|
|
} else {
|
|
if (!m_MemArray[i]->IsPattern(0, (*memDescriptor)[i].m_CheckFirstWordPattern) ||
|
|
!m_MemArray[i]->IsPattern(lastItem, (*memDescriptor)[i].m_CheckLastWordPattern)) {
|
|
ret = CMA_CHECK_PATTERN_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
/* Non-blocking read and write to avoid Test from hanging (block indefinitely)
 * if either server or client process exits due to assert failure
 */

/* Write up to size bytes from buf to fd, retrying for a bounded time.
 *
 * One initial attempt plus up to 5 retries are made, sleeping 10 ms after
 * every attempt that leaves data unwritten. EAGAIN / EWOULDBLOCK (the
 * descriptor is not ready) and EINTR (interrupted by a signal) are treated
 * as "try again"; any other error aborts the transfer.
 *
 * Returns the number of bytes written, which is less than size if the retry
 * budget ran out, or -1 on error. A negative size is rejected with -1 and
 * errno set to EINVAL, because it would otherwise be converted to a huge
 * unsigned byte count for write().
 */
static int write_non_block(int fd, const void *buf, int size) {
    if (size < 0) {
        errno = EINVAL;
        return -1;
    }

    int total_bytes = 0;
    int retries = 5;
    struct timespec tm = { 0, 10000000L };  /* 10 ms */
    const char *ptr = static_cast<const char *>(buf);

    do {
        const ssize_t cur_bytes = write(fd, ptr, static_cast<size_t>(size - total_bytes));

        if (cur_bytes < 0) {
            if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
                return -1;
        } else {
            /* write() never reports more than it was asked for, so
             * total_bytes cannot exceed size.
             */
            total_bytes += static_cast<int>(cur_bytes);
            ptr += cur_bytes;
        }

        if (total_bytes < size)
            nanosleep(&tm, NULL);
    } while (total_bytes < size && retries--);

    return total_bytes;
}
|
|
|
|
/* Read up to size bytes from fd into buf, retrying for a bounded time.
 *
 * One initial attempt plus up to 5 retries are made, sleeping 100 ms after
 * every attempt that leaves data outstanding. EAGAIN / EWOULDBLOCK (no data
 * available yet) and EINTR (interrupted by a signal) are treated as
 * "try again"; any other error aborts the transfer. End-of-file (read()
 * returning 0, i.e. every write end has been closed) ends the transfer
 * immediately, since no further data can arrive and waiting would only
 * delay the caller.
 *
 * Returns the number of bytes read, which is less than size on EOF or if
 * the retry budget ran out, or -1 on error. A negative size is rejected
 * with -1 and errno set to EINVAL, because it would otherwise be converted
 * to a huge unsigned byte count for read().
 */
static int read_non_block(int fd, void *buf, int size) {
    if (size < 0) {
        errno = EINVAL;
        return -1;
    }

    int total_bytes = 0;
    int retries = 5;
    struct timespec tm = { 0, 100000000L };  /* 100 ms */
    char *ptr = static_cast<char *>(buf);

    do {
        const ssize_t cur_bytes = read(fd, ptr, static_cast<size_t>(size - total_bytes));

        if (cur_bytes < 0) {
            if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
                return -1;
        } else if (cur_bytes == 0) {
            /* EOF, or nothing was requested: nothing more to wait for */
            break;
        } else {
            /* read() never reports more than it was asked for, so
             * total_bytes cannot exceed size.
             */
            total_bytes += static_cast<int>(cur_bytes);
            ptr += cur_bytes;
        }

        if (total_bytes < size)
            nanosleep(&tm, NULL);
    } while (total_bytes < size && retries--);

    return total_bytes;
}
|
|
|
|
|
|
/* Send HsaMemoryRange to another process that is connected via writePipe */
|
|
CMA_TEST_STATUS KFDCMAArray::sendCMAArray(int writePipe) {
|
|
if (write_non_block(writePipe, reinterpret_cast<void*>(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) !=
|
|
sizeof(m_HsaMemoryRange))
|
|
return CMA_IPC_PIPE_ERROR;
|
|
return CMA_TEST_SUCCESS;
|
|
}
|
|
|
|
/* Send HsaMemoryRange from another process and initialize KFDCMAArray */
|
|
CMA_TEST_STATUS KFDCMAArray::recvCMAArray(int readPipe) {
|
|
int i;
|
|
|
|
if (read_non_block(readPipe, reinterpret_cast<void*>(&m_HsaMemoryRange), sizeof(m_HsaMemoryRange)) !=
|
|
sizeof(m_HsaMemoryRange))
|
|
return CMA_IPC_PIPE_ERROR;
|
|
|
|
for (i = 0; i < CMA_MEMORY_TEST_ARRAY_SIZE; i++) {
|
|
if (m_HsaMemoryRange[i].SizeInBytes)
|
|
m_ValidCount++;
|
|
}
|
|
return CMA_TEST_SUCCESS;
|
|
}
|
|
|
|
|
|
/* Child (remote) side of the Cross Memory Attach test.
 *
 * For every test case: initialize and fill the Local Buffer Array with a
 * pattern, send the array to the parent process and wait for the parent's
 * 4-character reply.
 *    READ_TEST:  the buffers are built from srcRange. The parent reads and
 *                checks them and replies "NEXT".
 *    WRITE_TEST: the buffers are built from dstRange. The parent writes to
 *                them and replies "CHCK", after which the new pattern is
 *                checked here against dstRange.
 * A reply of "EXIT" yields CMA_TEST_ABORT, anything else (including no
 * reply within the read timeout) CMA_PARENT_FAIL.
 *
 * defaultGPUNode: node used to allocate the buffers.
 * writePipe:      pipe end used to send the buffer array to the parent.
 * readPipe:       pipe end used to receive the parent's reply.
 * testType:       CMA_READ_TEST or CMA_WRITE_TEST.
 *
 * Returns CMA_TEST_SUCCESS if all test cases passed, otherwise the status
 * of the first failure. The buffer array is destroyed on every path.
 */
CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachChildProcess(int defaultGPUNode, int writePipe,
                                                          int readPipe, CMA_TEST_TYPE testType) {
    KFDCMAArray cmaLocalArray;
    char msg[16];
    CMA_TEST_STATUS status = CMA_TEST_SUCCESS;

    for (int testNo = 0; testNo < CMA_TEST_COUNT; testNo++) {
        testMemoryDescriptor (*localDesc)[CMA_MEMORY_TEST_ARRAY_SIZE] =
            (testType == CMA_READ_TEST) ? &srcRange[testNo] : &dstRange[testNo];

        status = cmaLocalArray.Init(localDesc, defaultGPUNode);

        if (status == CMA_TEST_SUCCESS) {
            cmaLocalArray.FillPattern(localDesc);

            /* sendCMAArray() reports a status code, so compare against
             * success rather than testing the sign.
             */
            status = cmaLocalArray.sendCMAArray(writePipe);
        }

        if (status == CMA_TEST_SUCCESS) {
            /* Wait until the test is over */
            memset(msg, 0, sizeof(msg));

            if (read_non_block(readPipe, msg, 4) < 0)
                status = CMA_IPC_PIPE_ERROR;
            else if (!strcmp(msg, "CHCK"))
                status = cmaLocalArray.checkPattern(&dstRange[testNo]);
            else if (!strcmp(msg, "NEXT"))
                status = CMA_TEST_SUCCESS;
            else if (!strcmp(msg, "EXIT"))
                status = CMA_TEST_ABORT;
            else
                status = CMA_PARENT_FAIL;
        }

        /* Release the buffers of this test case on success and on failure */
        cmaLocalArray.Destroy();

        if (status != CMA_TEST_SUCCESS)
            break;
    }

    return status;
}
|
|
|
|
|
|
/* Parent (local) side of the Cross Memory Attach test.
 *
 * For every test case: receive the buffer array from the child, then
 * initialize and fill the Local Buffer Array.
 *    READ_TEST:  local buffers are built from dstRange. The remote buffer
 *                array is copied into them with hsaKmtProcessVMRead, the new
 *                pattern is checked and the child is told "NEXT".
 *    WRITE_TEST: local buffers are built from srcRange. They are written
 *                into the remote buffer array with hsaKmtProcessVMWrite and
 *                the child is told "CHCK" so it checks the new pattern.
 *
 * defaultGPUNode: node used to allocate the local buffers.
 * cid:            pid of the child (remote) process.
 * writePipe:      pipe end used to send the reply to the child.
 * readPipe:       pipe end used to receive the child's buffer array.
 * testType:       CMA_READ_TEST or CMA_WRITE_TEST.
 *
 * Returns CMA_TEST_SUCCESS if all test cases passed, otherwise the status
 * of the first failure. Both arrays are destroyed on every path.
 */
CMA_TEST_STATUS KFDIPCTest::CrossMemoryAttachParentProcess(int defaultGPUNode, pid_t cid,
                                                           int writePipe, int readPipe,
                                                           CMA_TEST_TYPE testType) {
    KFDCMAArray cmaLocalArray, cmaRemoteArray;
    HSAuint64 copied = 0;
    CMA_TEST_STATUS status = CMA_TEST_SUCCESS;
    const bool isReadTest = (testType == CMA_READ_TEST);

    for (int testNo = 0; testNo < CMA_TEST_COUNT; testNo++) {
        testMemoryDescriptor (*localDesc)[CMA_MEMORY_TEST_ARRAY_SIZE] =
            isReadTest ? &dstRange[testNo] : &srcRange[testNo];

        status = cmaRemoteArray.recvCMAArray(readPipe);
        if (status != CMA_TEST_SUCCESS)
            break;

        status = cmaLocalArray.Init(localDesc, defaultGPUNode);
        if (status != CMA_TEST_SUCCESS)
            break;
        cmaLocalArray.FillPattern(localDesc);

        if (isReadTest) {
            if (hsaKmtProcessVMRead(cid, cmaLocalArray.getMemoryRange(),
                                    cmaLocalArray.getValidRangeCount(),
                                    cmaRemoteArray.getMemoryRange(),
                                    cmaRemoteArray.getValidRangeCount(),
                                    &copied) != HSAKMT_STATUS_SUCCESS) {
                status = CMA_TEST_HSA_READ_FAIL;
                break;
            }

            status = cmaLocalArray.checkPattern(localDesc);
            if (status != CMA_TEST_SUCCESS)
                break;
        } else {
            if (hsaKmtProcessVMWrite(cid, cmaLocalArray.getMemoryRange(),
                                     cmaLocalArray.getValidRangeCount(),
                                     cmaRemoteArray.getMemoryRange(),
                                     cmaRemoteArray.getValidRangeCount(),
                                     &copied) != HSAKMT_STATUS_SUCCESS) {
                status = CMA_TEST_HSA_WRITE_FAIL;
                break;
            }
        }

        /* Release this test case's buffers before letting the child go on */
        cmaLocalArray.Destroy();
        cmaRemoteArray.Destroy();

        if (write_non_block(writePipe, isReadTest ? "NEXT" : "CHCK", 4) < 0) {
            status = CMA_IPC_PIPE_ERROR;
            break;
        }
    } /* for loop */

    /* The early exits above skip the in-loop cleanup. Destroying an array
     * that is already empty releases nothing, so this is safe on the
     * success path as well.
     */
    cmaLocalArray.Destroy();
    cmaRemoteArray.Destroy();

    return status;
}
|
|
|
|
/* Test Cross Memory Attach
|
|
* hsaKmtProcessVMRead and hsaKmtProcessVMWrite are GPU address equivalent to
|
|
* process_vm_readv and process_vm_writev. These calls transfer data between
|
|
* the address space of the calling process ("the local process") and the process
|
|
* identified by pid ("the remote process").
|
|
*
|
|
* In the tests parent process will be the local process and child will be
|
|
* the remote.
|
|
*/
|
|
TEST_F(KFDIPCTest, CrossMemoryAttachTest) {
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
int pipeCtoP[2], pipePtoC[2];
|
|
int status;
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
if (!GetVramSize(defaultGPUNode)) {
|
|
LOG() << "Skipping test: No VRAM found." << std::endl;
|
|
return;
|
|
}
|
|
|
|
/* Create Pipes for communicating shared handles */
|
|
ASSERT_EQ(pipe2(pipeCtoP, O_NONBLOCK), 0);
|
|
ASSERT_EQ(pipe2(pipePtoC, O_NONBLOCK), 0);
|
|
|
|
/* Create a child process and share the above Local Memory with it */
|
|
m_ChildPid = fork();
|
|
if (m_ChildPid == 0 && hsaKmtOpenKFD() == HSAKMT_STATUS_SUCCESS) {
|
|
/* Child Process */
|
|
status = CrossMemoryAttachChildProcess(defaultGPUNode, pipeCtoP[1],
|
|
pipePtoC[0], CMA_READ_TEST);
|
|
EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Child: Read Test Fail";
|
|
status = CrossMemoryAttachChildProcess(defaultGPUNode, pipeCtoP[1],
|
|
pipePtoC[0], CMA_WRITE_TEST);
|
|
EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Child: Write Test Fail";
|
|
} else {
|
|
int childStatus;
|
|
|
|
status = CrossMemoryAttachParentProcess(defaultGPUNode, m_ChildPid,
|
|
pipePtoC[1], pipeCtoP[0], CMA_READ_TEST); /* Parent proces */
|
|
EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Parent: Read Test Fail";
|
|
status = CrossMemoryAttachParentProcess(defaultGPUNode, m_ChildPid,
|
|
pipePtoC[1], pipeCtoP[0], CMA_WRITE_TEST);
|
|
EXPECT_EQ(status, CMA_TEST_SUCCESS) << "Parent: Write Test Fail";
|
|
|
|
waitpid(m_ChildPid, &childStatus, 0);
|
|
EXPECT_EQ(WIFEXITED(childStatus), true);
|
|
EXPECT_EQ(WEXITSTATUS(childStatus), 0);
|
|
}
|
|
|
|
/* Code path executed by both parent and child with respective fds */
|
|
close(pipeCtoP[1]);
|
|
close(pipeCtoP[0]);
|
|
close(pipePtoC[1]);
|
|
close(pipePtoC[0]);
|
|
TEST_END
|
|
}
|
|
|
|
/* Test Cross Memory Attach
|
|
*
|
|
* hsaKmtProcessVMRead and hsaKmtProcessVMWrite are GPU address equivalent to
|
|
* process_vm_readv and process_vm_writev. These calls are used to transfer data
|
|
* between the address space of the calling process ("the local process") and the process
|
|
* identified by pid ("the remote process"). However, these functions should also work
|
|
* with a single process and single BO.
|
|
*/
|
|
TEST_F(KFDIPCTest, CMABasicTest) {
|
|
TEST_START(TESTPROFILE_RUNALL)
|
|
|
|
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
|
|
HSAuint64 size = PAGE_SIZE;
|
|
SDMAQueue sdmaQueue;
|
|
HsaMemoryRange srcRange, dstRange;
|
|
HSAuint64 copied;
|
|
const int PATTERN1 = 0xA5A5A5A5, PATTERN2 = 0xFFFFFFFF;
|
|
HSAKMT_STATUS status;
|
|
|
|
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
|
|
|
|
if (!GetVramSize(defaultGPUNode)) {
|
|
LOG() << "Skipping test: No VRAM found." << std::endl;
|
|
return;
|
|
}
|
|
|
|
ASSERT_SUCCESS(sdmaQueue.Create(defaultGPUNode));
|
|
HsaMemoryBuffer tmpBuffer(PAGE_SIZE, 0, true /* zero */);
|
|
volatile HSAuint32 *tmp = tmpBuffer.As<volatile HSAuint32 *>();
|
|
|
|
/* Initialize test buffer. Fill first half and second half with
|
|
* different pattern
|
|
*/
|
|
HsaMemoryBuffer testLocalBuffer(size, defaultGPUNode, false, true);
|
|
testLocalBuffer.Fill(PATTERN1, sdmaQueue, 0, size/2);
|
|
testLocalBuffer.Fill(PATTERN2, sdmaQueue, size/2, size/2);
|
|
|
|
/* Test1. Copy (or overwrite) buffer onto itself */
|
|
srcRange.MemoryAddress = testLocalBuffer.As<void*>();
|
|
srcRange.SizeInBytes = size;
|
|
dstRange.MemoryAddress = testLocalBuffer.As<void*>();
|
|
dstRange.SizeInBytes = size;
|
|
ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied));
|
|
EXPECT_EQ(copied, size);
|
|
|
|
EXPECT_TRUE(testLocalBuffer.IsPattern(0, PATTERN1, sdmaQueue, tmp));
|
|
EXPECT_TRUE(testLocalBuffer.IsPattern(size - 4, PATTERN2, sdmaQueue, tmp));
|
|
|
|
|
|
/* Test2. Test unaligned byte copy. Write 3 bytes to an unaligned destination address */
|
|
const int unaligned_offset = 1;
|
|
const int unaligned_size = 3;
|
|
const int unaligned_mask = (((1 << (unaligned_size * 8)) - 1) << (unaligned_offset * 8));
|
|
HSAuint32 expected_pattern;
|
|
|
|
srcRange.MemoryAddress = testLocalBuffer.As<void*>();
|
|
|
|
/* Deliberately set to value > unaligned_size. Only unaligned_size
|
|
* should be copied since dstRange.SizeInBytes == unaligned_size
|
|
*/
|
|
srcRange.SizeInBytes = size;
|
|
|
|
dstRange.MemoryAddress = reinterpret_cast<void *>(testLocalBuffer.As<char*>() + (size / 2) + unaligned_offset);
|
|
dstRange.SizeInBytes = unaligned_size;
|
|
ASSERT_SUCCESS(hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied));
|
|
EXPECT_EQ(copied, unaligned_size);
|
|
|
|
expected_pattern = (PATTERN2 & ~unaligned_mask | (PATTERN1 & unaligned_mask));
|
|
EXPECT_TRUE(testLocalBuffer.IsPattern(size/2, expected_pattern, sdmaQueue, tmp));
|
|
|
|
|
|
/* Test3. Test overflow and expect failure */
|
|
srcRange.MemoryAddress = testLocalBuffer.As<void*>();
|
|
srcRange.SizeInBytes = size;
|
|
dstRange.MemoryAddress = reinterpret_cast<void *>(testLocalBuffer.As<char*>() + 4);
|
|
dstRange.SizeInBytes = size; /* This should overflow since offset is VA + 4 */
|
|
status = hsaKmtProcessVMRead(getpid(), &dstRange, 1, &srcRange, 1, &copied);
|
|
EXPECT_NE(status, HSAKMT_STATUS_SUCCESS);
|
|
EXPECT_LE(copied, (size - 4));
|
|
|
|
EXPECT_SUCCESS(sdmaQueue.Destroy());
|
|
|
|
TEST_END
|
|
}
|