kfdtest: Update KFDMultiProcessTest class to support running kfdtest on multiple GPUs

Update the KFDMultiProcessTest class to fork child processes on a per-GPU basis.

Signed-off-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Change-Id: Ibb12d64b4cbc5f082d737fd8d8a74233b75be13e
Этот коммит содержится в:
Xiaogang Chen
2024-12-12 17:31:06 -06:00
коммит произвёл Xiaogang Chen
родитель b4943d718b
Коммит c69e660e7a
6 изменённых файлов: 98 добавлений и 58 удалений
+9 -9
Просмотреть файл
@@ -339,12 +339,12 @@ TEST_F(KFDEvictTest, BasicTest) {
}
/* Fork the child processes */
ForkChildProcesses(N_PROCESSES);
ForkChildProcesses(defaultGPUNode, N_PROCESSES);
int rn = FindDRMRenderNode(defaultGPUNode);
if (rn < 0) {
LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
return;
}
@@ -362,7 +362,7 @@ TEST_F(KFDEvictTest, BasicTest) {
LOG() << m_psName << "free buffer" << std::endl;
FreeBuffers(pBuffers, vramBufSize);
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
TEST_END
}
@@ -432,12 +432,12 @@ TEST_F(KFDEvictTest, QueueTest) {
ASSERT_LE(count, PAGE_SIZE/sizeof(unsigned int *));
/* Fork the child processes */
ForkChildProcesses(N_PROCESSES);
ForkChildProcesses(defaultGPUNode, N_PROCESSES);
int rn = FindDRMRenderNode(defaultGPUNode);
if (rn < 0) {
LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
return;
}
@@ -502,7 +502,7 @@ TEST_F(KFDEvictTest, QueueTest) {
for (i = 0; i < wavefront_num; i++)
EXPECT_EQ(0x5678, *(result + i));
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
TEST_END
}
@@ -549,12 +549,12 @@ TEST_F(KFDEvictTest, BurstyTest) {
}
/* Fork the child processes */
ForkChildProcesses(N_PROCESSES);
ForkChildProcesses(defaultGPUNode, N_PROCESSES);
int rn = FindDRMRenderNode(defaultGPUNode);
if (rn < 0) {
LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
return;
}
@@ -577,7 +577,7 @@ TEST_F(KFDEvictTest, BurstyTest) {
EXPECT_SUCCESS(pm4Queue.Destroy());
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
TEST_END
}
+28 -15
Просмотреть файл
@@ -39,20 +39,20 @@ void KFDHWSTest::TearDown() {
ROUTINE_END
}
void KFDHWSTest::RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops) {
int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();
ASSERT_GE(defaultGPUNode, 0) << "failed to get default GPU Node";
void KFDHWSTest::RunTest_GPU(int gpuNode, unsigned nProcesses, unsigned nQueues, unsigned nLoops) {
int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(gpuNode);
unsigned q, l;
bool timeout = false;
/* Fork the child processes */
ForkChildProcesses(nProcesses);
/* Fork the child processes for gpuNode */
ForkChildProcesses(gpuNode, nProcesses);
// Create queues
PM4Queue *queues = new PM4Queue[nQueues];
for (q = 0; q < nQueues; q++)
ASSERT_SUCCESS(queues[q].Create(defaultGPUNode));
ASSERT_SUCCESS_GPU(queues[q].Create(gpuNode), gpuNode);
// Create dispatch pointers. Each loop iteration creates fresh dispatches
Dispatch **dispatch = new Dispatch*[nQueues];
@@ -60,10 +60,14 @@ void KFDHWSTest::RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops)
dispatch[q] = NULL;
// Logging: Each process prints its index after each loop iteration, all in one line.
std::ostream &log = LOG() << std::dec << "Process " << m_ProcessIndex << " starting." << std::endl;
std::ostream &log = LOG() << std::dec << "gpuNode: " << gpuNode << " Process: " << m_ProcessIndex[gpuIndex] << " starting." << std::endl;
// Run work on all queues
HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
HsaMemoryBuffer isaBuffer(PAGE_SIZE, gpuNode, true/*zero*/, false/*local*/, true/*exec*/);
Assembler* m_pAsm;
m_pAsm = GetAssemblerFromNodeId(gpuNode);
ASSERT_NOTNULL_GPU(m_pAsm, gpuNode);
ASSERT_SUCCESS(m_pAsm->RunAssembleBuf(NoopIsa, isaBuffer.As<char*>()));
@@ -81,15 +85,15 @@ void KFDHWSTest::RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops)
if (timeout)
goto timeout;
}
log << m_ProcessIndex;
log << m_ProcessIndex[gpuIndex];
}
timeout:
log << std::endl;
if (timeout) {
WARN() << "Process " << m_ProcessIndex << " timeout." << std::endl;
WARN() << "gpuNode: " << gpuNode << " Process: " << m_ProcessIndex[gpuIndex] << " timeout." << std::endl;
} else {
LOG() << "Process " << m_ProcessIndex << " done. Waiting ..." << std::endl;
LOG() << "gpuNode: " << gpuNode << " Process " << m_ProcessIndex[gpuIndex] << " done. Waiting ..." << std::endl;
// Wait here before destroying queues. If another process' queues
// are soft-hanging, destroying queues can resolve the soft-hang
@@ -101,9 +105,9 @@ timeout:
// Destroy queues and dispatches. Destroying the queues first
// ensures that the memory allocated by the Dispatch is no longer
// accessed by the GPU.
LOG() << "Process " << m_ProcessIndex << " cleaning up." << std::endl;
LOG() << "gpuNode: " << gpuNode << " Process " << m_ProcessIndex[gpuIndex] << " cleaning up." << std::endl;
for (q = 0; q < nQueues; q++) {
EXPECT_SUCCESS(queues[q].Destroy());
EXPECT_SUCCESS_GPU(queues[q].Destroy(), gpuNode);
if (dispatch[q])
delete dispatch[q];
}
@@ -116,14 +120,23 @@ timeout:
// parent.
ASSERT_FALSE(timeout);
WaitChildProcesses();
WaitChildProcesses(gpuNode);
}
// Per-GPU entry point that the MultiProcessOversubscribed test hands to
// KFDTest_Launch. Unpacks the launch parameters and runs the oversubscription
// workload on the GPU node named in them.
//
// pTestParamters: carries the target GPU node (gpuNode) and the test fixture
//                 object (pTestObject) the workload is run on.
void RunTest(KFDTEST_PARAMETERS* pTestParamters) {
    // Recover the fixture from the parameter block.
    // NOTE(review): the declared type of pTestObject is not visible here, so the
    // original C-style cast is kept as-is.
    KFDHWSTest* const hwsTest = (KFDHWSTest*)pTestParamters->pTestObject;
    const int targetNode = pTestParamters->gpuNode;

    // Arguments after the node: nProcesses = 3, nQueues = 13, nLoops = 40.
    hwsTest->RunTest_GPU(targetNode, 3, 13, 40);
}
// Multi-process oversubscription test for the KFDHWSTest fixture.
// NOTE(review): this is a diff listing without +/- markers; the two call lines
// below appear to be the removed call followed by its replacement - confirm
// against the raw patch.
TEST_F(KFDHWSTest, MultiProcessOversubscribed) {
TEST_REQUIRE_ENV_CAPABILITIES(ENVCAPS_64BITLINUX);
TEST_START(TESTPROFILE_RUNALL);
// Direct invocation with (3, 13, 40); matches the old
// RunTest(nProcesses, nQueues, nLoops) member signature.
RunTest(3, 13, 40);
// Passes the free function RunTest to KFDTest_Launch and asserts that the
// launch reports success.
ASSERT_SUCCESS(KFDTest_Launch(RunTest));
TEST_END
}
+3 -1
Просмотреть файл
@@ -35,11 +35,13 @@ class KFDHWSTest : public KFDMultiProcessTest {
KFDHWSTest() {}
~KFDHWSTest() {}
friend void RunTest(KFDTEST_PARAMETERS* pTestParamters);
protected:
virtual void SetUp();
virtual void TearDown();
void RunTest(unsigned nProcesses, unsigned nQueues, unsigned nLoops);
void RunTest_GPU(int gpuNode, unsigned nProcesses, unsigned nQueues, unsigned nLoops);
};
#endif // __KFD_QCM_TEST__H__
+20 -14
Просмотреть файл
@@ -23,8 +23,9 @@
#include "KFDMultiProcessTest.hpp"
void KFDMultiProcessTest::ForkChildProcesses(int nprocesses) {
void KFDMultiProcessTest::ForkChildProcesses(unsigned int nodeId, int nprocesses) {
int i;
int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(nodeId);
for (i = 0; i < nprocesses - 1; ++i) {
pid_t pid = fork();
@@ -35,43 +36,48 @@ void KFDMultiProcessTest::ForkChildProcesses(int nprocesses) {
/* Cleanup file descriptors copied from parent process
* then call SetUp->hsaKmtOpenKFD to create new process
*/
m_psName = "Test process " + std::to_string(i) + " ";
m_psName[gpuIndex] = "Child Test process " + std::to_string(i) +
" on gpuNode: " + std::to_string(gpuIndex) + " ";
TearDown();
SetUp();
m_ChildPids.clear();
m_IsParent = false;
m_ProcessIndex = i;
m_ChildPids[gpuIndex].clear();
m_IsParent[gpuIndex] = false;
m_ProcessIndex[gpuIndex] = i;
return;
}
/* Parent process */
m_ChildPids.push_back(pid);
m_ChildPids[gpuIndex].push_back(pid);
}
m_psName = "Test process " + std::to_string(i) + " ";
m_ProcessIndex = i;
m_psName[gpuIndex] = "Parent Test process " + std::to_string(i) +
" on gpuNode: " + std::to_string(gpuIndex) + " ";
m_ProcessIndex[gpuIndex] = i;
}
// Waits for the child processes forked for one GPU and records the outcome.
//
// NOTE(review): this is a diff listing without +/- markers. Where two
// near-identical lines appear back to back, the first appears to be the
// pre-change line and the second its per-GPU replacement - confirm against
// the raw patch.
//
// nodeId: GPU node whose children are waited for; it is translated to the
//         index used for the per-GPU member arrays.
//
// In the parent, each recorded child pid is waited on in order, and the number
// of children that exited normally with status 0 is expected to equal the
// number of recorded children. On every path the child status is then set to
// HSAKMT_STATUS_SUCCESS.
void KFDMultiProcessTest::WaitChildProcesses() {
if (m_IsParent) {
void KFDMultiProcessTest::WaitChildProcesses(unsigned int nodeId) {
// NOTE(review): gpuIndex is used as an array index below without a range
// check; the failure value of HsaGPUindexFromGpuNode is not visible here.
int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(nodeId);
if (m_IsParent[gpuIndex]) {
/* Only run by parent process */
int childStatus;
int childExitOkNum = 0;
int size = m_ChildPids.size();
// Snapshot the child count up front: the list shrinks inside the loop.
int size = m_ChildPids[gpuIndex].size();
for (HSAuint32 i = 0; i < size; i++) {
pid_t pid = m_ChildPids.front();
pid_t pid = m_ChildPids[gpuIndex].front();
// options == 0: block until this child changes state. The return value of
// waitpid is not checked.
waitpid(pid, &childStatus, 0);
// Count only children that exited normally with exit status 0.
if (WIFEXITED(childStatus) == 1 && WEXITSTATUS(childStatus) == 0)
childExitOkNum++;
m_ChildPids.erase(m_ChildPids.begin());
// Drop the pid that was just waited on so front() yields the next child.
m_ChildPids[gpuIndex].erase(m_ChildPids[gpuIndex].begin());
}
// Every recorded child must have exited cleanly.
EXPECT_EQ(childExitOkNum, size);
}
/* Child process or parent process finished successfully */
m_ChildStatus = HSAKMT_STATUS_SUCCESS;
m_ChildStatus[gpuIndex] = HSAKMT_STATUS_SUCCESS;
}
+31 -15
Просмотреть файл
@@ -28,35 +28,51 @@
#include <vector>
#include "KFDBaseComponentTest.hpp"
extern unsigned int g_TestGPUsNum;
// @class KFDMultiProcessTest
// Base class for tests forking multiple child processes
// Test fixture base that forks child processes and waits for them.
//
// NOTE(review): this is a diff listing without +/- markers. Where an old scalar
// form and a per-GPU array form of the same statement both appear, the scalar
// one appears to be the pre-change line - confirm against the raw patch.
class KFDMultiProcessTest : public KFDBaseComponentTest {
public:
KFDMultiProcessTest(void): m_ChildStatus(HSAKMT_STATUS_ERROR), m_IsParent(true) {}
// Initializes the per-GPU state: status starts as HSAKMT_STATUS_ERROR and the
// process is marked as parent for each of the first g_TestGPUsNum slots.
// NOTE(review): the loops below index MAX_GPU-sized arrays up to
// g_TestGPUsNum; assumes g_TestGPUsNum <= MAX_GPU - confirm. The counter is a
// signed int compared against an unsigned int.
KFDMultiProcessTest(void) {
for ( int i = 0; i < g_TestGPUsNum; i++) {
m_ChildStatus[i] = HSAKMT_STATUS_ERROR;
m_IsParent[i] = true;
}
}
// In a child process the destructor calls exit() with the recorded child
// status. In the parent it waits for the children and swallows any exception.
~KFDMultiProcessTest(void) {
if (!m_IsParent) {
/* Child process has to exit
* otherwise gtest will continue other tests
*/
exit(m_ChildStatus);
// Exit with the status of the first GPU slot marked as non-parent.
for ( int i = 0; i < g_TestGPUsNum; i++) {
if (!m_IsParent[i]) {
/* Child process has to exit
* otherwise gtest will continue other tests
*/
exit(m_ChildStatus[i]);
}
}
try {
WaitChildProcesses();
} catch (...) {}
// Parent path: wait for the children of each of the first g_TestGPUsNum
// GPU nodes.
const std::vector<int> gpuNodes = m_NodeInfo.GetNodesWithGPU();
// gpu_num is not read anywhere in the visible code.
unsigned gpu_num = gpuNodes.size();
int gpu_node;
for (int i = 0; i < g_TestGPUsNum; i++) {
// at() throws std::out_of_range for an index past the end of gpuNodes;
// that exception is swallowed by the catch-all below.
gpu_node = gpuNodes.at(i);
WaitChildProcesses(gpu_node);
}
} catch (...) {}
}
protected:
void ForkChildProcesses(int nprocesses);
void WaitChildProcesses();
// Forks child processes for the GPU node nodeId; nprocesses is the process
// count, with the fork loop running nprocesses - 1 times.
void ForkChildProcesses(unsigned int nodeId, int nprocesses);
// Waits for the children recorded for the GPU node nodeId.
void WaitChildProcesses(unsigned int nodeId);
protected: // Members
std::string m_psName;
int m_ProcessIndex;
std::vector<pid_t> m_ChildPids;
HSAKMT_STATUS m_ChildStatus;
bool m_IsParent;
// Per-GPU state, indexed by the value HsaGPUindexFromGpuNode returns.
// Process label string set in ForkChildProcesses.
std::string m_psName[MAX_GPU];
// Index assigned to this process in ForkChildProcesses.
int m_ProcessIndex[MAX_GPU];
// Pids of the children forked by the parent.
std::vector<pid_t> m_ChildPids[MAX_GPU];
// Value a child process passes to exit() in the destructor.
HSAKMT_STATUS m_ChildStatus[MAX_GPU];
// False in a forked child, true in the parent.
bool m_IsParent[MAX_GPU];
};
#endif // __KFD_MULTI_PROCESS_TEST__H__
+7 -4
Просмотреть файл
@@ -267,20 +267,23 @@ TEST_F(KFDPCSamplingTest, MultiProcPcSamplingTest) {
samples[0].value = 0x100000; /* 1,048,576 usec */
/* Fork the child processes */
ForkChildProcesses(N_PROCESSES);
ForkChildProcesses(defaultGPUNode, N_PROCESSES);
int rn = FindDRMRenderNode(defaultGPUNode);
if (rn < 0) {
LOG() << "Skipping test: Could not find render node for default GPU." << std::endl;
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
return;
}
params.samples = samples;
params.test_name = m_psName;
int gpuIndex = m_NodeInfo.HsaGPUindexFromGpuNode(defaultGPUNode);
params.test_name = m_psName[gpuIndex];
PCSamplingProcRun(&params);
WaitChildProcesses();
WaitChildProcesses(defaultGPUNode);
if (info_buf)
free(info_buf);