kfdtest: Make eviction tests more robust
- Run more graphics command submissions with a shorter delay between
them
- Synchronize after every graphics command submission
- Include the big VRAM BO in the BOList of the command submission
to trigger more evictions
- In QueueTest, run AMDGPU command submissions concurrently with
the compute shader on the user mode queue
- Submit AMDGPU commands to GFX queue instead of compute queue to
avoid deadlocks between user-mode and kernel-mode queues on the
same pipe
- Allocate slightly less memory from KFD to avoid allocation errors
due to fragmentation or memory leaks in previous tests
- Run only two processes, which maximizes the number of KFD evictions
(probably because of lower chances of evicting non-KFD BOs)
Change-Id: If05d53f5fcf690b6488998a3f933f120ddaa71ee
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
[ROCm/ROCR-Runtime commit: c8d823eb10]
This commit is contained in:
@@ -30,7 +30,7 @@
|
||||
#include "SDMAQueue.hpp"
|
||||
#include "Dispatch.hpp"
|
||||
|
||||
#define N_PROCESSES (8) /* Number of processes running in parallel, must be at least 2 */
|
||||
#define N_PROCESSES (2) /* Number of processes running in parallel, must be at least 2 */
|
||||
#define ALLOCATE_BUF_SIZE_MB (64)
|
||||
#define ALLOCATE_RETRY_TIMES (3)
|
||||
|
||||
@@ -176,7 +176,7 @@ static inline int amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle
|
||||
return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list);
|
||||
}
|
||||
|
||||
void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) {
|
||||
void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn, amdgpu_bo_handle handle) {
|
||||
amdgpu_context_handle contextHandle;
|
||||
amdgpu_bo_handle ibResultHandle;
|
||||
void *ibResultCpu;
|
||||
@@ -197,7 +197,7 @@ void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) {
|
||||
&ibResultHandle, &ibResultCpu,
|
||||
&ibResultMcAddress, &vaHandle));
|
||||
|
||||
ASSERT_EQ(0, amdgpu_get_bo_list(m_RenderNodes[rn].device_handle, ibResultHandle, NULL,
|
||||
ASSERT_EQ(0, amdgpu_get_bo_list(m_RenderNodes[rn].device_handle, ibResultHandle, handle,
|
||||
&boList));
|
||||
|
||||
/* Fill Nop commands in IB */
|
||||
@@ -210,7 +210,7 @@ void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) {
|
||||
ibInfo.size = 16;
|
||||
|
||||
memset(&ibsRequest, 0, sizeof(struct amdgpu_cs_request));
|
||||
ibsRequest.ip_type = AMDGPU_HW_IP_COMPUTE;
|
||||
ibsRequest.ip_type = AMDGPU_HW_IP_GFX;
|
||||
ibsRequest.ring = 0;
|
||||
ibsRequest.number_of_ibs = 1;
|
||||
ibsRequest.ibs = &ibInfo;
|
||||
@@ -218,21 +218,23 @@ void KFDEvictTest::AmdgpuCommandSubmissionComputeNop(int rn) {
|
||||
ibsRequest.fence_info.handle = NULL;
|
||||
|
||||
memset(&fenceStatus, 0, sizeof(struct amdgpu_cs_fence));
|
||||
for (int i = 0; i < ALLOCATE_RETRY_TIMES; i++) {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
ASSERT_EQ(0, amdgpu_cs_submit(contextHandle, 0, &ibsRequest, 1));
|
||||
sleep(1);
|
||||
Delay(50);
|
||||
|
||||
fenceStatus.context = contextHandle;
|
||||
fenceStatus.ip_type = AMDGPU_HW_IP_GFX;
|
||||
fenceStatus.ip_instance = 0;
|
||||
fenceStatus.ring = 0;
|
||||
fenceStatus.fence = ibsRequest.seq_no;
|
||||
|
||||
EXPECT_EQ(0, amdgpu_cs_query_fence_status(&fenceStatus,
|
||||
g_TestTimeOut*1000000,
|
||||
0, &expired));
|
||||
if (!expired)
|
||||
WARN() << "CS did not signal completion" << std::endl;
|
||||
}
|
||||
|
||||
fenceStatus.context = contextHandle;
|
||||
fenceStatus.ip_type = AMDGPU_HW_IP_COMPUTE;
|
||||
fenceStatus.ip_instance = 0;
|
||||
fenceStatus.ring = 0;
|
||||
fenceStatus.fence = ibsRequest.seq_no;
|
||||
|
||||
EXPECT_EQ(0, amdgpu_cs_query_fence_status(&fenceStatus,
|
||||
g_TestTimeOut,
|
||||
0, &expired));
|
||||
|
||||
EXPECT_EQ(0, amdgpu_bo_list_destroy(boList));
|
||||
|
||||
EXPECT_EQ(0, amdgpu_bo_unmap_and_free(ibResultHandle, vaHandle,
|
||||
@@ -331,7 +333,8 @@ TEST_F(KFDEvictTest, BasicTest) {
|
||||
LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB" << std::endl;
|
||||
}
|
||||
|
||||
HSAint32 count = vramSize / vramBufSize / N_PROCESSES;
|
||||
// Use 7/8 of VRAM between all processes
|
||||
HSAuint32 count = vramSize * 7 / (8* vramBufSize * N_PROCESSES);
|
||||
|
||||
LOG() << "Found System RAM of " << std::dec << (GetSysMemSize() >> 20) << "MB" << std::endl;
|
||||
|
||||
@@ -353,7 +356,7 @@ TEST_F(KFDEvictTest, BasicTest) {
|
||||
amdgpu_bo_handle handle;
|
||||
AllocAmdgpuBo(rn, size, handle);
|
||||
|
||||
AmdgpuCommandSubmissionComputeNop(rn);
|
||||
AmdgpuCommandSubmissionComputeNop(rn, handle);
|
||||
|
||||
FreeAmdgpuBo(handle);
|
||||
LOG() << m_psName << "free buffer" << std::endl;
|
||||
@@ -533,7 +536,8 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
LOG() << "Found VRAM of " << std::dec << (vramSize >> 20) << "MB." << std::endl;
|
||||
}
|
||||
|
||||
HSAuint32 count = vramSize / vramBufSize / N_PROCESSES;
|
||||
// Use 7/8 of VRAM between all processes
|
||||
HSAuint32 count = vramSize * 7 / (8 * vramBufSize * N_PROCESSES);
|
||||
|
||||
LOG() << "Found System RAM of " << std::dec << (GetSysMemSize() >> 20) << "MB" << std::endl;
|
||||
|
||||
@@ -568,8 +572,6 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
amdgpu_bo_handle handle;
|
||||
AllocAmdgpuBo(rn, size, handle);
|
||||
|
||||
AmdgpuCommandSubmissionComputeNop(rn);
|
||||
|
||||
unsigned int wavefront_num = pBuffers.size();
|
||||
LOG() << m_psName << "wavefront number " << wavefront_num << std::endl;
|
||||
|
||||
@@ -590,8 +592,7 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
/* Submit the packet and start shader */
|
||||
dispatch0.Submit(pm4Queue);
|
||||
|
||||
/* Doing evict/restore queue test for 5 seconds while queue is running */
|
||||
sleep(5);
|
||||
AmdgpuCommandSubmissionComputeNop(rn, handle);
|
||||
|
||||
/* Uncomment this line for debugging */
|
||||
// LOG() << m_psName << "notify shader to quit" << std::endl;
|
||||
@@ -600,7 +601,7 @@ TEST_F(KFDEvictTest, QueueTest) {
|
||||
addrBuffer.Fill(0x5678);
|
||||
|
||||
/* Wait for shader to finish or timeout if shader has vm page fault */
|
||||
dispatch0.SyncWithStatus(120000);
|
||||
EXPECT_EQ(0, dispatch0.SyncWithStatus(120000));
|
||||
|
||||
EXPECT_SUCCESS(pm4Queue.Destroy());
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ class KFDEvictTest : public KFDLocalMemoryTest {
|
||||
void FreeBuffers(std::vector<void *> &pBuffers, HSAuint64 vramBufSize);
|
||||
void AllocAmdgpuBo(int rn, HSAuint64 vramBufSize, amdgpu_bo_handle &handle);
|
||||
void FreeAmdgpuBo(amdgpu_bo_handle handle);
|
||||
void AmdgpuCommandSubmissionComputeNop(int rn);
|
||||
void AmdgpuCommandSubmissionComputeNop(int rn, amdgpu_bo_handle handle);
|
||||
void ForkChildProcesses(int nprocesses);
|
||||
void WaitChildProcesses();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user