From 78e754ca5ba05812ee581758b871ddd94253bd1d Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Thu, 25 Jul 2019 12:02:46 -0400 Subject: [PATCH] KFDTest: Make shader compatible with gfx9 and gfx10 Remove the CHIP name from the shader ISA and add wave_size(32) so that the same shader can be used for both GFX9 and GFX10 Change-Id: I16ea72f87980c3d9c11298e20c06a0a073fe9a28 Signed-off-by: shaoyunl --- tests/kfdtest/src/KFDCWSRTest.cpp | 31 +------------ tests/kfdtest/src/KFDEvictTest.cpp | 64 +++----------------------- tests/kfdtest/src/KFDMemoryTest.cpp | 70 ++++------------------------- tests/kfdtest/src/KFDQMTest.cpp | 2 + 4 files changed, 18 insertions(+), 149 deletions(-) diff --git a/tests/kfdtest/src/KFDCWSRTest.cpp b/tests/kfdtest/src/KFDCWSRTest.cpp index d06ebc1d26..0fbb425f72 100644 --- a/tests/kfdtest/src/KFDCWSRTest.cpp +++ b/tests/kfdtest/src/KFDCWSRTest.cpp @@ -52,35 +52,10 @@ LOOP:\n\ end\n\ "; +//This shader can be used by gfx9 and gfx10 static const char* iterate_isa_gfx9 = \ "\ shader iterate_isa\n\ -asic(GFX9)\n\ -type(CS)\n\ -/*copy the parameters from scalar registers to vector registers*/\n\ - v_mov_b32 v0, s0\n\ - v_mov_b32 v1, s1\n\ - v_mov_b32 v2, s2\n\ - v_mov_b32 v3, s3\n\ - flat_load_dword v4, v[0:1] slc /*load target iteration value*/\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - v_mov_b32 v5, 0\n\ -LOOP:\n\ - v_add_co_u32 v5, vcc, 1, v5\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - /*compare the result value (v5) to iteration value (v4), and jump if equal (i.e. 
if VCC is not zero after the comparison)*/\n\ - v_cmp_lt_u32 vcc, v5, v4\n\ - s_cbranch_vccnz LOOP\n\ - flat_store_dword v[2,3], v5\n\ - s_waitcnt vmcnt(0)&lgkmcnt(0)\n\ - s_endpgm\n\ -end\n\ -"; - -static const char* iterate_isa_gfx10 = \ -"\ -shader iterate_isa\n\ -asic(GFX10)\n\ wave_size(32)\n\ type(CS)\n\ /*copy the parameters from scalar registers to vector registers*/\n\ @@ -160,10 +135,8 @@ TEST_F(KFDCWSRTest, BasicTest) { if (m_FamilyId < FAMILY_AI) pIterateIsa = iterate_isa_gfx8; - else if (m_FamilyId < FAMILY_NV) - pIterateIsa = iterate_isa_gfx9; else - pIterateIsa = iterate_isa_gfx10; + pIterateIsa = iterate_isa_gfx9; m_pIsaGen->CompileShader(pIterateIsa, "iterate_isa", isaBuffer); diff --git a/tests/kfdtest/src/KFDEvictTest.cpp b/tests/kfdtest/src/KFDEvictTest.cpp index 01efd86573..b74aedba35 100644 --- a/tests/kfdtest/src/KFDEvictTest.cpp +++ b/tests/kfdtest/src/KFDEvictTest.cpp @@ -364,11 +364,15 @@ TEST_F(KFDEvictTest, BasicTest) { * v[2:3] - address of corresponding local buf address offset: s[0:1] + v0 * 8 * v[4:5] - corresponding output buf address: s[2:3] + v0 * 4 * v[6:7] - local buf address used for read test + * + * This shader can be used by gfx9 and gfx10 + * */ + static const char* gfx9_ReadMemory = "\ shader ReadMemory\n\ - asic(GFX9)\n\ + wave_size(32)\n\ type(CS)\n\ \n\ // compute address of corresponding output buffer\n\ @@ -469,67 +473,11 @@ L_QUIT:\n\ end\n\ "; - -static const char* gfx10_ReadMemory = -"\ - shader ReadMemory\n\ - asic(GFX10)\n\ - wave_size(32)\n\ - type(CS)\n\ - \n\ - // compute address of corresponding output buffer\n\ - v_mov_b32 v0, s4 // use workgroup id as index\n\ - v_lshlrev_b32 v0, 2, v0 // v0 *= 4\n\ - v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4\n\ - v_mov_b32 v5, s3\n\ - v_add_co_u32 v5, vcc, v5, vcc_lo\n\ - \n\ - // compute input buffer offset used to store corresponding local buffer address\n\ - v_lshlrev_b32 v0, 1, v0 // v0 *= 8\n\ - v_add_co_u32 v2, vcc, s0, v0 // v[2:3] = s[0:1] + v0 * 
8\n\ - v_mov_b32 v3, s1\n\ - v_add_co_u32 v3, vcc, v3, vcc_lo\n\ - \n\ - // load 64bit local buffer address stored at v[2:3] to v[6:7]\n\ - flat_load_dwordx2 v[6:7], v[2:3] slc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - \n\ - v_mov_b32 v8, 0x5678\n\ - s_movk_i32 s8, 0x5678\n\ -L_REPEAT:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish\n\ - s_cmp_eq_i32 s16, s8\n\ - s_cbranch_scc1 L_QUIT // if notified to quit by host\n\ - // loop read 64M local buffer starting at v[6:7]\n\ - // every 4k page only read once\n\ - v_mov_b32 v9, 0\n\ - v_mov_b32 v10, 0x1000 // 4k page\n\ - v_mov_b32 v11, 0x4000000 // 64M size\n\ - v_mov_b32 v12, v6\n\ - v_mov_b32 v13, v7\n\ -L_LOOP_READ:\n\ - flat_load_dwordx2 v[14:15], v[12:13] slc\n\ - v_add_co_u32 v9, vcc, v9, v10 \n\ - v_add_co_u32 v12, vcc, v12, v10\n\ - v_add_co_u32 v13, vcc, v13, vcc_lo\n\ - v_cmp_lt_u32 vcc, v9, v11\n\ - s_cbranch_vccnz L_LOOP_READ\n\ - s_branch L_REPEAT\n\ -L_QUIT:\n\ - flat_store_dword v[4:5], v8\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory writes to finish\n\ - s_endpgm\n\ - end\n\ -"; - std::string KFDEvictTest::CreateShader() { if (m_FamilyId < FAMILY_AI) return gfx8_ReadMemory; - else if (m_FamilyId < FAMILY_NV) - return gfx9_ReadMemory; else - return gfx10_ReadMemory; + return gfx9_ReadMemory; } /* Evict and restore queue test diff --git a/tests/kfdtest/src/KFDMemoryTest.cpp b/tests/kfdtest/src/KFDMemoryTest.cpp index 58c14c94c0..aa3b75f9b3 100644 --- a/tests/kfdtest/src/KFDMemoryTest.cpp +++ b/tests/kfdtest/src/KFDMemoryTest.cpp @@ -118,23 +118,7 @@ end\n\ const char* gfx9_PollMemory = "\ shader ReadMemory\n\ -asic(GFX9)\n\ -type(CS)\n\ -/* Assume src address in s0, s1 and dst address in s2, s3*/\n\ - s_movk_i32 s18, 0x5678\n\ - LOOP:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 LOOP\n\ - s_store_dword s18, s[2:3], 0x0 glc\n\ - s_endpgm\n\ - end\n\ -"; - 
-const char* gfx10_PollMemory = -"\ -shader ReadMemory\n\ -asic(GFX10)\n\ +wave_size(32)\n\ type(CS)\n\ /* Assume src address in s0, s1 and dst address in s2, s3*/\n\ s_movk_i32 s18, 0x5678\n\ @@ -159,7 +143,7 @@ type(CS)\n\ const char* gfx9_CopyOnSignal = "\ shader CopyOnSignal\n\ -asic(GFX9)\n\ +wave_size(32)\n\ type(CS)\n\ /* Assume input buffer in s0, s1 */\n\ s_mov_b32 s18, 0xcafe\n\ @@ -175,27 +159,6 @@ POLLSIGNAL:\n\ end\n\ "; -const char* gfx10_CopyOnSignal = -"\ -shader CopyOnSignal\n\ -asic(GFX10)\n\ -type(CS)\n\ -/* Assume input buffer in s0, s1 */\n\ - s_mov_b32 s18, 0xcafe\n\ -POLLSIGNAL:\n\ - s_load_dword s16, s[0:1], 0x0 glc\n\ - s_cmp_eq_i32 s16, s18\n\ - s_cbranch_scc0 POLLSIGNAL\n\ - s_load_dword s17, s[0:1], 0x4 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_store_dword s17, s[0:1], 0x8 glc\n\ - s_waitcnt vmcnt(0) & lgkmcnt(0)\n\ - s_endpgm\n\ - end\n\ -"; - - - /* Input0: A buffer of at least 2 dwords. * DW0: used as a signal. Write 0xcafe to signal * DW1: Write to this buffer for other device to read. 
@@ -204,7 +167,7 @@ POLLSIGNAL:\n\ const char* gfx9_WriteAndSignal = "\ shader WriteAndSignal\n\ -asic(GFX9)\n\ +wave_size(32)\n\ type(CS)\n\ /* Assume input buffer in s0, s1 */\n\ s_mov_b32 s18, 0xbeef\n\ @@ -217,21 +180,7 @@ type(CS)\n\ end\n\ "; -const char* gfx10_WriteAndSignal = -"\ -shader WriteAndSignal\n\ -asic(GFX10)\n\ -type(CS)\n\ -/* Assume input buffer in s0, s1 */\n\ - s_mov_b32 s18, 0xbeef\n\ - s_store_dword s18, s[0:1], 0x4 glc\n\ - s_mov_b32 s18, 0x1\n\ - s_store_dword s18, s[2:3], 0 glc\n\ - s_mov_b32 s18, 0xcafe\n\ - s_store_dword s18, s[0:1], 0x0 glc\n\ - s_endpgm\n\ - end\n\ -"; +//These gfx9_PollMemory, gfx9_CopyOnSignal, gfx9_WriteAndSignal shaders can be used by both gfx9 and gfx10 void KFDMemoryTest::SetUp() { ROUTINE_START @@ -364,7 +313,7 @@ TEST_F(KFDMemoryTest, MapUnmapToNodes) { HsaMemoryBuffer srcBuffer(PAGE_SIZE, defaultGPUNode); HsaMemoryBuffer dstBuffer(PAGE_SIZE, defaultGPUNode); - m_pIsaGen->CompileShader((m_FamilyId < FAMILY_NV) ? gfx9_PollMemory : gfx10_PollMemory, "ReadMemory", isaBuffer); + m_pIsaGen->CompileShader(gfx9_PollMemory, "ReadMemory", isaBuffer); PM4Queue pm4Queue; ASSERT_SUCCESS(pm4Queue.Create(defaultGPUNode)); @@ -1961,8 +1910,7 @@ TEST_F(KFDMemoryTest, HostHdpFlush) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(defaultGPUNode)); HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader((m_FamilyId < FAMILY_NV) ? gfx9_CopyOnSignal : gfx10_CopyOnSignal, - "CopyOnSignal", isaBuffer); + m_pIsaGen->CompileShader(gfx9_CopyOnSignal, "CopyOnSignal", isaBuffer); Dispatch dispatch0(isaBuffer); dispatch0.SetArgs(buffer, NULL); dispatch0.Submit(queue); @@ -2081,8 +2029,7 @@ TEST_F(KFDMemoryTest, DeviceHdpFlush) { PM4Queue queue; ASSERT_SUCCESS(queue.Create(nodes[0])); HsaMemoryBuffer isaBuffer(PAGE_SIZE, nodes[0], true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader((queue.GetFamilyId() < FAMILY_NV) ? 
gfx9_CopyOnSignal : gfx10_CopyOnSignal, - "CopyOnSignal", isaBuffer); + m_pIsaGen->CompileShader(gfx9_CopyOnSignal, "CopyOnSignal", isaBuffer); Dispatch dispatch(isaBuffer); dispatch.SetArgs(buffer, NULL); dispatch.Submit(queue); @@ -2090,8 +2037,7 @@ TEST_F(KFDMemoryTest, DeviceHdpFlush) { PM4Queue queue0; ASSERT_SUCCESS(queue0.Create(nodes[1])); HsaMemoryBuffer isaBuffer0(PAGE_SIZE, nodes[1], true/*zero*/, false/*local*/, true/*exec*/); - m_pIsaGen->CompileShader((queue0.GetFamilyId() < FAMILY_NV) ? gfx9_WriteAndSignal : gfx10_WriteAndSignal, - "WriteAndSignal", isaBuffer0); + m_pIsaGen->CompileShader(gfx9_WriteAndSignal, "WriteAndSignal", isaBuffer0); Dispatch dispatch0(isaBuffer0); dispatch0.SetArgs(buffer, mmioBase); dispatch0.Submit(queue0); diff --git a/tests/kfdtest/src/KFDQMTest.cpp b/tests/kfdtest/src/KFDQMTest.cpp index 942f426ff7..48e4fbb098 100644 --- a/tests/kfdtest/src/KFDQMTest.cpp +++ b/tests/kfdtest/src/KFDQMTest.cpp @@ -489,10 +489,12 @@ TEST_F(KFDQMTest, OverSubscribeCpQueues) { /* A simple isa loop program with dense mathematic operations * s1 controls the number iterations of the loop + * This shader can be used by GFX8, GFX9 and GFX10 */ static const char *loop_isa = \ "\ shader loop_isa\n\ +wave_size(32)\n\ type(CS)\n\ s_movk_i32 s0, 0x0008\n\ s_movk_i32 s1, 0x00ff\n\