Files
rocm-systems/tests/kfdtest/src/KFDCWSRTest.cpp
T
Kent Russell 83d80074f7 Merge gfx90a into amd-staging
Conflicts:
	CMakeLists.txt
	include/hsakmt.h
	src/libhsakmt.h
	src/libhsakmt.ver
	src/queues.c
	src/topology.c
	tests/kfdtest/src/KFDMemoryTest.cpp
	tests/kfdtest/src/KFDTestUtil.hpp

Signed-off-by: Kent Russell <kent.russell@amd.com>
Change-Id: Ic2732e7c0b5e42c1a3a91223f65a65064b602181
2021-03-02 07:48:22 -05:00

283 строки
8.9 KiB
C++

/*
* Copyright (C) 2015-2018 Advanced Micro Devices, Inc. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "KFDCWSRTest.hpp"
#include "Dispatch.hpp"
/* Register usage of the iterate_isa shaders (gfx8 and gfx9 variants) below.
* Initial state:
* s[0:1] - 64-bit iteration count; only the lower 32 bits are used.
* s[2:3] - result buffer base address
* s4 - workgroup id
* v0 - workitem id; always 0 because NUM_THREADS_X (the number of
* threads per workgroup) is set to 1
* Registers:
* v0 - calculated workitem = v0 + s4 * NUM_THREADS_X, which is s4
* v2 - = s0, the 32-bit iteration count
* v[4:5] - corresponding output buffer address: s[2:3] + v0 * 4
* v6 - counter
*/
// Source text of the iterate shader in its gfx8 form; BasicTest selects it
// when m_FamilyId < FAMILY_AI and hands it to CompileShader().
// The shader counts v6 up from 0, looping back while v6 < v2 (v2 is the
// iteration count copied from s0), then stores v6 with flat_store_dword to
// the address in v[4:5], i.e. the result slot indexed by the workgroup id.
// Note: the comment embedded in the shader text says "jump if equal", but
// the instruction is v_cmp_lt_u32, so the branch is taken while v6 < v2.
// This variant uses the v_add_u32 mnemonic where the gfx9 variant uses
// v_add_co_u32; the rest of the text is the same.
static const char* iterate_isa_gfx8 = \
"\
shader iterate_isa\n\
wave_size(32)\n\
type(CS)\n\
// copy the parameters from scalar registers to vector registers\n\
v_mov_b32 v2, s0 // v[2:3] = s[0:1] \n\
v_mov_b32 v3, s1 // v[2:3] = s[0:1] \n\
v_mov_b32 v0, s4 // use workgroup id as index \n\
v_lshlrev_b32 v0, 2, v0 // v0 *= 4 \n\
v_add_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4 \n\
v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4 \n\
v_add_u32 v5, vcc, v5, vcc_lo // v[4:5] = s[2:3] + v0 * 4 \n\
v_mov_b32 v6, 0 \n\
LOOP: \n\
v_add_u32 v6, vcc, 1, v6 \n\
// compare the result value (v6) to iteration value (v2), and \n\
// jump if equal (i.e. if VCC is not zero after the comparison) \n\
v_cmp_lt_u32 vcc, v6, v2 \n\
s_cbranch_vccnz LOOP \n\
flat_store_dword v[4:5], v6 \n\
s_waitcnt vmcnt(0)&lgkmcnt(0) \n\
s_endpgm \n\
end \n\
";
// Source text of the iterate shader for gfx9 and gfx10; BasicTest selects it
// when m_FamilyId is not below FAMILY_AI.
// Same algorithm as iterate_isa_gfx8: count v6 up from 0 while v6 < v2, then
// store v6 to the result slot addressed by v[4:5]. The only textual
// difference is the v_add_co_u32 mnemonic in place of v_add_u32.
// Note: the embedded "jump if equal" comment does not match the
// v_cmp_lt_u32 instruction; the loop repeats while v6 < v2.
static const char* iterate_isa_gfx9 = \
"\
shader iterate_isa\n\
wave_size(32)\n\
type(CS)\n\
// copy the parameters from scalar registers to vector registers\n\
v_mov_b32 v2, s0 // v[2:3] = s[0:1] \n\
v_mov_b32 v3, s1 // v[2:3] = s[0:1] \n\
v_mov_b32 v0, s4 // use workgroup id as index \n\
v_lshlrev_b32 v0, 2, v0 // v0 *= 4 \n\
v_add_co_u32 v4, vcc, s2, v0 // v[4:5] = s[2:3] + v0 * 4 \n\
v_mov_b32 v5, s3 // v[4:5] = s[2:3] + v0 * 4 \n\
v_add_co_u32 v5, vcc, v5, vcc_lo // v[4:5] = s[2:3] + v0 * 4 \n\
v_mov_b32 v6, 0 \n\
LOOP: \n\
v_add_co_u32 v6, vcc, 1, v6 \n\
// compare the result value (v6) to iteration value (v2), and \n\
// jump if equal (i.e. if VCC is not zero after the comparison) \n\
v_cmp_lt_u32 vcc, v6, v2 \n\
s_cbranch_vccnz LOOP \n\
flat_store_dword v[4:5], v6 \n\
s_waitcnt vmcnt(0)&lgkmcnt(0) \n\
s_endpgm \n\
end \n\
";
// Source text of a shader whose only instruction is an s_branch back to its
// own LOOP label, so it never reaches an end-of-program instruction by
// itself. The InterruptRestore test compiles and dispatches it.
static const char* infinite_isa = \
"\
shader infinite_isa \n\
wave_size(32) \n\
type(CS) \n\
LOOP: \n\
s_branch LOOP \n\
end \n\
";
// Fixture setup: runs the base-class setup, creates the ISA generator for
// the current GPU family and resets the wave count used by the tests.
void KFDCWSRTest::SetUp() {
ROUTINE_START
// Base-class setup runs first, before m_FamilyId is read below.
KFDBaseComponentTest::SetUp();
// Generator used by the tests to compile shader text; released in TearDown().
m_pIsaGen = IsaGenerator::Create(m_FamilyId);
// BasicTest passes this to SetDim() as the X dimension and checks this many
// result slots per dispatch.
wave_number = 1;
ROUTINE_END
}
// Fixture teardown: releases the ISA generator created in SetUp(), clears
// the pointer, then runs the base-class teardown.
void KFDCWSRTest::TearDown() {
    ROUTINE_START

    // delete on a null pointer is a no-op, so no explicit guard is needed.
    delete m_pIsaGen;
    m_pIsaGen = NULL;

    KFDBaseComponentTest::TearDown();

    ROUTINE_END
}
// Returns true when the value read from the amdgpu module's emu_mode
// parameter file is non-zero.
bool isOnEmulator() {
    // Starts at 0, so the function reports false unless fscanf_dec stores a
    // non-zero value here.
    uint32_t emuMode = 0;

    fscanf_dec("/sys/module/amdgpu/parameters/emu_mode", &emuMode);

    return emuMode != 0;
}
/**
 * KFDCWSRTest.BasicTest
 *
 * Dispatches the iterate_isa shader on queue 1, then creates queue 2 while
 * that dispatch is still in flight (creating the queue is what is expected to
 * trigger CWSR) and dispatches the same shader on queue 2 with a different
 * iteration count. After Sync() on both dispatches, every wave's slot in each
 * result buffer must hold the iteration count that was requested.
 */
TEST_F(KFDCWSRTest, BasicTest) {
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

    if (m_FamilyId >= FAMILY_VI) {
        HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);
        HsaMemoryBuffer resultBuf1(PAGE_SIZE, defaultGPUNode, true, false, false);
        HsaMemoryBuffer resultBuf2(PAGE_SIZE, defaultGPUNode, true, false, false);

        // Unsigned so the verification below compares like with like: the
        // result buffers are read back as unsigned int.
        unsigned int count1 = 40000000;
        unsigned int count2 = 20000000;

        if (isOnEmulator()) {
            // Divide the iteration counts by 1000 so that the test can
            // finish in a reasonable time.
            count1 /= 1000;
            count2 /= 1000;
            LOG() << "On Emulators" << std::endl;
        }

        unsigned int* result1 = resultBuf1.As<unsigned int*>();
        unsigned int* result2 = resultBuf2.As<unsigned int*>();

        // Pick the shader text matching the GPU generation.
        const char *pIterateIsa;
        if (m_FamilyId < FAMILY_AI)
            pIterateIsa = iterate_isa_gfx8;
        else
            pIterateIsa = iterate_isa_gfx9;

        m_pIsaGen->CompileShader(pIterateIsa, "iterate_isa", isaBuffer);

        PM4Queue queue1, queue2;

        ASSERT_SUCCESS(queue1.Create(defaultGPUNode));

        // Automatic storage instead of new/delete: a fatal ASSERT_SUCCESS
        // below leaves the test body early, and heap-allocated dispatches
        // would then never be deleted.
        Dispatch dispatch1(isaBuffer);
        Dispatch dispatch2(isaBuffer);

        // The iteration count is passed by value in the first argument slot;
        // the second argument is the buffer the shader writes its counter to.
        dispatch1.SetArgs(reinterpret_cast<void *>(count1), result1);
        dispatch1.SetDim(wave_number, 1, 1);
        dispatch2.SetArgs(reinterpret_cast<void *>(count2), result2);
        dispatch2.SetDim(wave_number, 1, 1);

        // Submit the shader, queue1
        dispatch1.Submit(queue1);

        // Creating queue2 while queue1 is still running will trigger the CWSR
        ASSERT_SUCCESS(queue2.Create(defaultGPUNode));

        // Submit the shader, queue2
        dispatch2.Submit(queue2);

        dispatch1.Sync();
        dispatch2.Sync();

        // Ensure all the waves complete as expected. The loop stops at the
        // first mismatch, so i == wave_number only if every slot matched.
        int i;
        for (i = 0 ; i < wave_number; ++i) {
            if (result1[i] != count1) {
                LOG() << "Dispatch 1, work item [" << std::dec << i << "] "
                      << result1[i] << " != " << count1 << std::endl;
                break;
            }
            if (result2[i] != count2) {
                LOG() << "Dispatch 2, work item [" << std::dec << i << "] "
                      << result2[i] << " != " << count2 << std::endl;
                break;
            }
        }
        EXPECT_EQ(i, wave_number);

        EXPECT_SUCCESS(queue1.Destroy());
        EXPECT_SUCCESS(queue2.Destroy());
    } else {
        // NOTE(review): no std::hex is applied here, so the value after "0x"
        // is printed in whatever base the log stream currently uses - confirm.
        LOG() << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
    }

    TEST_END
}
/**
 * KFDCWSRTest.InterruptRestore
 *
 * This test verifies that CP can preempt an HQD while it is restoring a dispatch.
 *
 * Create queue 1.
 * Start a dispatch on queue 1 which runs indefinitely and fills all CU wave slots.
 * Create queue 2, triggering context save on queue 1.
 * Start a dispatch on queue 2 which runs indefinitely and fills all CU wave slots.
 * Create queue 3, triggering context save and restore on queues 1 and 2.
 * Preempt runlist. One or both queues must interrupt context restore to preempt.
 *
 * The dispatched shader never terminates, so the test does not Sync() on the
 * dispatches; it ends by destroying all three queues.
 */
TEST_F(KFDCWSRTest, InterruptRestore) {
    TEST_START(TESTPROFILE_RUNALL);

    int defaultGPUNode = m_NodeInfo.HsaDefaultGPUNode();

    if (m_FamilyId >= FAMILY_VI) {
        HsaMemoryBuffer isaBuffer(PAGE_SIZE, defaultGPUNode, true/*zero*/, false/*local*/, true/*exec*/);

        m_pIsaGen->CompileShader(infinite_isa, "infinite_isa", isaBuffer);

        PM4Queue queue1, queue2, queue3;

        ASSERT_SUCCESS(queue1.Create(defaultGPUNode));

        // Automatic storage instead of new/delete: a fatal ASSERT_SUCCESS
        // below leaves the test body early, and heap-allocated dispatches
        // would then never be deleted.
        Dispatch dispatch1(isaBuffer);
        Dispatch dispatch2(isaBuffer);

        dispatch1.SetDim(0x10000, 1, 1);
        dispatch2.SetDim(0x10000, 1, 1);

        dispatch1.Submit(queue1);

        // Second queue is created while the first dispatch is running.
        ASSERT_SUCCESS(queue2.Create(defaultGPUNode));

        dispatch2.Submit(queue2);

        // Give waves time to launch.
        Delay(1);

        // Third queue is created while both dispatches are running.
        ASSERT_SUCCESS(queue3.Create(defaultGPUNode));

        EXPECT_SUCCESS(queue1.Destroy());
        EXPECT_SUCCESS(queue2.Destroy());
        EXPECT_SUCCESS(queue3.Destroy());
    } else {
        // NOTE(review): no std::hex is applied here, so the value after "0x"
        // is printed in whatever base the log stream currently uses - confirm.
        LOG() << "Skipping test: No CWSR present for family ID 0x" << m_FamilyId << "." << std::endl;
    }

    TEST_END
}