From b2d09b7e8dbccfb30c5bd91e5cb8847421d23f49 Mon Sep 17 00:00:00 2001 From: David Belanger Date: Wed, 26 Jun 2024 15:53:42 -0400 Subject: [PATCH] kfdtest: Fix DeviceHdpFlush on GFX12 Fix register COMPUTE_PGM_RSRC2 in Dispatch code. Bit 6 (called TRAP_PRESENT on pre-GFX12) should not be set on GFX12 as it has a different meaning (DYNAMIC_VGPR). Minor instruction changes for CopyOnSignalIsa and WriteAndSignalIsa shaders. Change-Id: Ib4e75e3c92f220210bc45778738d81b91efb9d5e Signed-off-by: David Belanger Signed-off-by: Chris Freehill [ROCm/ROCR-Runtime commit: 611911020c68b7b2f8565e96bd5671766a10ff77] --- .../libhsakmt/tests/kfdtest/src/Dispatch.cpp | 28 +++++++++++------ .../tests/kfdtest/src/ShaderStore.cpp | 30 ++++++++++++------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/Dispatch.cpp b/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/Dispatch.cpp index b35484d733..1b4d08e949 100644 --- a/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/Dispatch.cpp +++ b/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/Dispatch.cpp @@ -127,13 +127,30 @@ void Dispatch::BuildIb() { 0, // COMPUTE_PERFCOUNT_ENABLE }; + /* + * For some special asics in the list of DEGFX11_12113 + * COMPUTE_PGM_RSRC needs priv=1 to prevent hardware traps + */ + const bool priv = m_NeedCwsrWA; + + unsigned int pgmRsrc1 = + (0xc0 << COMPUTE_PGM_RSRC1__FLOAT_MODE__SHIFT) | + ((m_SpiPriority & 3) << COMPUTE_PGM_RSRC1__PRIORITY__SHIFT) | + (priv << COMPUTE_PGM_RSRC1__PRIV__SHIFT) | + ((m_FamilyId < FAMILY_GFX12) ? (0x2 << COMPUTE_PGM_RSRC1__SGPRS__SHIFT) : 0) | + (0x4 << COMPUTE_PGM_RSRC1__VGPRS__SHIFT); // 4 * 8 = 32 VGPRs + unsigned int pgmRsrc2 = 0; pgmRsrc2 |= (m_ScratchEn << COMPUTE_PGM_RSRC2__SCRATCH_EN__SHIFT) & COMPUTE_PGM_RSRC2__SCRATCH_EN_MASK; pgmRsrc2 |= ((m_scratch_base ? 
6 : 4) << COMPUTE_PGM_RSRC2__USER_SGPR__SHIFT) & COMPUTE_PGM_RSRC2__USER_SGPR_MASK; - pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT) + + if (m_FamilyId < FAMILY_GFX12) { + pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT) & COMPUTE_PGM_RSRC2__TRAP_PRESENT_MASK; + } + pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TGID_X_EN__SHIFT) & COMPUTE_PGM_RSRC2__TGID_X_EN_MASK; pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__TIDIG_COMP_CNT__SHIFT) @@ -143,15 +160,8 @@ void Dispatch::BuildIb() { pgmRsrc2 |= (1 << COMPUTE_PGM_RSRC2__EXCP_EN_MSB__SHIFT) & COMPUTE_PGM_RSRC2__EXCP_EN_MSB_MASK; - /* - * For some special asics in the list of DEGFX11_12113 - * COMPUTE_PGM_RSRC needs priv=1 to prevent hardware traps - */ - const bool priv = m_NeedCwsrWA; const unsigned int COMPUTE_PGM_RSRC[] = { - // PGM_RSRC1 = { VGPRS: 16 SGPRS: 16 PRIORITY: m_SpiPriority FLOAT_MODE: c0 - // PRIV: 0 (1 for GFX11) DX10_CLAMP: 0 DEBUG_MODE: 0 IEEE_MODE: 0 BULKY: 0 CDBG_USER: 0 } - 0x000c0084 | ((m_SpiPriority & 3) << 10) | (priv << 20), + pgmRsrc1, pgmRsrc2 }; diff --git a/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/ShaderStore.cpp b/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/ShaderStore.cpp index 9ce905b75c..20681fe173 100644 --- a/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/ShaderStore.cpp +++ b/projects/rocr-runtime/libhsakmt/tests/kfdtest/src/ShaderStore.cpp @@ -318,16 +318,15 @@ const char *CopyOnSignalIsa = .if (.amdgcn.gfx_generation_number >= 12) POLLSIGNAL: - s_load_dword s16, s[0:1], 0x0 scope:SCOPE_CU + s_load_dword s16, s[0:1], 0x0 scope:SCOPE_SYS s_cmp_eq_i32 s16, s18 s_cbranch_scc0 POLLSIGNAL - s_load_dword s17, s[0:1], 0x4 scope:SCOPE_CU + s_load_dword s17, s[0:1], 0x4 scope:SCOPE_SYS s_wait_kmcnt 0 - v_mov_b32 v2, s17 - flat_store_dword v[4:5], v2 scope:SCOPE_CU - s_wait_storecnt 0 + v_mov_b32 v2, s17 + flat_store_dword v[4:5], v2 scope:SCOPE_SYS .else POLLSIGNAL: @@ -442,12 +441,21 @@ const char *WriteAndSignalIsa = v_mov_b32 v3, s3 v_mov_b32 v4, s4 v_mov_b32 v5, s5 - 
v_mov_b32 v18, 0xbeef - flat_store_dword v[4:5], v18 glc - v_mov_b32 v18, 0x1 - flat_store_dword v[2:3], v18 glc - v_mov_b32 v18, 0xcafe - flat_store_dword v[0:1], v18 glc + .if (.amdgcn.gfx_generation_number >= 12) + v_mov_b32 v18, 0xbeef + flat_store_dword v[4:5], v18 scope:SCOPE_SYS + v_mov_b32 v18, 0x1 + flat_store_dword v[2:3], v18 scope:SCOPE_SYS + v_mov_b32 v18, 0xcafe + flat_store_dword v[0:1], v18 scope:SCOPE_SYS + .else + v_mov_b32 v18, 0xbeef + flat_store_dword v[4:5], v18 glc + v_mov_b32 v18, 0x1 + flat_store_dword v[2:3], v18 glc + v_mov_b32 v18, 0xcafe + flat_store_dword v[0:1], v18 glc + .endif .else s_mov_b32 s18, 0xbeef s_store_dword s18, s[0:1], 0x4 glc