From 08a91d3db078bda5c1f78e70fe2ca9ea87c63492 Mon Sep 17 00:00:00 2001 From: Lancelot SIX Date: Wed, 22 May 2024 14:38:50 +0100 Subject: [PATCH] trap_handler_gfx12: Do not override STATE_PRIV.BARRIER_COMPLETE The value of STATE_PRIV is captured by the 1st level trap handler, and passed on to the second level trap handler. The value is to be restored before exit. However, it is possible for the value of STATE_PRIV.BARRIER_COMPLETE to change while the wave is in the trap handler (all the other waves in the workgroup have signaled the work-group barrier), and in this case restoring STATE_PRIV in full would result in STATE_PRIV.BARRIER_COMPLETE being cleared. Restore every bit of STATE_PRIV except for BARRIER_COMPLETE before return to prevent this race. Change-Id: I76c875bced7d23c58670b28f257d22c933f99fc5 Signed-off-by: Chris Freehill [ROCm/ROCR-Runtime commit: 9e625307d2d22c6dbc43b4d21706620a45999f7e] --- .../core/runtime/trap_handler/trap_handler_gfx12.s | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler_gfx12.s b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler_gfx12.s index 25f76d03b3..2289c57d7d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler_gfx12.s +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler_gfx12.s @@ -63,6 +63,7 @@ .set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 4 .set SQ_WAVE_STATE_PRIV_HALT_BFE , (SQ_WAVE_STATE_PRIV_HALT_SHIFT | (1 << 16)) .set SQ_WAVE_STATE_PRIV_HALT_SHIFT , 14 +.set SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT , 2 .set TRAP_ID_ABORT , 2 .set TRAP_ID_DEBUGTRAP , 3 .set TTMP6_SAVED_STATUS_HALT_MASK , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT) @@ -208,7 +209,9 @@ trap_entry: // Restore SQ_WAVE_STATUS. 
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 - s_setreg_b32 hwreg(HW_REG_STATE_PRIV), ttmp12 + s_setreg_b32 hwreg(HW_REG_STATE_PRIV, 0, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT), ttmp12 + s_lshr_b32 ttmp12, ttmp12, (SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1) + s_setreg_b32 hwreg(HW_REG_STATE_PRIV, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1, 32 - SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT - 1), ttmp12 // Return to original (possibly modified) PC. s_rfe_b64 [ttmp0, ttmp1]