From 4c8a849772af16cc09275d75befec5d7402039fc Mon Sep 17 00:00:00 2001 From: Lancelot SIX Date: Fri, 1 Dec 2023 16:42:01 +0000 Subject: [PATCH] trap_handler: Set status.skip_export when halting a wave When inspecting waves on architectures where SPI may not initialize TTMP registers, the debugger cannot reliably know if the trap handler was entered and if it saved valuable information in TTMP registers. This patch uses the status.skip_export bit (unused by the compute shaders) to indicate that it got executed before halting a wave. This is done except for gfx940, where ttmp11[31] can be used (as long as TTMP registers are always initialized by SPI for this architecture). It could be possible to be more selective as architectures always initializing TTMP registers do not require this step, but always doing it makes maintenance simpler. Change-Id: I314db6b37772f7daa8bd405e6662a86658d3f5e0 [ROCm/ROCR-Runtime commit: c5db063b2fb6b6e95124f2b13d74c124b0de1a99] --- .../hsa-runtime/core/runtime/trap_handler/trap_handler.s | 6 ++++++ .../rocr-runtime/runtime/hsa-runtime/loader/executable.cpp | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s index 0936786c5e..175a4c3e4d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s @@ -47,6 +47,7 @@ .set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 8 .set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16)) .set SQ_WAVE_STATUS_HALT_SHIFT , 13 +.set SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT , 18 .set SQ_WAVE_STATUS_HALT_BFE , (SQ_WAVE_STATUS_HALT_SHIFT | (1 << 16)) .set SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT , 8 .set SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT , 11 @@ -64,6 +65,7 @@ .set 
EC_QUEUE_WAVE_MEMORY_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 4)) .set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 5)) +.set TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT , 31 .set TTMP6_WAVE_STOPPED_SHIFT , 30 .set TTMP6_SAVED_STATUS_HALT_SHIFT , 29 .set TTMP6_SAVED_STATUS_HALT_MASK , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT) @@ -243,12 +245,16 @@ trap_entry: // Halt the wavefront upon restoring STATUS below. s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_HALT_SHIFT + // Set WAVE.SKIP_EXPORT as a marker so the debugger knows the trap handler was + // entered and has decided to halt the wave. + s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4) s_bitcmp1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT s_cbranch_scc1 .ttmps_initialized s_mov_b32 ttmp4, 0 s_mov_b32 ttmp5, 0 + s_bitset0_b32 ttmp6, TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT s_bitset1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT .ttmps_initialized: .endif diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp index 2a3b96f243..9a1682afbf 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp @@ -81,8 +81,10 @@ __attribute__((noinline)) static void _loader_debug_state() { // 6: New trap handler ABI. ttmp6[25:0] contains dispatch index modulo queue size // 7: New trap handler ABI. Send interrupts as a bitmask, coalescing concurrent exceptions. // 8: New trap handler ABI. for gfx940: Initialize ttmp[4:5] if ttmp11[31] == 0. -// 9: New trap handler API. For gfx11: Save PC in ttmp11[22:7] ttmp6[31:0], and park the wave if stopped. -HSA_API r_debug _amdgpu_r_debug = {9, +// 9: New trap handler ABI. For gfx11: Save PC in ttmp11[22:7] ttmp6[31:0], and park the wave if stopped. +// 10: New trap handler ABI. 
Set status.skip_export when halting the wave. +// For gfx940, set ttmp6[31] = 0 if ttmp11[31] == 0. +HSA_API r_debug _amdgpu_r_debug = {10, nullptr, reinterpret_cast(&_loader_debug_state), r_debug::RT_CONSISTENT,