diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s index 0936786c5e..175a4c3e4d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/trap_handler/trap_handler.s @@ -47,6 +47,7 @@ .set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 8 .set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16)) .set SQ_WAVE_STATUS_HALT_SHIFT , 13 +.set SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT , 18 .set SQ_WAVE_STATUS_HALT_BFE , (SQ_WAVE_STATUS_HALT_SHIFT | (1 << 16)) .set SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT , 8 .set SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT , 11 @@ -64,6 +65,7 @@ .set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 4)) .set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 5)) +.set TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT , 31 .set TTMP6_WAVE_STOPPED_SHIFT , 30 .set TTMP6_SAVED_STATUS_HALT_SHIFT , 29 .set TTMP6_SAVED_STATUS_HALT_MASK , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT) @@ -243,12 +245,16 @@ trap_entry: // Halt the wavefront upon restoring STATUS below. s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_HALT_SHIFT + // Set WAVE.SKIP_EXPORT as a marker so the debugger knows the trap handler was + // entered and has decided to halt the wave. 
+ s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT .if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4) s_bitcmp1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT s_cbranch_scc1 .ttmps_initialized s_mov_b32 ttmp4, 0 s_mov_b32 ttmp5, 0 + s_bitset0_b32 ttmp6, TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT s_bitset1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT .ttmps_initialized: .endif diff --git a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp index 2a3b96f243..9a1682afbf 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/loader/executable.cpp @@ -81,8 +81,10 @@ __attribute__((noinline)) static void _loader_debug_state() { // 6: New trap handler ABI. ttmp6[25:0] contains dispatch index modulo queue size // 7: New trap handler ABI. Send interrupts as a bitmask, coalescing concurrent exceptions. // 8: New trap handler ABI. for gfx940: Initialize ttmp[4:5] if ttmp11[31] == 0. -// 9: New trap handler API. For gfx11: Save PC in ttmp11[22:7] ttmp6[31:0], and park the wave if stopped. -HSA_API r_debug _amdgpu_r_debug = {9, +// 9: New trap handler ABI. For gfx11: Save PC in ttmp11[22:7] ttmp6[31:0], and park the wave if stopped. +// 10: New trap handler ABI. Set status.skip_export when halting the wave. +// For gfx940, set ttmp6[31] = 0 if ttmp11[31] == 0. +HSA_API r_debug _amdgpu_r_debug = {10, nullptr, reinterpret_cast(&_loader_debug_state), r_debug::RT_CONSISTENT,