From 00da82f951c181f1f44605fca2b65c65a6821912 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Wed, 22 Apr 2020 20:33:36 -0700 Subject: [PATCH] Add debugger support for wave halted at launch New trap handler ABI: Record in ttmp11[8:7] the event that caused the trap handler to be entered. We currently record 2 events, trap_raised if an s_trap instruction was executed, or excp_raised if an exception (MEM_VIOL or ILLEGAL_INST) was raised. Change-Id: Ie278c8277437b3b67c2737dcd1a12fe6511df428 --- .../hsa-runtime/core/inc/amd_gpu_shaders.h | 111 ++++++++++-------- runtime/hsa-runtime/loader/executable.cpp | 5 +- 2 files changed, 66 insertions(+), 50 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h b/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h index c8c4243bae..1711b6c85b 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h @@ -125,7 +125,9 @@ static const unsigned int kCodeTrapHandler9[] = { .set MAX_NUM_DOORBELLS_MASK , ((1 << 10) - 1) .set SENDMSG_M0_DOORBELL_ID_BITS , 12 .set SENDMSG_M0_DOORBELL_ID_MASK , ((1 << SENDMSG_M0_DOORBELL_ID_BITS) - 1) - .set TTMP11_DEBUG_TRAP_BIT , 7 + .set TTMP11_TRAP_RAISED_BIT , 7 + .set TTMP11_EXCP_RAISED_BIT , 8 + .set TTMP11_EVENTS_MASK , (1 << TTMP11_TRAP_RAISED_BIT) | (1 << TTMP11_EXCP_RAISED_BIT) .set DEBUG_INTERRUPT_CONTEXT_ID_BIT , 23 .set INSN_S_ENDPGM_OPCODE , 0xBF810000 @@ -151,9 +153,9 @@ static const unsigned int kCodeTrapHandler9[] = { // ttmp14 = TMA[31:0] // ttmp15 = TMA[63:32] // gfx9: - // ttmp11 = SQ_WAVE_IB_STS[20:15], 0[17:0], DebugTrap[0], NoScratch[0], WaveIdInWG[5:0] + // ttmp11 = SQ_WAVE_IB_STS[20:15], 0[16:0], ExcpRaised[0], TrapRaised[0], NoScratch[0], WaveIdInWG[5:0] // gfx10: - // ttmp11 = SQ_WAVE_IB_STS[25], SQ_WAVE_IB_STS[21:15], 0[15:0], DebugTrap[0], NoScratch[0], WaveIdInWG[5:0] + // ttmp11 = SQ_WAVE_IB_STS[25], SQ_WAVE_IB_STS[21:15], 0[14:0], ExcpRaised[0], TrapRaised[0], NoScratch[0], WaveIdInWG[5:0] .macro
mGetDoorbellId s_mov_b32 exec_lo, 0x80000000 @@ -200,9 +202,12 @@ static const unsigned int kCodeTrapHandler9[] = { s_cbranch_scc1 .excp_raised // Otherwise trap entered due to single step exception. - .signal_debugger: - s_bitset1_b32 ttmp11, TTMP11_DEBUG_TRAP_BIT + s_branch .signal_debugger + .signal_trap_debugger: + s_bitset1_b32 ttmp11, TTMP11_TRAP_RAISED_BIT + + .signal_debugger: // Fetch doorbell index for our queue. s_mov_b32 ttmp2, exec_lo s_mov_b32 ttmp3, exec_hi @@ -215,7 +220,7 @@ static const unsigned int kCodeTrapHandler9[] = { s_mov_b32 exec_lo, ttmp2 // Set the debug interrupt context id. - // FIXME: Make conditional on TTMP11_DEBUG_TRAP_BIT when exceptions are handled. + // FIXME: Make conditional when exceptions are handled. s_bitset1_b32 ttmp3, DEBUG_INTERRUPT_CONTEXT_ID_BIT // Send an interrupt to trigger event notification. @@ -244,12 +249,12 @@ static const unsigned int kCodeTrapHandler9[] = { s_and_b32 ttmp3, ttmp2, (SQ_WAVE_TRAPSTS_MEM_VIOL_MASK | SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK) s_cmp_eq_u32 ttmp3, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK s_mov_b32 ttmp3, SIGNAL_CODE_MEM_VIOL - s_cbranch_scc1 .signal_error + s_cbranch_scc1 .signal_excp_error // If illegal instruction then signal queue error. s_and_b32 ttmp3, ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK s_mov_b32 ttmp3, SIGNAL_CODE_ILLEGAL_INST - s_cbranch_scc1 .signal_error + s_cbranch_scc1 .signal_excp_error // Otherwise (memory violation with XNACK error) return to shader. s_branch .exit_trap @@ -257,18 +262,25 @@ static const unsigned int kCodeTrapHandler9[] = { .trap_raised: // If debugger trap (s_trap >= 3) then signal debugger. s_cmp_ge_u32 ttmp2, 0x3; - s_cbranch_scc1 .signal_debugger + s_cbranch_scc1 .signal_trap_debugger // If llvm.trap (s_trap 2) then signal queue error. s_cmp_eq_u32 ttmp2, 0x2 s_mov_b32 ttmp3, SIGNAL_CODE_LLVM_TRAP - s_cbranch_scc1 .signal_error + s_cbranch_scc1 .signal_trap_error // For other traps advance PC and return to shader. 
s_add_u32 ttmp0, ttmp0, 0x4 s_addc_u32 ttmp1, ttmp1, 0x0 s_branch .exit_trap + .signal_trap_error: + s_bitset1_b32 ttmp11, TTMP11_TRAP_RAISED_BIT + s_branch .signal_error + + .signal_excp_error: + s_bitset1_b32 ttmp11, TTMP11_EXCP_RAISED_BIT + .signal_error: // FIXME: don't trash ttmp4/ttmp5 when exception handling is unified. s_mov_b32 ttmp4, ttmp3 @@ -321,26 +333,26 @@ static const unsigned int kCodeTrapHandler9[] = { .exit_trap: mExitTrap */ - - 0x92eeff6d, 0x00080010, 0xbf850036, 0xb8eef803, 0x866fff6e, 0x00000900, - 0xbf850026, 0xbef71a87, 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, - 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x867eff7e, - 0x00000fff, 0xbeef007e, 0xbefe006e, 0xbeef1a97, 0xbeee007c, 0xbefc006f, - 0xbf800000, 0xbf900001, 0xbefc006e, 0x866dff6d, 0x0000ffff, 0xc0021bb6, - 0x00000000, 0xbf8cc07f, 0xbf06ff6e, 0xbf810000, 0xbf850002, 0x8778ff78, - 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x001f8000, 0xb96ef807, 0x86fe7e7e, - 0x86ea6a6a, 0xb978f802, 0xbe801f6c, 0x866fff6e, 0x10000100, 0xbf06ff6f, - 0x00000100, 0xbeef00ff, 0x20000000, 0xbf85000e, 0x866fff6e, 0x00000800, - 0xbeef00f4, 0xbf85000a, 0xbf82002f, 0xbf09836e, 0xbf85ffcc, 0xbf06826e, - 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c, 0x826d806d, 0xbf820026, - 0xbef0006f, 0xbefe00ff, 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, - 0xbf84fffd, 0x866eff7e, 0x000003ff, 0x8e6e836e, 0xc0051bbd, 0x0000006e, - 0xbf8cc07f, 0xc0071bb7, 0x000000c0, 0xbf8cc07f, 0xbef10080, 0xc2831c37, - 0x00000008, 0xbf8cc07f, 0x87707170, 0xbf85000e, 0xc0071c37, 0x00000010, - 0xbf8cc07f, 0x86f07070, 0xbf840009, 0xc0031bb7, 0x00000018, 0xbf8cc07f, - 0xc0431bb8, 0x00000000, 0xbf8cc07f, 0xbefc0080, 0xbf800000, 0xbf900001, + 0x92eeff6d, 0x00080010, 0xbf850037, 0xb8eef803, 0x866fff6e, 0x00000900, + 0xbf850027, 0xbf820001, 0xbef71a87, 0xbeee007e, 0xbeef007f, 0xbefe00ff, + 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, + 0x867eff7e, 0x00000fff, 0xbeef007e, 0xbefe006e, 0xbeef1a97, 
0xbeee007c, + 0xbefc006f, 0xbf800000, 0xbf900001, 0xbefc006e, 0x866dff6d, 0x0000ffff, + 0xc0021bb6, 0x00000000, 0xbf8cc07f, 0xbf06ff6e, 0xbf810000, 0xbf850002, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x001f8000, 0xb96ef807, - 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, + 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, 0x866fff6e, 0x10000100, + 0xbf06ff6f, 0x00000100, 0xbeef00ff, 0x20000000, 0xbf850010, 0x866fff6e, + 0x00000800, 0xbeef00f4, 0xbf85000c, 0xbf820032, 0xbf09836e, 0xbf85ffcc, + 0xbf06826e, 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c, 0x826d806d, + 0xbf820029, 0xbef71a87, 0xbf820001, 0xbef71a88, 0xbef0006f, 0xbefe00ff, + 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0x866eff7e, + 0x000003ff, 0x8e6e836e, 0xc0051bbd, 0x0000006e, 0xbf8cc07f, 0xc0071bb7, + 0x000000c0, 0xbf8cc07f, 0xbef10080, 0xc2831c37, 0x00000008, 0xbf8cc07f, + 0x87707170, 0xbf85000e, 0xc0071c37, 0x00000010, 0xbf8cc07f, 0x86f07070, + 0xbf840009, 0xc0031bb7, 0x00000018, 0xbf8cc07f, 0xc0431bb8, 0x00000000, + 0xbf8cc07f, 0xbefc0080, 0xbf800000, 0xbf900001, 0x8778ff78, 0x00002000, + 0x8f6e8b77, 0x866eff6e, 0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, + 0xb978f802, 0xbe801f6c }; static const unsigned int kCodeCopyAligned8[] = { @@ -456,26 +468,27 @@ static const unsigned int kCodeFill10[] = { }; static const unsigned int kCodeTrapHandler10[] = { - 0x93eeff6d, 0x00080010, 0xbf85003a, 0xb96ef803, 0x876fff6e, 0x00000900, - 0xbf85002a, 0xbef71d87, 0xbeee037e, 0xbeef037f, 0xbefe03ff, 0x80000000, - 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f, 0x877eff7e, - 0x00000fff, 0xbeef037e, 0xbefe036e, 0xbeef1d97, 0xbeee037c, 0xbefc036f, - 0xbf800000, 0xbf900001, 0xbefc036e, 0x876dff6d, 0x0000ffff, 0xf4001bb6, - 0xfa000000, 0xbf8cc07f, 0xbf06ff6e, 0xbf810000, 0xbf850002, 0x8878ff78, - 0x00002000, 0x906e8977, 0x876fff6e, 0x003f8000, 0x906e8677, 0x876eff6e, - 0x02000000, 0x886e6f6e, 0xb9eef807, 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, - 0xbe80226c, 0x876fff6e, 0x10000100, 
0xbf06ff6f, 0x00000100, 0xbeef03ff, - 0x20000000, 0xbf85000e, 0x876fff6e, 0x00000800, 0xbeef03f4, 0xbf85000a, - 0xbf82002f, 0xbf09836e, 0xbf85ffc8, 0xbf06826e, 0xbeef03ff, 0x80000000, - 0xbf850003, 0x806c846c, 0x826d806d, 0xbf820026, 0xbef0036f, 0xbefe03ff, - 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0x876eff7e, - 0x000003ff, 0x8f6e836e, 0xf4051bbd, 0xdc000000, 0xbf8cc07f, 0xf4051bb7, - 0xfa0000c0, 0xbf8cc07f, 0xbef10380, 0xf6811c37, 0xfa000008, 0xbf8cc07f, - 0x88707170, 0xbf85000e, 0xf4051c37, 0xfa000010, 0xbf8cc07f, 0x87f07070, - 0xbf840009, 0xf4011bb7, 0xfa000018, 0xbf8cc07f, 0xf4411bb8, 0xfa000000, - 0xbf8cc07f, 0xbefc0380, 0xbf800000, 0xbf900001, 0x8878ff78, 0x00002000, - 0x906e8977, 0x876fff6e, 0x003f8000, 0x906e8677, 0x876eff6e, 0x02000000, - 0x886e6f6e, 0xb9eef807, 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c, + 0x93eeff6d, 0x00080010, 0xbf85003b, 0xb96ef803, 0x876fff6e, 0x00000900, + 0xbf85002b, 0xbf820001, 0xbef71d87, 0xbeee037e, 0xbeef037f, 0xbefe03ff, + 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f, + 0x877eff7e, 0x00000fff, 0xbeef037e, 0xbefe036e, 0xbeef1d97, 0xbeee037c, + 0xbefc036f, 0xbf800000, 0xbf900001, 0xbefc036e, 0x876dff6d, 0x0000ffff, + 0xf4001bb6, 0xfa000000, 0xbf8cc07f, 0xbf06ff6e, 0xbf810000, 0xbf850002, + 0x8878ff78, 0x00002000, 0x906e8977, 0x876fff6e, 0x003f8000, 0x906e8677, + 0x876eff6e, 0x02000000, 0x886e6f6e, 0xb9eef807, 0x87fe7e7e, 0x87ea6a6a, + 0xb9f8f802, 0xbe80226c, 0x876fff6e, 0x10000100, 0xbf06ff6f, 0x00000100, + 0xbeef03ff, 0x20000000, 0xbf850010, 0x876fff6e, 0x00000800, 0xbeef03f4, + 0xbf85000c, 0xbf820032, 0xbf09836e, 0xbf85ffc8, 0xbf06826e, 0xbeef03ff, + 0x80000000, 0xbf850003, 0x806c846c, 0x826d806d, 0xbf820029, 0xbef71d87, + 0xbf820001, 0xbef71d88, 0xbef0036f, 0xbefe03ff, 0x80000000, 0xbf90000a, + 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0x876eff7e, 0x000003ff, 0x8f6e836e, + 0xf4051bbd, 0xdc000000, 0xbf8cc07f, 0xf4051bb7, 0xfa0000c0, 0xbf8cc07f, + 0xbef10380, 0xf6811c37, 0xfa000008, 
0xbf8cc07f, 0x88707170, 0xbf85000e, + 0xf4051c37, 0xfa000010, 0xbf8cc07f, 0x87f07070, 0xbf840009, 0xf4011bb7, + 0xfa000018, 0xbf8cc07f, 0xf4411bb8, 0xfa000000, 0xbf8cc07f, 0xbefc0380, + 0xbf800000, 0xbf900001, 0x8878ff78, 0x00002000, 0x906e8977, 0x876fff6e, + 0x003f8000, 0x906e8677, 0x876eff6e, 0x02000000, 0x886e6f6e, 0xb9eef807, + 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c, }; } // namespace amd diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp index cdfa20774e..25c30736ac 100644 --- a/runtime/hsa-runtime/loader/executable.cpp +++ b/runtime/hsa-runtime/loader/executable.cpp @@ -72,7 +72,10 @@ __attribute__((noinline)) static void _loader_debug_state() { static volatile int function_needs_a_side_effect = 0; function_needs_a_side_effect ^= 1; } -HSA_API r_debug _amdgpu_r_debug = {1, +// r_version history: +// 1: Initial debug protocol +// 2: New trap handler ABI. The reason for halting a wave is recorded in ttmp11[8:7]. +HSA_API r_debug _amdgpu_r_debug = {2, nullptr, reinterpret_cast(&_loader_debug_state), r_debug::RT_CONSISTENT,