From 8aec53969fc129ccc27c7b369e985b980dd70ee0 Mon Sep 17 00:00:00 2001 From: Laurent Morichetti Date: Thu, 21 Jan 2021 11:51:37 -0800 Subject: [PATCH] Don't terminate waves halted at s_endpgm To support single stepping the instruction preceding an s_endpgm, unwind the PC by 8 bytes and set ttmp11[9] to notify the debugger that the wave is halted with a modified PC. Bump the debug r_version for this new trap handler ABI. Change-Id: I55e4e0d65576f92da14a336266c31c513baab547 --- .../hsa-runtime/core/inc/amd_gpu_shaders.h | 62 +++++++++---------- runtime/hsa-runtime/loader/executable.cpp | 3 +- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h b/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h index 72e0875b7a..674ad7d00d 100644 --- a/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h +++ b/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h @@ -130,6 +130,7 @@ static const unsigned int kCodeTrapHandler9[] = { .set SENDMSG_M0_DOORBELL_ID_MASK , ((1 << SENDMSG_M0_DOORBELL_ID_BITS) - 1) .set TTMP11_TRAP_RAISED_BIT , 7 .set TTMP11_EXCP_RAISED_BIT , 8 + .set TTMP11_HALTED_AT_ENDPGM , 9 .set TTMP11_EVENTS_MASK , (1 << TTMP11_TRAP_RAISED_BIT) | (1 << TTMP11_EXCP_RAISED_BIT) .set INSN_S_ENDPGM_OPCODE , 0xBF810000 .set INSN_S_ENDPGM_MASK , 0xFFFF0000 @@ -252,14 +253,12 @@ static const unsigned int kCodeTrapHandler9[] = { s_cmp_eq_u32 ttmp2, INSN_S_ENDPGM_OPCODE s_cbranch_scc0 .halt_wave - // Since the 1st level trap handler calls the 2nd level handler when - // (mode.debug_en && !status.halt), we must clear mode.debug_en if we - // don't want to re-enter this handler indefinitely. - s_mov_b32 ttmp2, 0 - s_setreg_b32 hwreg(HW_REG_MODE, SQ_WAVE_MODE_DEBUG_EN_SHIFT, 1), ttmp2 - - s_and_b32 ttmp12, ttmp12, ~SQ_WAVE_STATUS_HALT_MASK - mExitTrap + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. The debugger compensates for this. 
+ s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + s_bitset1_b32 ttmp11, TTMP11_HALTED_AT_ENDPGM + s_branch .halt_wave .endif .excp_raised: @@ -369,28 +368,27 @@ static const unsigned int kCodeTrapHandler9[] = { .exit_trap: mExitTrap */ - 0x92eeff6d, 0x00080010, 0xbf850040, 0xb8eef803, 0x866fff6e, 0x00000900, - 0xbf85002f, 0x866fff6e, 0x00007080, 0xbf840003, 0xbef71a88, 0xbf820001, + 0x92eeff6d, 0x00080010, 0xbf850038, 0xb8eef803, 0x866fff6e, 0x00000900, + 0xbf850027, 0x866fff6e, 0x00007080, 0xbf840003, 0xbef71a88, 0xbf820001, 0xbef71a87, 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x867eff7e, 0x00000fff, 0xbeef007e, 0xbefe006e, 0xbeef1a97, 0xbeee007c, 0xbefc006f, 0xbf800000, 0xbf900001, 0xbefc006e, 0x866dff6d, 0x0000ffff, 0xc0021bb6, 0x00000000, - 0xbf8cc07f, 0x866eff6e, 0xffff0000, 0xbf06ff6e, 0xbf810000, 0xbf84004d, - 0xbeee0080, 0xb96e02c1, 0x8678ff78, 0xffffdfff, 0x8f6e8b77, 0x866eff6e, - 0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, - 0xbef71a88, 0x866fff6e, 0x10000100, 0xbf06ff6f, 0x00000100, 0xbeef00ff, - 0x20000000, 0xbf85000f, 0x866fff6e, 0x00000800, 0xbeef00f4, 0xbf85000b, - 0xbf820034, 0xbf09836e, 0xbf85ffc7, 0xbf06826e, 0xbeef00ff, 0x80000000, - 0xbf850003, 0x806c846c, 0x826d806d, 0xbf82002d, 0xbef71a87, 0xbef0006f, - 0xbeee007e, 0xbeef007f, 0xbefe00ff, 0x80000000, 0xbf90000a, 0xbf800007, - 0xbf0c9f7e, 0xbf84fffd, 0xbeff006f, 0x867eff7e, 0x000003ff, 0x8e6f837e, - 0xbefe006e, 0xc0051bbd, 0x0000006f, 0xbf8cc07f, 0xc0071bb7, 0x000000c0, - 0xbf8cc07f, 0xbef10080, 0xc2831c37, 0x00000008, 0xbf8cc07f, 0x87707170, - 0xbf85000e, 0xc0071c37, 0x00000010, 0xbf8cc07f, 0x86f07070, 0xbf840009, - 0xc0031bb7, 0x00000018, 0xbf8cc07f, 0xc0431bb8, 0x00000000, 0xbf8cc07f, - 0xbefc0080, 0xbf800000, 0xbf900001, 0xbef00080, 0xbef10080, 0x8778ff78, - 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x001f8000, 0xb96ef807, 0x86fe7e7e, - 0x86ea6a6a, 0xb978f802, 0xbe801f6c, + 0xbf8cc07f, 
0x866eff6e, 0xffff0000, 0xbf06ff6e, 0xbf810000, 0xbf840045, + 0x80ec886c, 0x82ed806d, 0xbef71a89, 0xbf820041, 0xbef71a88, 0x866fff6e, + 0x10000100, 0xbf06ff6f, 0x00000100, 0xbeef00ff, 0x20000000, 0xbf85000f, + 0x866fff6e, 0x00000800, 0xbeef00f4, 0xbf85000b, 0xbf820034, 0xbf09836e, + 0xbf85ffcf, 0xbf06826e, 0xbeef00ff, 0x80000000, 0xbf850003, 0x806c846c, + 0x826d806d, 0xbf82002d, 0xbef71a87, 0xbef0006f, 0xbeee007e, 0xbeef007f, + 0xbefe00ff, 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, + 0xbeff006f, 0x867eff7e, 0x000003ff, 0x8e6f837e, 0xbefe006e, 0xc0051bbd, + 0x0000006f, 0xbf8cc07f, 0xc0071bb7, 0x000000c0, 0xbf8cc07f, 0xbef10080, + 0xc2831c37, 0x00000008, 0xbf8cc07f, 0x87707170, 0xbf85000e, 0xc0071c37, + 0x00000010, 0xbf8cc07f, 0x86f07070, 0xbf840009, 0xc0031bb7, 0x00000018, + 0xbf8cc07f, 0xc0431bb8, 0x00000000, 0xbf8cc07f, 0xbefc0080, 0xbf800000, + 0xbf900001, 0xbef00080, 0xbef10080, 0x8778ff78, 0x00002000, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, 0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, + 0xbe801f6c, }; static const unsigned int kCodeCopyAligned8[] = { @@ -506,19 +504,17 @@ static const unsigned int kCodeFill10[] = { }; static const unsigned int kCodeTrapHandler1010[] = { - 0x93eeff6d, 0x00080010, 0xbf850044, 0xb96ef803, 0x876fff6e, 0x00000900, - 0xbf850033, 0x876fff6e, 0x00007080, 0xbf840003, 0xbef71d88, 0xbf820001, + 0x93eeff6d, 0x00080010, 0xbf850038, 0xb96ef803, 0x876fff6e, 0x00000900, + 0xbf850027, 0x876fff6e, 0x00007080, 0xbf840003, 0xbef71d88, 0xbf820001, 0xbef71d87, 0xbeee037e, 0xbeef037f, 0xbefe03ff, 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f, 0x877eff7e, 0x00000fff, 0xbeef037e, 0xbefe036e, 0xbeef1d96, 0xbeee037c, 0xbefc036f, 0xbf800000, 0xbf900001, 0xbefc036e, 0x876dff6d, 0x0000ffff, 0xf4001bb6, 0xfa000000, - 0xbf8cc07f, 0x876eff6e, 0xffff0000, 0xbf06ff6e, 0xbf810000, 0xbf840051, - 0xbeee0380, 0xb9ee02c1, 0x8778ff78, 0xffffdfff, 0x906e8977, 0x876fff6e, - 0x003f8000, 0x906e8677, 0x876eff6e, 0x02000000, 
0x886e6f6e, 0xb9eef807, - 0x87fe7e7e, 0x87ea6a6a, 0xb9f8f802, 0xbe80226c, 0xbef71d88, 0x876fff6e, + 0xbf8cc07f, 0x876eff6e, 0xffff0000, 0xbf06ff6e, 0xbf810000, 0xbf840045, + 0x80ec886c, 0x82ed806d, 0xbef71d89, 0xbf820041, 0xbef71d88, 0x876fff6e, 0x10000100, 0xbf06ff6f, 0x00000100, 0xbeef03ff, 0x20000000, 0xbf85000f, 0x876fff6e, 0x00000800, 0xbeef03f4, 0xbf85000b, 0xbf820034, 0xbf09836e, - 0xbf85ffc3, 0xbf06826e, 0xbeef03ff, 0x80000000, 0xbf850003, 0x806c846c, + 0xbf85ffcf, 0xbf06826e, 0xbeef03ff, 0x80000000, 0xbf850003, 0x806c846c, 0x826d806d, 0xbf82002d, 0xbef71d87, 0xbef0036f, 0xbeee037e, 0xbeef037f, 0xbefe03ff, 0x80000000, 0xbf90000a, 0xbf800007, 0xbf0c9f7e, 0xbf84fffd, 0xbeff036f, 0x877eff7e, 0x000003ff, 0x8f6f837e, 0xbefe036e, 0xf4051bbd, diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp index d53a098505..d8592c7fc0 100644 --- a/runtime/hsa-runtime/loader/executable.cpp +++ b/runtime/hsa-runtime/loader/executable.cpp @@ -75,7 +75,8 @@ __attribute__((noinline)) static void _loader_debug_state() { // r_version history: // 1: Initial debug protocol // 2: New trap handler ABI. The reason for halting a wave is recorded in ttmp11[8:7]. -HSA_API r_debug _amdgpu_r_debug = {2, +// 3: New trap handler ABI. A wave halted at S_ENDPGM rewinds its PC by 8 bytes, and sets ttmp11[9]=1. +HSA_API r_debug _amdgpu_r_debug = {3, nullptr, reinterpret_cast<uintptr_t>(&_loader_debug_state), r_debug::RT_CONSISTENT,