Replace gfx9 SP3 trap handler with LLVM, fix IB_STS restore

Assembler toolchains are moving from SP3 to LLVM. Replace trap handler
source code with LLVM equivalent.

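Fix the SQ_WAVE_IB_STS restore in the trap handler: the restore mask was
0x8000 (bit 15 only) instead of 0x1F8000 (bits [20:15]), so only part of
the saved field was written back. Mostly harmless, as all traps are
currently considered fatal to the wavefront.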

Change-Id: Iacecd9dd31a1d96a083c8b8327f442f33c861f9f


[ROCm/ROCR-Runtime commit: 6ed686ee29]
This commit is contained in:
Jay Cornwall
2019-06-20 12:37:02 -05:00
committed by Sean Keely
parent 970cca3731
commit 822d838eae
4 changed files with 48 additions and 121 deletions
@@ -206,8 +206,7 @@ class GpuAgent : public GpuAgentInt {
// @param [out] code_buf_size Size of code object buffer in bytes.
enum class AssembleTarget { ISA, AQL };
void AssembleShader(const char* src_sp3, const char* func_name,
AssembleTarget assemble_target, void*& code_buf,
void AssembleShader(const char* func_name, AssembleTarget assemble_target, void*& code_buf,
size_t& code_buf_size) const;
// @brief Frees code object created by AssembleShader.
@@ -110,100 +110,92 @@ static const unsigned int kCodeTrapHandler8[] = {
// Machine-code words for the gfx9 trap handler. The assembly they were built
// from is kept in the block comment that follows so the words can be
// regenerated.
//
// NOTE(review): this listing comes from a diff rendering whose +/- markers
// were stripped. The block comment therefore interleaves two forms of the same
// handler: the SP3 form (`var ...`, `shader main`, `L_*` labels, `glc:1`) and
// the LLVM assembler form (`.set ...`, `.if/.endif`, `.label` names, `glc`).
// The data rows at the end likewise appear to contain both the old and the new
// tail of the encoding. Confirm against the actual file before relying on the
// array contents or length.
static const unsigned int kCodeTrapHandler9[] = {
/*
var SQ_WAVE_PC_HI_TRAP_ID_SHIFT = 16
var SQ_WAVE_PC_HI_TRAP_ID_SIZE = 8
var SQ_WAVE_PC_HI_TRAP_ID_BFE = (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x8000
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15
var IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26
.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT , 16
.set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 8
.set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
.set SQ_WAVE_STATUS_HALT_MASK , 0x2000
// ABI between first and second level trap handler.
var s_trap_info_lo = ttmp0
var s_trap_info_hi = ttmp1
var s_ib_sts_save = ttmp11 // [31:26] = SQ_WAVE_IB_STS[20:15]
var s_status_save = ttmp12
// SPI debug data is not present/needed in these registers.
var s_tmp0 = ttmp2
var s_tmp1 = ttmp3
var s_tmp2 = ttmp4
var s_tmp3 = ttmp5
shader main
type(CS)
.if .amdgcn.gfx_generation_number == 9
.set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 26
.set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15
.set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000
.else
.error "unsupported target"
.endif
trap_entry:
// If this is not a trap then return to the shader.
s_bfe_u32 s_tmp0, s_trap_info_hi, SQ_WAVE_PC_HI_TRAP_ID_BFE
s_cbranch_scc0 L_EXIT_TRAP
s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
s_cbranch_scc0 .exit_trap
// If llvm.trap then signal queue error.
s_cmp_eq_u32 s_tmp0, 0x2
s_cbranch_scc1 L_SIGNAL_QUEUE
s_cmp_eq_u32 ttmp2, 0x2
s_cbranch_scc1 .signal_error
// For other traps advance PC and return to shader.
s_add_u32 s_trap_info_lo, s_trap_info_lo, 0x4
s_addc_u32 s_trap_info_hi, s_trap_info_hi, 0x0
s_branch L_EXIT_TRAP
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0
s_branch .exit_trap
L_SIGNAL_QUEUE:
.signal_error:
// Retrieve queue_inactive_signal from amd_queue_t* passed in s[0:1].
s_load_dwordx2 [s_tmp0, s_tmp1], s[0:1], 0xC0 glc:1
s_load_dwordx2 [ttmp2, ttmp3], s[0:1], 0xC0 glc
s_waitcnt lgkmcnt(0)
// Set queue signal value to unhandled exception error.
s_mov_b32 s_tmp2, 0x80000000
s_mov_b32 s_tmp3, 0x0
s_atomic_swap_x2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8 glc:1
s_mov_b32 ttmp4, 0x80000000
s_mov_b32 ttmp5, 0x0
s_atomic_swap_x2 [ttmp4, ttmp5], [ttmp2, ttmp3], 0x8 glc
s_waitcnt lgkmcnt(0)
// Skip event trigger if the signal value was already non-zero.
s_or_b32 s_tmp2, s_tmp2, s_tmp3
s_cbranch_scc1 L_SIGNAL_DONE
s_or_b32 ttmp4, ttmp4, ttmp5
s_cbranch_scc1 .signal_done
// Check for a non-NULL signal event mailbox.
s_load_dwordx2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x10 glc:1
s_load_dwordx2 [ttmp4, ttmp5], [ttmp2, ttmp3], 0x10 glc
s_waitcnt lgkmcnt(0)
s_and_b64 [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
s_cbranch_scc0 L_SIGNAL_DONE
s_and_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], [ttmp4, ttmp5]
s_cbranch_scc0 .signal_done
// Load the signal event value.
s_load_dword s_tmp0, [s_tmp0, s_tmp1], 0x18 glc:1
s_load_dword ttmp2, [ttmp2, ttmp3], 0x18 glc
s_waitcnt lgkmcnt(0)
// Write the signal event value to the mailbox.
s_store_dword s_tmp0, [s_tmp2, s_tmp3], 0x0 glc:1
s_store_dword ttmp2, [ttmp4, ttmp5], 0x0 glc
s_waitcnt lgkmcnt(0)
// Send an interrupt to trigger event notification.
s_sendmsg sendmsg(MSG_INTERRUPT)
L_SIGNAL_DONE:
.signal_done:
// Halt the wavefront.
s_or_b32 s_status_save, s_status_save, SQ_WAVE_STATUS_HALT_MASK
s_or_b32 ttmp12, ttmp12, SQ_WAVE_STATUS_HALT_MASK
L_EXIT_TRAP:
.exit_trap:
// Restore SQ_WAVE_IB_STS.
s_lshr_b32 s_tmp0, s_ib_sts_save, (IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 s_tmp0, s_tmp0, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), s_tmp0
.if .amdgcn.gfx_generation_number == 9
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
.endif
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_status_save
s_setreg_b32 hwreg(HW_REG_STATUS), ttmp12
// Return to shader at unmodified PC.
s_rfe_b64 [s_trap_info_lo, s_trap_info_hi]
end
s_rfe_b64 [ttmp0, ttmp1]
*/
// NOTE(review): the next five rows appear only once and are presumably shared
// by the old and new encodings. The second word, 0x00080010, equals
// SQ_WAVE_PC_HI_TRAP_ID_BFE (16 | (8 << 16)) from the listing above.
0x92eeff6d, 0x00080010, 0xbf84001e, 0xbf06826e, 0xbf850003, 0x806c846c,
0x826d806d, 0xbf820019, 0xc0071b80, 0x000000c0, 0xbf8cc07f, 0xbef000ff,
0x80000000, 0xbef10080, 0xc2831c37, 0x00000008, 0xbf8cc07f, 0x87707170,
0xbf85000c, 0xc0071c37, 0x00000010, 0xbf8cc07f, 0x86f07070, 0xbf840007,
0xc0031bb7, 0x00000018, 0xbf8cc07f, 0xc0431bb8, 0x00000000, 0xbf8cc07f,
// NOTE(review): presumably the pre-change tail. Its last literal on the first
// row is 0x00008000, matching the SP3 value of
// SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK, and it ends with an extra 0x00000000
// word.
0xbf900001, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x00008000,
0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, 0x00000000,
// NOTE(review): presumably the post-change tail. It is identical to the two
// rows above except that the mask literal is 0x001f8000 (the `.set` value of
// SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) and the trailing zero word is gone.
0xbf900001, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x001f8000,
0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
};
static const unsigned int kCodeCopyAligned8[] = {
@@ -554,8 +554,7 @@ hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
for (auto kernel_name : kernel_names) {
KernelCode& kernel = kernels_[kernel_name.first];
gpuAgent.AssembleShader(kBlitKernelSource.c_str(), kernel_name.second,
GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
gpuAgent.AssembleShader(kernel_name.second, GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
kernel.code_buf_size_);
}
@@ -163,9 +163,8 @@ GpuAgent::~GpuAgent() {
regions_.clear();
}
void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
AssembleTarget assemble_target, void*& code_buf,
size_t& code_buf_size) const {
void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_target,
void*& code_buf, size_t& code_buf_size) const {
// Select precompiled shader implementation from name/target.
struct ASICShader {
const void* code;
@@ -1169,67 +1168,6 @@ void GpuAgent::SyncClocks() {
}
void GpuAgent::BindTrapHandler() {
const char* src_sp3 = R"(
var s_trap_info_lo = ttmp0
var s_trap_info_hi = ttmp1
var s_tmp0 = ttmp2
var s_tmp1 = ttmp3
var s_tmp2 = ttmp4
var s_tmp3 = ttmp5
shader TrapHandler
type(CS)
// Retrieve the queue inactive signal.
s_load_dwordx2 [s_tmp0, s_tmp1], s[0:1], 0xC0
s_waitcnt lgkmcnt(0)
// Mask all but one lane of the wavefront.
s_mov_b64 exec, 0x1
// Set queue signal value to unhandled exception error.
s_add_u32 s_tmp0, s_tmp0, 0x8
s_addc_u32 s_tmp1, s_tmp1, 0x0
v_mov_b32 v0, s_tmp0
v_mov_b32 v1, s_tmp1
v_mov_b32 v2, 0x80000000
v_mov_b32 v3, 0x0
flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3]
s_waitcnt vmcnt(0)
// Skip event if the signal was already set to unhandled exception.
v_cmp_eq_u64 vcc, v[0:1], v[2:3]
s_cbranch_vccnz L_SIGNAL_DONE
// Check for a non-NULL signal event mailbox.
s_load_dwordx2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8
s_waitcnt lgkmcnt(0)
s_and_b64 [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
s_cbranch_scc0 L_SIGNAL_DONE
// Load the signal event value.
s_add_u32 s_tmp0, s_tmp0, 0x10
s_addc_u32 s_tmp1, s_tmp1, 0x0
s_load_dword s_tmp0, [s_tmp0, s_tmp1], 0x0
s_waitcnt lgkmcnt(0)
// Write the signal event value to the mailbox.
v_mov_b32 v0, s_tmp2
v_mov_b32 v1, s_tmp3
v_mov_b32 v2, s_tmp0
flat_store_dword v[0:1], v2
s_waitcnt vmcnt(0)
// Send an interrupt to trigger event notification.
s_sendmsg sendmsg(MSG_INTERRUPT)
L_SIGNAL_DONE:
// Halt wavefront and exit trap.
s_sethalt 1
s_rfe_b64 [s_trap_info_lo, s_trap_info_hi]
end
)";
if (isa_->GetMajorVersion() == 7) {
// No trap handler support on Gfx7, soft error.
return;
@@ -1241,8 +1179,7 @@ void GpuAgent::BindTrapHandler() {
}
// Assemble the trap handler source code.
AssembleShader(src_sp3, "TrapHandler", AssembleTarget::ISA, trap_code_buf_,
trap_code_buf_size_);
AssembleShader("TrapHandler", AssembleTarget::ISA, trap_code_buf_, trap_code_buf_size_);
// Bind the trap handler to this node.
HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_,