Replace gfx9 SP3 trap handler with LLVM, fix IB_STS restore
Assembler toolchains are moving from SP3 to LLVM. Replace trap handler
source code with LLVM equivalent.
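Fix a trap issue with the SQ_WAVE_IB_STS restore: the restore mask covered
only bit 15 (0x8000) instead of bits [20:15] (0x1F8000). The bug was mostly
harmless because all traps are currently considered fatal to the wavefront.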
Change-Id: Iacecd9dd31a1d96a083c8b8327f442f33c861f9f
[ROCm/ROCR-Runtime commit: 6ed686ee29]
This commit is contained in:
committed by
Sean Keely
parent
970cca3731
commit
822d838eae
@@ -206,8 +206,7 @@ class GpuAgent : public GpuAgentInt {
|
||||
// @param [out] code_buf_size Size of code object buffer in bytes.
|
||||
enum class AssembleTarget { ISA, AQL };
|
||||
|
||||
void AssembleShader(const char* src_sp3, const char* func_name,
|
||||
AssembleTarget assemble_target, void*& code_buf,
|
||||
void AssembleShader(const char* func_name, AssembleTarget assemble_target, void*& code_buf,
|
||||
size_t& code_buf_size) const;
|
||||
|
||||
// @brief Frees code object created by AssembleShader.
|
||||
|
||||
@@ -110,100 +110,92 @@ static const unsigned int kCodeTrapHandler8[] = {
|
||||
|
||||
static const unsigned int kCodeTrapHandler9[] = {
|
||||
/*
|
||||
var SQ_WAVE_PC_HI_TRAP_ID_SHIFT = 16
|
||||
var SQ_WAVE_PC_HI_TRAP_ID_SIZE = 8
|
||||
var SQ_WAVE_PC_HI_TRAP_ID_BFE = (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
|
||||
var SQ_WAVE_STATUS_HALT_MASK = 0x2000
|
||||
var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x8000
|
||||
var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15
|
||||
var IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26
|
||||
.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT , 16
|
||||
.set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 8
|
||||
.set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
|
||||
.set SQ_WAVE_STATUS_HALT_MASK , 0x2000
|
||||
|
||||
// ABI between first and second level trap handler.
|
||||
var s_trap_info_lo = ttmp0
|
||||
var s_trap_info_hi = ttmp1
|
||||
var s_ib_sts_save = ttmp11 // [31:26] = SQ_WAVE_IB_STS[20:15]
|
||||
var s_status_save = ttmp12
|
||||
|
||||
// SPI debug data is not present/needed in these registers.
|
||||
var s_tmp0 = ttmp2
|
||||
var s_tmp1 = ttmp3
|
||||
var s_tmp2 = ttmp4
|
||||
var s_tmp3 = ttmp5
|
||||
|
||||
shader main
|
||||
type(CS)
|
||||
.if .amdgcn.gfx_generation_number == 9
|
||||
.set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 26
|
||||
.set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15
|
||||
.set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000
|
||||
.else
|
||||
.error "unsupported target"
|
||||
.endif
|
||||
|
||||
trap_entry:
|
||||
// If this is not a trap then return to the shader.
|
||||
s_bfe_u32 s_tmp0, s_trap_info_hi, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
s_cbranch_scc0 L_EXIT_TRAP
|
||||
s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
s_cbranch_scc0 .exit_trap
|
||||
|
||||
// If llvm.trap then signal queue error.
|
||||
s_cmp_eq_u32 s_tmp0, 0x2
|
||||
s_cbranch_scc1 L_SIGNAL_QUEUE
|
||||
s_cmp_eq_u32 ttmp2, 0x2
|
||||
s_cbranch_scc1 .signal_error
|
||||
|
||||
// For other traps advance PC and return to shader.
|
||||
s_add_u32 s_trap_info_lo, s_trap_info_lo, 0x4
|
||||
s_addc_u32 s_trap_info_hi, s_trap_info_hi, 0x0
|
||||
s_branch L_EXIT_TRAP
|
||||
s_add_u32 ttmp0, ttmp0, 0x4
|
||||
s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
s_branch .exit_trap
|
||||
|
||||
L_SIGNAL_QUEUE:
|
||||
.signal_error:
|
||||
// Retrieve queue_inactive_signal from amd_queue_t* passed in s[0:1].
|
||||
s_load_dwordx2 [s_tmp0, s_tmp1], s[0:1], 0xC0 glc:1
|
||||
s_load_dwordx2 [ttmp2, ttmp3], s[0:1], 0xC0 glc
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Set queue signal value to unhandled exception error.
|
||||
s_mov_b32 s_tmp2, 0x80000000
|
||||
s_mov_b32 s_tmp3, 0x0
|
||||
s_atomic_swap_x2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8 glc:1
|
||||
s_mov_b32 ttmp4, 0x80000000
|
||||
s_mov_b32 ttmp5, 0x0
|
||||
s_atomic_swap_x2 [ttmp4, ttmp5], [ttmp2, ttmp3], 0x8 glc
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Skip event trigger if the signal value was already non-zero.
|
||||
s_or_b32 s_tmp2, s_tmp2, s_tmp3
|
||||
s_cbranch_scc1 L_SIGNAL_DONE
|
||||
s_or_b32 ttmp4, ttmp4, ttmp5
|
||||
s_cbranch_scc1 .signal_done
|
||||
|
||||
// Check for a non-NULL signal event mailbox.
|
||||
s_load_dwordx2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x10 glc:1
|
||||
s_load_dwordx2 [ttmp4, ttmp5], [ttmp2, ttmp3], 0x10 glc
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b64 [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
|
||||
s_cbranch_scc0 L_SIGNAL_DONE
|
||||
s_and_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], [ttmp4, ttmp5]
|
||||
s_cbranch_scc0 .signal_done
|
||||
|
||||
// Load the signal event value.
|
||||
s_load_dword s_tmp0, [s_tmp0, s_tmp1], 0x18 glc:1
|
||||
s_load_dword ttmp2, [ttmp2, ttmp3], 0x18 glc
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Write the signal event value to the mailbox.
|
||||
s_store_dword s_tmp0, [s_tmp2, s_tmp3], 0x0 glc:1
|
||||
s_store_dword ttmp2, [ttmp4, ttmp5], 0x0 glc
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Send an interrupt to trigger event notification.
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
|
||||
L_SIGNAL_DONE:
|
||||
.signal_done:
|
||||
// Halt the wavefront.
|
||||
s_or_b32 s_status_save, s_status_save, SQ_WAVE_STATUS_HALT_MASK
|
||||
s_or_b32 ttmp12, ttmp12, SQ_WAVE_STATUS_HALT_MASK
|
||||
|
||||
L_EXIT_TRAP:
|
||||
.exit_trap:
|
||||
// Restore SQ_WAVE_IB_STS.
|
||||
s_lshr_b32 s_tmp0, s_ib_sts_save, (IB_STS_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
s_and_b32 s_tmp0, s_tmp0, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), s_tmp0
|
||||
.if .amdgcn.gfx_generation_number == 9
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
.endif
|
||||
|
||||
// Restore SQ_WAVE_STATUS.
|
||||
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), s_status_save
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), ttmp12
|
||||
|
||||
// Return to shader at unmodified PC.
|
||||
s_rfe_b64 [s_trap_info_lo, s_trap_info_hi]
|
||||
end
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
*/
|
||||
0x92eeff6d, 0x00080010, 0xbf84001e, 0xbf06826e, 0xbf850003, 0x806c846c,
|
||||
0x826d806d, 0xbf820019, 0xc0071b80, 0x000000c0, 0xbf8cc07f, 0xbef000ff,
|
||||
0x80000000, 0xbef10080, 0xc2831c37, 0x00000008, 0xbf8cc07f, 0x87707170,
|
||||
0xbf85000c, 0xc0071c37, 0x00000010, 0xbf8cc07f, 0x86f07070, 0xbf840007,
|
||||
0xc0031bb7, 0x00000018, 0xbf8cc07f, 0xc0431bb8, 0x00000000, 0xbf8cc07f,
|
||||
0xbf900001, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x00008000,
|
||||
0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c, 0x00000000,
|
||||
0xbf900001, 0x8778ff78, 0x00002000, 0x8f6e8b77, 0x866eff6e, 0x001f8000,
|
||||
0xb96ef807, 0x86fe7e7e, 0x86ea6a6a, 0xb978f802, 0xbe801f6c,
|
||||
};
|
||||
|
||||
static const unsigned int kCodeCopyAligned8[] = {
|
||||
|
||||
@@ -554,8 +554,7 @@ hsa_status_t BlitKernel::Initialize(const core::Agent& agent) {
|
||||
|
||||
for (auto kernel_name : kernel_names) {
|
||||
KernelCode& kernel = kernels_[kernel_name.first];
|
||||
gpuAgent.AssembleShader(kBlitKernelSource.c_str(), kernel_name.second,
|
||||
GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
|
||||
gpuAgent.AssembleShader(kernel_name.second, GpuAgent::AssembleTarget::AQL, kernel.code_buf_,
|
||||
kernel.code_buf_size_);
|
||||
}
|
||||
|
||||
|
||||
@@ -163,9 +163,8 @@ GpuAgent::~GpuAgent() {
|
||||
regions_.clear();
|
||||
}
|
||||
|
||||
void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
AssembleTarget assemble_target, void*& code_buf,
|
||||
size_t& code_buf_size) const {
|
||||
void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_target,
|
||||
void*& code_buf, size_t& code_buf_size) const {
|
||||
// Select precompiled shader implementation from name/target.
|
||||
struct ASICShader {
|
||||
const void* code;
|
||||
@@ -1169,67 +1168,6 @@ void GpuAgent::SyncClocks() {
|
||||
}
|
||||
|
||||
void GpuAgent::BindTrapHandler() {
|
||||
const char* src_sp3 = R"(
|
||||
var s_trap_info_lo = ttmp0
|
||||
var s_trap_info_hi = ttmp1
|
||||
var s_tmp0 = ttmp2
|
||||
var s_tmp1 = ttmp3
|
||||
var s_tmp2 = ttmp4
|
||||
var s_tmp3 = ttmp5
|
||||
|
||||
shader TrapHandler
|
||||
type(CS)
|
||||
|
||||
// Retrieve the queue inactive signal.
|
||||
s_load_dwordx2 [s_tmp0, s_tmp1], s[0:1], 0xC0
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Mask all but one lane of the wavefront.
|
||||
s_mov_b64 exec, 0x1
|
||||
|
||||
// Set queue signal value to unhandled exception error.
|
||||
s_add_u32 s_tmp0, s_tmp0, 0x8
|
||||
s_addc_u32 s_tmp1, s_tmp1, 0x0
|
||||
v_mov_b32 v0, s_tmp0
|
||||
v_mov_b32 v1, s_tmp1
|
||||
v_mov_b32 v2, 0x80000000
|
||||
v_mov_b32 v3, 0x0
|
||||
flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3]
|
||||
s_waitcnt vmcnt(0)
|
||||
|
||||
// Skip event if the signal was already set to unhandled exception.
|
||||
v_cmp_eq_u64 vcc, v[0:1], v[2:3]
|
||||
s_cbranch_vccnz L_SIGNAL_DONE
|
||||
|
||||
// Check for a non-NULL signal event mailbox.
|
||||
s_load_dwordx2 [s_tmp2, s_tmp3], [s_tmp0, s_tmp1], 0x8
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b64 [s_tmp2, s_tmp3], [s_tmp2, s_tmp3], [s_tmp2, s_tmp3]
|
||||
s_cbranch_scc0 L_SIGNAL_DONE
|
||||
|
||||
// Load the signal event value.
|
||||
s_add_u32 s_tmp0, s_tmp0, 0x10
|
||||
s_addc_u32 s_tmp1, s_tmp1, 0x0
|
||||
s_load_dword s_tmp0, [s_tmp0, s_tmp1], 0x0
|
||||
s_waitcnt lgkmcnt(0)
|
||||
|
||||
// Write the signal event value to the mailbox.
|
||||
v_mov_b32 v0, s_tmp2
|
||||
v_mov_b32 v1, s_tmp3
|
||||
v_mov_b32 v2, s_tmp0
|
||||
flat_store_dword v[0:1], v2
|
||||
s_waitcnt vmcnt(0)
|
||||
|
||||
// Send an interrupt to trigger event notification.
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
|
||||
L_SIGNAL_DONE:
|
||||
// Halt wavefront and exit trap.
|
||||
s_sethalt 1
|
||||
s_rfe_b64 [s_trap_info_lo, s_trap_info_hi]
|
||||
end
|
||||
)";
|
||||
|
||||
if (isa_->GetMajorVersion() == 7) {
|
||||
// No trap handler support on Gfx7, soft error.
|
||||
return;
|
||||
@@ -1241,8 +1179,7 @@ void GpuAgent::BindTrapHandler() {
|
||||
}
|
||||
|
||||
// Assemble the trap handler source code.
|
||||
AssembleShader(src_sp3, "TrapHandler", AssembleTarget::ISA, trap_code_buf_,
|
||||
trap_code_buf_size_);
|
||||
AssembleShader("TrapHandler", AssembleTarget::ISA, trap_code_buf_, trap_code_buf_size_);
|
||||
|
||||
// Bind the trap handler to this node.
|
||||
HSAKMT_STATUS err = hsaKmtSetTrapHandler(node_id(), trap_code_buf_,
|
||||
|
||||
Reference in New Issue
Block a user