rocr: Replace tabs with spaces in trap handler source codes
Use spaces consistently to format the trap handler code. This patch
does not introduce any change in the trap handler. Using `git show -w`
on this patch shows an empty diff.
Change-Id: Ic0244dd203347146ffde65460cd87ecbcc43732a
[ROCm/ROCR-Runtime commit: e0359e5d35]
Esse commit está contido em:
+250
-250
@@ -101,7 +101,7 @@
|
||||
.set TTMP11_TTMPS_SETUP_SHIFT , 31
|
||||
|
||||
.if (.amdgcn.gfx_generation_minor >= 4)
|
||||
.set TTMP11_WAVE_IN_WG_MASK , 0x3F
|
||||
.set TTMP11_WAVE_IN_WG_MASK , 0x3F
|
||||
|
||||
// Bit to indicate that this is a stochastic trap
|
||||
.set TTMP13_PCS_IS_STOCHASTIC , 21
|
||||
@@ -255,8 +255,8 @@ trap_entry:
|
||||
s_cbranch_scc0 .not_s_trap // If trap_id == 0, it's not an s_trap nor host trap
|
||||
|
||||
// Check if the it was an host trap.
|
||||
s_bitcmp1_b32 ttmp1, SQ_WAVE_PC_HI_HT_SHIFT
|
||||
s_cbranch_scc0 .not_host_trap
|
||||
s_bitcmp1_b32 ttmp1, SQ_WAVE_PC_HI_HT_SHIFT
|
||||
s_cbranch_scc0 .not_host_trap
|
||||
|
||||
.if (.amdgcn.gfx_generation_number == 9) // PC_SAMPLING_GFX9
|
||||
// ttmp[14:15] is TMA2; Available: ttmp[2:3], ttmp[4:5], ttmp7, TTMP_REG1
|
||||
@@ -283,17 +283,17 @@ trap_entry:
|
||||
// - Set bit 21 in TTMP13 to indicate a stochastic trap.
|
||||
// - Branch to the profile trap handler logic.
|
||||
|
||||
s_load_dwordx2 ttmp[2:3], ttmp[14:15], 0 glc // ttmp[14:15]=*host_trap_buffers
|
||||
s_load_dwordx2 ttmp[2:3], ttmp[14:15], 0 glc // ttmp[14:15]=*host_trap_buffers
|
||||
.if .amdgcn.gfx_generation_minor >= 4
|
||||
s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT, 1), 0
|
||||
s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT, 1), 0
|
||||
s_bitset0_b32 ttmp13, TTMP13_PCS_IS_STOCHASTIC
|
||||
s_bitset1_b32 ttmp13, TTMP13_PCS_IS_HOSTTRAP // set bit 22 in TTMP13
|
||||
.else
|
||||
s_bitset1_b32 ttmp11, TTMP11_PCS_IS_HOSTTRAP // Set bit 22 in TTMP11
|
||||
.endif
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_mov_b64 ttmp[14:15], ttmp[2:3] //now ttmp[14:15] = host_trap_buffers
|
||||
s_branch .profile_trap_handlers_gfx9 // Off to the profile handlers
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_mov_b64 ttmp[14:15], ttmp[2:3] //now ttmp[14:15] = host_trap_buffers
|
||||
s_branch .profile_trap_handlers_gfx9 // Off to the profile handlers
|
||||
.else
|
||||
// Ignore host traps. They should be masked by the driver anyway.
|
||||
s_branch .not_s_trap
|
||||
@@ -301,54 +301,54 @@ trap_entry:
|
||||
|
||||
.not_host_trap:
|
||||
// It's an s_trap; advance the PC
|
||||
s_add_u32 ttmp0, ttmp0, 0x4
|
||||
s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
s_add_u32 ttmp0, ttmp0, 0x4
|
||||
s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
|
||||
// If llvm.debugtrap and debugger is not attached.
|
||||
s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
|
||||
s_cbranch_scc0 .no_skip_debugtrap
|
||||
s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
|
||||
s_cbranch_scc0 .no_skip_debugtrap
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor < 4) || .amdgcn.gfx_generation_number >= 10
|
||||
s_bitcmp0_b32 ttmp11, TTMP_DEBUG_ENABLED_SHIFT
|
||||
s_bitcmp0_b32 ttmp11, TTMP_DEBUG_ENABLED_SHIFT
|
||||
.else
|
||||
s_bitcmp0_b32 ttmp13, TTMP_DEBUG_ENABLED_SHIFT
|
||||
s_bitcmp0_b32 ttmp13, TTMP_DEBUG_ENABLED_SHIFT
|
||||
.endif
|
||||
s_cbranch_scc0 .no_skip_debugtrap
|
||||
s_cbranch_scc0 .no_skip_debugtrap
|
||||
|
||||
// Ignore llvm.debugtrap.
|
||||
s_branch .exit_trap
|
||||
s_branch .exit_trap
|
||||
|
||||
.not_s_trap:
|
||||
.if .amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4
|
||||
//Check for stochastic trap on gfx9.4+
|
||||
s_getreg_b32 ttmp7, hwreg(HW_REG_TRAPSTS) // On gfx94x, TRAPSTS bit 26 ...
|
||||
s_bitcmp1_b32 ttmp7, SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT // is stochastic_sample_trap
|
||||
s_cbranch_scc0 .no_skip_debugtrap
|
||||
s_getreg_b32 ttmp7, hwreg(HW_REG_TRAPSTS) // On gfx94x, TRAPSTS bit 26 ...
|
||||
s_bitcmp1_b32 ttmp7, SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT // is stochastic_sample_trap
|
||||
s_cbranch_scc0 .no_skip_debugtrap
|
||||
|
||||
// Handle stochastic trap
|
||||
s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT, 1), 0
|
||||
s_load_dwordx2 ttmp[2:3], ttmp[14:15], 0x8 glc // ttmp[14:15]=*stoch_trap_buf
|
||||
s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT, 1), 0
|
||||
s_load_dwordx2 ttmp[2:3], ttmp[14:15], 0x8 glc // ttmp[14:15]=*stoch_trap_buf
|
||||
s_bitset0_b32 ttmp13, TTMP13_PCS_IS_HOSTTRAP
|
||||
s_bitset1_b32 ttmp13, TTMP13_PCS_IS_STOCHASTIC // set bit 25 in TTMP13
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_mov_b64 ttmp[14:15], ttmp[2:3]
|
||||
s_branch .profile_trap_handlers_gfx9 // Off to the profile handlers
|
||||
s_bitset1_b32 ttmp13, TTMP13_PCS_IS_STOCHASTIC // set bit 25 in TTMP13
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_mov_b64 ttmp[14:15], ttmp[2:3]
|
||||
s_branch .profile_trap_handlers_gfx9 // Off to the profile handlers
|
||||
.else
|
||||
s_branch .no_skip_debugtrap
|
||||
.endif // PC_SAMPLING_GFX9
|
||||
|
||||
.if (.amdgcn.gfx_generation_number == 9) // PC_SAMPLING_GFX9
|
||||
// tma->host_trap_buffers Offsets:
|
||||
// [0x00] uint64_t buf_write_val;
|
||||
// [0x08] uint32_t buf_size;
|
||||
// [0x0c] uint32_t reserved0;
|
||||
// [0x10] uint32_t buf_written_val0;
|
||||
// [0x14] uint32_t buf_watermark0;
|
||||
// [0x00] uint64_t buf_write_val;
|
||||
// [0x08] uint32_t buf_size;
|
||||
// [0x0c] uint32_t reserved0;
|
||||
// [0x10] uint32_t buf_written_val0;
|
||||
// [0x14] uint32_t buf_watermark0;
|
||||
// [0x18] hsa_signal_t done_sig0;
|
||||
// [0x20] uint32_t buf_written_val1;
|
||||
// [0x24] uint32_t buf_watermark1;
|
||||
// [0x28] hsa_signal_t done_sig1;
|
||||
// [0x30] uint8_t reserved1[16];
|
||||
// [0x40] sample_t buffer0[buf_size];
|
||||
// [0x20] uint32_t buf_written_val1;
|
||||
// [0x24] uint32_t buf_watermark1;
|
||||
// [0x28] hsa_signal_t done_sig1;
|
||||
// [0x30] uint8_t reserved1[16];
|
||||
// [0x40] sample_t buffer0[buf_size];
|
||||
// [0x40+(buf_size*sizeof(sample_t))]sample_t buffer1[buf_size];
|
||||
//
|
||||
//__global__ void profiling_trap_handler(out_buf_t* tma) {
|
||||
@@ -378,49 +378,49 @@ trap_entry:
|
||||
|
||||
// ttmp[14:15] is tma->host_trap_buffers; Available: ttmp[2:3], ttmp[4:5], ttmp7, ttmp13
|
||||
.profile_trap_handlers_gfx9:
|
||||
s_mov_b64 ttmp[2:3], 1 // atomic increment buf_write_val
|
||||
s_atomic_add_x2 ttmp[2:3], ttmp[14:15], glc // ttmp[2:3] = packed local_entry
|
||||
S_LOAD_DWORD_PCS_TTMP_REG1 ttmp[14:15], 0x8 // TTMP_REG1 = tma->buf_size
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_lshr_b32 ttmp7, ttmp3, 31 // ttmp7 = buf_to_use
|
||||
S_BITSET0_B32_PCS_TTMP_REG2 31 // clear out TTMP_REG2 bit31
|
||||
s_cmp_eq_u32 ttmp7, 0 // store off buf_to_use ...
|
||||
s_cbranch_scc1 .skip_ttmp_set_gfx9 // into bit31 of TTMP_REG2
|
||||
S_BITSET1_B32_PCS_TTMP_REG2 31
|
||||
s_mov_b64 ttmp[2:3], 1 // atomic increment buf_write_val
|
||||
s_atomic_add_x2 ttmp[2:3], ttmp[14:15], glc // ttmp[2:3] = packed local_entry
|
||||
S_LOAD_DWORD_PCS_TTMP_REG1 ttmp[14:15], 0x8 // TTMP_REG1 = tma->buf_size
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_lshr_b32 ttmp7, ttmp3, 31 // ttmp7 = buf_to_use
|
||||
S_BITSET0_B32_PCS_TTMP_REG2 31 // clear out TTMP_REG2 bit31
|
||||
s_cmp_eq_u32 ttmp7, 0 // store off buf_to_use ...
|
||||
s_cbranch_scc1 .skip_ttmp_set_gfx9 // into bit31 of TTMP_REG2
|
||||
S_BITSET1_B32_PCS_TTMP_REG2 31
|
||||
.skip_ttmp_set_gfx9:
|
||||
s_bfe_u64 ttmp[2:3], ttmp[2:3], (63<<16) // ttmp[2:3] = new local_entry
|
||||
s_cmp_lg_u32 ttmp3, 0 // if entry >= 2^32, always lost
|
||||
s_cbranch_scc1 .pc_sampling_exit
|
||||
S_CMP_GE_U32_PCS_TTMP_REG1 ttmp2 // if local_entry >= buf_size
|
||||
s_cbranch_scc1 .pc_sampling_exit
|
||||
s_bfe_u64 ttmp[2:3], ttmp[2:3], (63<<16) // ttmp[2:3] = new local_entry
|
||||
s_cmp_lg_u32 ttmp3, 0 // if entry >= 2^32, always lost
|
||||
s_cbranch_scc1 .pc_sampling_exit
|
||||
S_CMP_GE_U32_PCS_TTMP_REG1 ttmp2 // if local_entry >= buf_size
|
||||
s_cbranch_scc1 .pc_sampling_exit
|
||||
|
||||
// ttmp2=local_entry, ttmp7=buf_to_use (also in bit31 of TTMP_REG2), TTMP_REG1=buf_size
|
||||
// ttmp[14:15] is tma->host_trap_buffers. Available: ttmp3, ttmp[4:5]
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4)
|
||||
s_mul_i32 ttmp6, ttmp6, ttmp7 // ttmp[4:5]=buf_size if ...
|
||||
s_mul_i32 ttmp4, ttmp6, 0x40 // buf_to_use=1, 0 otherwise
|
||||
s_mul_hi_u32 ttmp5, ttmp6, 0x40
|
||||
s_mul_i32 ttmp6, ttmp6, ttmp7 // ttmp[4:5]=buf_size if ...
|
||||
s_mul_i32 ttmp4, ttmp6, 0x40 // buf_to_use=1, 0 otherwise
|
||||
s_mul_hi_u32 ttmp5, ttmp6, 0x40
|
||||
.else
|
||||
s_mul_i32 ttmp13, ttmp13, ttmp7 // ttmp[4:5]=buf_size if ...
|
||||
s_mul_i32 ttmp4, ttmp13, 0x40 // buf_to_use=1, 0 otherwise
|
||||
s_mul_hi_u32 ttmp5, ttmp13, 0x40
|
||||
s_mul_i32 ttmp13, ttmp13, ttmp7 // ttmp[4:5]=buf_size if ...
|
||||
s_mul_i32 ttmp4, ttmp13, 0x40 // buf_to_use=1, 0 otherwise
|
||||
s_mul_hi_u32 ttmp5, ttmp13, 0x40
|
||||
.endif
|
||||
|
||||
s_add_u32 ttmp4, ttmp4, 0x40 // now ttmp[4:5]=offset from ...
|
||||
s_addc_u32 ttmp5, ttmp5, 0 // tma to start of target buffer;
|
||||
s_add_u32 ttmp4, ttmp14, ttmp4 // ttmp[4:5] now points to ...
|
||||
s_addc_u32 ttmp5, ttmp15, ttmp5 // buffer0 or buffer1
|
||||
s_mov_b32 ttmp7, ttmp2
|
||||
s_add_u32 ttmp4, ttmp4, 0x40 // now ttmp[4:5]=offset from ...
|
||||
s_addc_u32 ttmp5, ttmp5, 0 // tma to start of target buffer;
|
||||
s_add_u32 ttmp4, ttmp14, ttmp4 // ttmp[4:5] now points to ...
|
||||
s_addc_u32 ttmp5, ttmp15, ttmp5 // buffer0 or buffer1
|
||||
s_mov_b32 ttmp7, ttmp2
|
||||
|
||||
.if .amdgcn.gfx_generation_number == 9
|
||||
|
||||
.if .amdgcn.gfx_generation_minor >= 4
|
||||
// Check if it's a stochastic trap
|
||||
s_bitcmp1_b32 ttmp13, TTMP13_PCS_IS_STOCHASTIC
|
||||
s_cbranch_scc1 .fill_sample_stochastic
|
||||
s_bitcmp1_b32 ttmp13, TTMP13_PCS_IS_STOCHASTIC
|
||||
s_cbranch_scc1 .fill_sample_stochastic
|
||||
// Check if it's a host trap
|
||||
s_bitcmp1_b32 ttmp13, TTMP13_PCS_IS_HOSTTRAP
|
||||
s_cbranch_scc1 .fill_sample_hosttrap
|
||||
s_bitcmp1_b32 ttmp13, TTMP13_PCS_IS_HOSTTRAP
|
||||
s_cbranch_scc1 .fill_sample_hosttrap
|
||||
.else
|
||||
// Check if it's a host trap
|
||||
s_bitcmp1_b32 ttmp11, TTMP11_PCS_IS_HOSTTRAP
|
||||
@@ -430,7 +430,7 @@ trap_entry:
|
||||
.endif
|
||||
// If neither bit is set, this is unexpected.
|
||||
// This branch is not expected to be taken.
|
||||
s_branch .no_skip_debugtrap
|
||||
s_branch .no_skip_debugtrap
|
||||
|
||||
// ttmp7 contains local_entry, ttmp[4:5] contains "&bufferX",
|
||||
// ttmp[14:15] holds 'tma->host_trap_buffers' pointer
|
||||
@@ -466,71 +466,71 @@ trap_entry:
|
||||
// buf->correlation_id = get_correlation_id();
|
||||
// }
|
||||
.fill_sample_hosttrap:
|
||||
s_mul_i32 ttmp2, ttmp7, 0x40 // offset into buffer for 64B objects
|
||||
s_mul_hi_u32 ttmp3, ttmp7, 0x40 // ttmp[2:3] will contain byte ...
|
||||
s_add_u32 ttmp2, ttmp2, ttmp4
|
||||
s_addc_u32 ttmp3, ttmp3, ttmp5 // ttmp[2:3]=&bufferX[local_entry]
|
||||
s_memrealtime ttmp[4:5]
|
||||
s_and_b32 ttmp1, ttmp1, 0xffff // clear out extra data from PC_HI
|
||||
s_store_dwordx2 ttmp[0:1], ttmp[2:3] // store PC
|
||||
s_waitcnt lgkmcnt(0) // wait for timestamp
|
||||
S_MOV_B32_SRC_PCS_TTMP_REG1 exec_lo
|
||||
S_STORE_DWORD_PCS_TTMP_REG1 ttmp[2:3], 0x8 // store EXEC_LO
|
||||
S_MOV_B32_SRC_PCS_TTMP_REG1 exec_hi
|
||||
S_STORE_DWORD_PCS_TTMP_REG1 ttmp[2:3], 0xc // store EXEC_HI
|
||||
s_store_dwordx2 ttmp[8:9], ttmp[2:3], 0x10 // store wg_id_x and wg_id_y
|
||||
s_store_dword ttmp10, ttmp[2:3], 0x18 // store wg_id_z
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3], 0x30 // store timestamp
|
||||
s_mul_i32 ttmp2, ttmp7, 0x40 // offset into buffer for 64B objects
|
||||
s_mul_hi_u32 ttmp3, ttmp7, 0x40 // ttmp[2:3] will contain byte ...
|
||||
s_add_u32 ttmp2, ttmp2, ttmp4
|
||||
s_addc_u32 ttmp3, ttmp3, ttmp5 // ttmp[2:3]=&bufferX[local_entry]
|
||||
s_memrealtime ttmp[4:5]
|
||||
s_and_b32 ttmp1, ttmp1, 0xffff // clear out extra data from PC_HI
|
||||
s_store_dwordx2 ttmp[0:1], ttmp[2:3] // store PC
|
||||
s_waitcnt lgkmcnt(0) // wait for timestamp
|
||||
S_MOV_B32_SRC_PCS_TTMP_REG1 exec_lo
|
||||
S_STORE_DWORD_PCS_TTMP_REG1 ttmp[2:3], 0x8 // store EXEC_LO
|
||||
S_MOV_B32_SRC_PCS_TTMP_REG1 exec_hi
|
||||
S_STORE_DWORD_PCS_TTMP_REG1 ttmp[2:3], 0xc // store EXEC_HI
|
||||
s_store_dwordx2 ttmp[8:9], ttmp[2:3], 0x10 // store wg_id_x and wg_id_y
|
||||
s_store_dword ttmp10, ttmp[2:3], 0x18 // store wg_id_z
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3], 0x30 // store timestamp
|
||||
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_XCC_ID) //store XCC_ID
|
||||
s_lshl_b32 ttmp4, ttmp4, 8
|
||||
s_and_b32 ttmp5, ttmp11, TTMP11_WAVE_IN_WG_MASK
|
||||
s_or_b32 ttmp4, ttmp4, ttmp5
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x1c // store wave_in_wg
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_XCC_ID) //store XCC_ID
|
||||
s_lshl_b32 ttmp4, ttmp4, 8
|
||||
s_and_b32 ttmp5, ttmp11, TTMP11_WAVE_IN_WG_MASK
|
||||
s_or_b32 ttmp4, ttmp4, ttmp5
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x1c // store wave_in_wg
|
||||
.else
|
||||
s_and_b32 ttmp4, ttmp11, 0x3f
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x1c // store wave_in_wg
|
||||
s_and_b32 ttmp4, ttmp11, 0x3f
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x1c // store wave_in_wg
|
||||
.endif
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_HW_ID)
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x20 // store HW_ID
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_HW_ID)
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x20 // store HW_ID
|
||||
|
||||
s_branch .get_correlation_id
|
||||
|
||||
.if .amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4
|
||||
.fill_sample_stochastic:
|
||||
s_mul_i32 ttmp2, ttmp7, 0x40 // offset into buffer for 64B objects
|
||||
s_mul_i32 ttmp2, ttmp7, 0x40 // offset into buffer for 64B objects
|
||||
s_mul_hi_u32 ttmp3, ttmp7, 0x40
|
||||
s_add_u32 ttmp2, ttmp2, ttmp4
|
||||
s_addc_u32 ttmp3, ttmp3, ttmp5 // ttmp[2:3]=&buffer[local_entry]
|
||||
s_memrealtime ttmp[4:5]
|
||||
s_waitcnt lgkmcnt(0) // Wait for timestamp
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3] 0x30 // Store timestamp
|
||||
s_add_u32 ttmp2, ttmp2, ttmp4
|
||||
s_addc_u32 ttmp3, ttmp3, ttmp5 // ttmp[2:3]=&buffer[local_entry]
|
||||
s_memrealtime ttmp[4:5]
|
||||
s_waitcnt lgkmcnt(0) // Wait for timestamp
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3] 0x30 // Store timestamp
|
||||
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO)
|
||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI)
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3] 0x00 // store snapshot data
|
||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1)
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA)
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3], 0x24 // store snapshot PC
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_LO)
|
||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_PERF_SNAPSHOT_PC_HI)
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3] 0x00 // store snapshot data
|
||||
s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA1)
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_PERF_SNAPSHOT_DATA)
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3], 0x24 // store snapshot PC
|
||||
|
||||
s_mov_b32 ttmp6, exec_lo
|
||||
s_store_dword ttmp6, ttmp[2:3], 0x8 // store EXEC_LO
|
||||
s_mov_b32 ttmp6, exec_hi
|
||||
s_store_dword ttmp6, ttmp[2:3], 0xc // store EXEC_HI
|
||||
s_mov_b32 ttmp6, exec_lo
|
||||
s_store_dword ttmp6, ttmp[2:3], 0x8 // store EXEC_LO
|
||||
s_mov_b32 ttmp6, exec_hi
|
||||
s_store_dword ttmp6, ttmp[2:3], 0xc // store EXEC_HI
|
||||
|
||||
s_store_dwordx2 ttmp[8:9], ttmp[2:3], 0x10 // store wg_id_x and wg_id_y
|
||||
s_store_dword ttmp10, ttmp[2:3], 0x18 // store wg_id_z
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_XCC_ID)
|
||||
s_lshl_b32 ttmp4, ttmp4, 8
|
||||
s_and_b32 ttmp5, ttmp11, TTMP11_WAVE_IN_WG_MASK
|
||||
s_or_b32 ttmp4, ttmp4, ttmp5
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x1c // store chiplet_and_wave_id
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_HW_ID)
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x20 // store HW_ID
|
||||
s_store_dwordx2 ttmp[8:9], ttmp[2:3], 0x10 // store wg_id_x and wg_id_y
|
||||
s_store_dword ttmp10, ttmp[2:3], 0x18 // store wg_id_z
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_XCC_ID)
|
||||
s_lshl_b32 ttmp4, ttmp4, 8
|
||||
s_and_b32 ttmp5, ttmp11, TTMP11_WAVE_IN_WG_MASK
|
||||
s_or_b32 ttmp4, ttmp4, ttmp5
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x1c // store chiplet_and_wave_id
|
||||
s_getreg_b32 ttmp4, hwreg(HW_REG_HW_ID)
|
||||
s_store_dword ttmp4, ttmp[2:3], 0x20 // store HW_ID
|
||||
// ttmp[2:3]=&buffer[local_entry]; ttmp[4:5], ttmp[6:7] are free
|
||||
// ttmp[14:15]=ptr to ‘tma’ and is live out; ttmp11.b31 is buf_to_use, 0 or 1
|
||||
s_branch .get_correlation_id
|
||||
s_branch .get_correlation_id
|
||||
|
||||
.endif
|
||||
|
||||
@@ -559,42 +559,42 @@ trap_entry:
|
||||
// ttmp[14:15] = tma->host_trap_buffers and is live out
|
||||
// ttmp6.b31 is buf_to_use, 0 or 1 and is live out
|
||||
|
||||
s_mov_b64 ttmp[4:5], exec // back up EXEC mask
|
||||
s_mov_b32 exec_lo, 0x80000000 // prepare EXEC for doorbell spin
|
||||
s_sendmsg sendmsg(MSG_GET_DOORBELL) // message 10, puts doorbell in EXEC
|
||||
s_mov_b64 ttmp[4:5], exec // back up EXEC mask
|
||||
s_mov_b32 exec_lo, 0x80000000 // prepare EXEC for doorbell spin
|
||||
s_sendmsg sendmsg(MSG_GET_DOORBELL) // message 10, puts doorbell in EXEC
|
||||
.wait_for_doorbell:
|
||||
s_nop 0x7 // wait a bit for message to return
|
||||
s_bitcmp0_b32 exec_lo, 0x1f // returned message will 0 bit 31
|
||||
s_cbranch_scc0 .wait_for_doorbell // wait some more if no data yet
|
||||
s_mov_b32 exec_hi, ttmp5 // do not care about message[63:32]
|
||||
s_and_b32 ttmp5, exec_lo, DOORBELL_ID_MASK // doorbell now in ttmp5
|
||||
s_mov_b32 exec_lo, ttmp4 // exec mask restored
|
||||
s_nop 0x7 // wait a bit for message to return
|
||||
s_bitcmp0_b32 exec_lo, 0x1f // returned message will 0 bit 31
|
||||
s_cbranch_scc0 .wait_for_doorbell // wait some more if no data yet
|
||||
s_mov_b32 exec_hi, ttmp5 // do not care about message[63:32]
|
||||
s_and_b32 ttmp5, exec_lo, DOORBELL_ID_MASK // doorbell now in ttmp5
|
||||
s_mov_b32 exec_lo, ttmp4 // exec mask restored
|
||||
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
|
||||
s_bfe_u32 ttmp4, ttmp11, (6 | 25 << 16) // extract dispatch ID from ttmp11
|
||||
s_bfe_u32 ttmp4, ttmp11, (6 | 25 << 16) // extract dispatch ID from ttmp11
|
||||
.else
|
||||
s_and_b32 ttmp4, ttmp6, 0x1ffffff // extract low 25 bits from ttmp6 (DispatchPktIndx[24:0])
|
||||
s_and_b32 ttmp4, ttmp6, 0x1ffffff // extract low 25 bits from ttmp6 (DispatchPktIndx[24:0])
|
||||
.endif
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3], 0x38 // ttmp[4:5] is correlation ID. Store correlation_id to sample
|
||||
s_store_dwordx2 ttmp[4:5], ttmp[2:3], 0x38 // ttmp[4:5] is correlation ID. Store correlation_id to sample
|
||||
// get_correlation_id() -- end //
|
||||
|
||||
// complete stores before returning
|
||||
s_dcache_wb
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_waitcnt lgkmcnt(0)
|
||||
// fill_sample(...) - end //
|
||||
|
||||
// ttmp[2:3], ttmp[4:5], ttmp7, and ttmp13 are free
|
||||
// ttmp[14:15] = tma->host_trap_buffers; ttmp6.b31 is buf_to_use, 0 or 1
|
||||
S_LSHR_B32_PCS_TTMP_REG1_REG2 31 // TTMP_REG1 is buf_to_use
|
||||
S_MULK_I32_PCS_TTMP_REG1 0x10 // written_val0 to written_val_X
|
||||
S_ADD_U32_PCS_TTMP_REG1 ttmp14, ttmp14 // now ttmp[14:15] points to ...
|
||||
s_addc_u32 ttmp15, ttmp15, 0x0 // buf_written_valX-0x10
|
||||
s_mov_b32 ttmp7, 1 // atomic increment buf_written_valX
|
||||
s_atomic_add ttmp7, ttmp[14:15], 0x10 glc // ttmp7 will contain 'done'
|
||||
S_LOAD_DWORD_PCS_TTMP_REG1 ttmp[14:15], 0x14 // TTMP_REG1 will hold watermark
|
||||
s_waitcnt lgkmcnt(0)
|
||||
S_CMP_LG_U32_PCS_TTMP_REG1 ttmp7 // if 'done' not at watermark, exit
|
||||
s_cbranch_scc1 .pc_sampling_exit
|
||||
S_LSHR_B32_PCS_TTMP_REG1_REG2 31 // TTMP_REG1 is buf_to_use
|
||||
S_MULK_I32_PCS_TTMP_REG1 0x10 // written_val0 to written_val_X
|
||||
S_ADD_U32_PCS_TTMP_REG1 ttmp14, ttmp14 // now ttmp[14:15] points to ...
|
||||
s_addc_u32 ttmp15, ttmp15, 0x0 // buf_written_valX-0x10
|
||||
s_mov_b32 ttmp7, 1 // atomic increment buf_written_valX
|
||||
s_atomic_add ttmp7, ttmp[14:15], 0x10 glc // ttmp7 will contain 'done'
|
||||
S_LOAD_DWORD_PCS_TTMP_REG1 ttmp[14:15], 0x14 // TTMP_REG1 will hold watermark
|
||||
s_waitcnt lgkmcnt(0)
|
||||
S_CMP_LG_U32_PCS_TTMP_REG1 ttmp7 // if 'done' not at watermark, exit
|
||||
s_cbranch_scc1 .pc_sampling_exit
|
||||
|
||||
// ttmp[2:3], [4:5], ttmp7, and ttmp13 are free
|
||||
// ttmp[14:15] = buf_written_valX-0x10
|
||||
@@ -614,129 +614,129 @@ trap_entry:
|
||||
// We jump to the trap handler exit after this, so no live-out registers except
|
||||
// those that must survive the trap handler
|
||||
|
||||
s_load_dwordx2 ttmp[2:3], ttmp[14:15], 0x18 // load done_sig into ttmp[2:3]
|
||||
s_waitcnt lgkmcnt(0) // it's actually an amd_signal_t*
|
||||
s_load_dwordx2 ttmp[4:5], ttmp[2:3], 0x10 // load event mailbox ptr into 4:5
|
||||
s_load_dword ttmp7, ttmp[2:3], 0x18 // load event_id into ttmp7
|
||||
s_mov_b64 ttmp[14:15], 0
|
||||
s_store_dwordx2 ttmp[14:15], ttmp[2:3], 0x8 glc // zero out signal value
|
||||
s_waitcnt lgkmcnt(0) // wait for value store to complete
|
||||
s_cmp_eq_u64 ttmp[4:5], 0
|
||||
s_cbranch_scc1 .pc_sampling_exit // null mailbox means no interrupt
|
||||
s_cmp_eq_u32 ttmp7, 0
|
||||
s_cbranch_scc1 .pc_sampling_exit // event_id zero means no interrupt
|
||||
s_store_dword ttmp7, ttmp[4:5] glc // send event ID to the mailbox
|
||||
s_waitcnt lgkmcnt(0)
|
||||
S_MOV_B32_SRC_PCS_TTMP_REG1 m0 // save off m0
|
||||
s_mov_b32 m0, ttmp7 // put ID into message payload
|
||||
s_nop 0x0 // Manually inserted wait states
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT) // send interrupt message
|
||||
s_waitcnt lgkmcnt(0) // wait for message to be sent
|
||||
S_MOV_B32_DST_PCS_TTMP_REG1 m0 // restore m0
|
||||
s_load_dwordx2 ttmp[2:3], ttmp[14:15], 0x18 // load done_sig into ttmp[2:3]
|
||||
s_waitcnt lgkmcnt(0) // it's actually an amd_signal_t*
|
||||
s_load_dwordx2 ttmp[4:5], ttmp[2:3], 0x10 // load event mailbox ptr into 4:5
|
||||
s_load_dword ttmp7, ttmp[2:3], 0x18 // load event_id into ttmp7
|
||||
s_mov_b64 ttmp[14:15], 0
|
||||
s_store_dwordx2 ttmp[14:15], ttmp[2:3], 0x8 glc // zero out signal value
|
||||
s_waitcnt lgkmcnt(0) // wait for value store to complete
|
||||
s_cmp_eq_u64 ttmp[4:5], 0
|
||||
s_cbranch_scc1 .pc_sampling_exit // null mailbox means no interrupt
|
||||
s_cmp_eq_u32 ttmp7, 0
|
||||
s_cbranch_scc1 .pc_sampling_exit // event_id zero means no interrupt
|
||||
s_store_dword ttmp7, ttmp[4:5] glc // send event ID to the mailbox
|
||||
s_waitcnt lgkmcnt(0)
|
||||
S_MOV_B32_SRC_PCS_TTMP_REG1 m0 // save off m0
|
||||
s_mov_b32 m0, ttmp7 // put ID into message payload
|
||||
s_nop 0x0 // Manually inserted wait states
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT) // send interrupt message
|
||||
s_waitcnt lgkmcnt(0) // wait for message to be sent
|
||||
S_MOV_B32_DST_PCS_TTMP_REG1 m0 // restore m0
|
||||
// send_signal(...) - end //
|
||||
.pc_sampling_exit:
|
||||
// We can receive regular exceptions while doing PC-Sampling so we need to make sure we
|
||||
// handle these exceptions here
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE, SQ_WAVE_MODE_EXCP_EN_SHIFT, SQ_WAVE_MODE_EXCP_EN_SIZE) // ttmp3[7:0] = MODE.EXCP_EN
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE, SQ_WAVE_MODE_EXCP_EN_SHIFT, SQ_WAVE_MODE_EXCP_EN_SIZE) // ttmp3[7:0] = MODE.EXCP_EN
|
||||
// Set bits corresponding to TRAPSTS.MEM_VIOL, TRAPSTS.ILLEGAL_INST and TRAPSTS.XNACK_ERROR
|
||||
s_or_b32 ttmp3, ttmp3, (1 << SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT | 1 << SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT | 1 << SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT)
|
||||
s_and_b32 ttmp2, ttmp2, ttmp3
|
||||
s_or_b32 ttmp3, ttmp3, (1 << SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT | 1 << SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT | 1 << SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT)
|
||||
s_and_b32 ttmp2, ttmp2, ttmp3
|
||||
// SCC will be 1 if either a maskable instruction was set, or one of MEM_VIOL, ILL_INST, XNACK_ERROR
|
||||
s_cbranch_scc1 .no_skip_debugtrap // if any of those are set, handle exceptions
|
||||
s_cbranch_scc1 .no_skip_debugtrap // if any of those are set, handle exceptions
|
||||
|
||||
// Check for maskable exceptions
|
||||
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE, SQ_WAVE_MODE_EXCP_EN_SHIFT, SQ_WAVE_MODE_EXCP_EN_SIZE)
|
||||
s_and_b32 ttmp3, ttmp2, ttmp3
|
||||
s_cbranch_scc1 .no_skip_debugtrap
|
||||
s_getreg_b32 ttmp3, hwreg(HW_REG_MODE, SQ_WAVE_MODE_EXCP_EN_SHIFT, SQ_WAVE_MODE_EXCP_EN_SIZE)
|
||||
s_and_b32 ttmp3, ttmp2, ttmp3
|
||||
s_cbranch_scc1 .no_skip_debugtrap
|
||||
|
||||
// Since we are in PC sampling, it is safe to ignore watch1/2/3 and single step
|
||||
// as those should only be enabled by the debugger.
|
||||
// We could add them for completeness, i.e. check MODE.DEBUG_EN (bit 11)
|
||||
// and "MODE.EXCP_EN.WATCH (bit 19) && (TRAPSTS.EXCP_HI.ADDR_WATCH1 (bit 12) || TRAPSTS.EXCP_HI.ADDR_WATCH2 (bit 13) || TRAPSTS.EXCP_HI.ADDR_WATCH3 (bit 14)).
|
||||
s_branch .exit_trap
|
||||
s_branch .exit_trap
|
||||
|
||||
.endif // PC_SAMPLING_GFX9
|
||||
.no_skip_debugtrap:
|
||||
// Save trap id and halt status in ttmp6.
|
||||
s_andn2_b32 ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
|
||||
s_andn2_b32 ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
|
||||
s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
s_min_u32 ttmp2, ttmp2, 0xF
|
||||
s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
|
||||
s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATUS_HALT_BFE
|
||||
s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
|
||||
s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
s_min_u32 ttmp2, ttmp2, 0xF
|
||||
s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
|
||||
s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATUS_HALT_BFE
|
||||
s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
|
||||
s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
|
||||
// Fetch doorbell id for our queue.
|
||||
.if .amdgcn.gfx_generation_number < 11
|
||||
s_mov_b32 ttmp2, exec_lo
|
||||
s_mov_b32 ttmp3, exec_hi
|
||||
s_mov_b32 exec_lo, 0x80000000
|
||||
s_sendmsg sendmsg(MSG_GET_DOORBELL)
|
||||
s_mov_b32 ttmp2, exec_lo
|
||||
s_mov_b32 ttmp3, exec_hi
|
||||
s_mov_b32 exec_lo, 0x80000000
|
||||
s_sendmsg sendmsg(MSG_GET_DOORBELL)
|
||||
.wait_sendmsg:
|
||||
s_nop 0x7
|
||||
s_bitcmp0_b32 exec_lo, 0x1F
|
||||
s_cbranch_scc0 .wait_sendmsg
|
||||
s_mov_b32 exec_hi, ttmp3
|
||||
s_nop 0x7
|
||||
s_bitcmp0_b32 exec_lo, 0x1F
|
||||
s_cbranch_scc0 .wait_sendmsg
|
||||
s_mov_b32 exec_hi, ttmp3
|
||||
// Restore exec_lo, move the doorbell_id into ttmp3
|
||||
s_and_b32 ttmp3, exec_lo, DOORBELL_ID_MASK
|
||||
s_mov_b32 exec_lo, ttmp2
|
||||
s_and_b32 ttmp3, exec_lo, DOORBELL_ID_MASK
|
||||
s_mov_b32 exec_lo, ttmp2
|
||||
.else
|
||||
s_sendmsg_rtn_b32 ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b32 ttmp3, ttmp3, DOORBELL_ID_MASK
|
||||
s_sendmsg_rtn_b32 ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
|
||||
s_waitcnt lgkmcnt(0)
|
||||
s_and_b32 ttmp3, ttmp3, DOORBELL_ID_MASK
|
||||
.endif
|
||||
|
||||
// Map trap reason to an exception code.
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT
|
||||
s_cbranch_scc0 .not_memory_violation
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT
|
||||
s_cbranch_scc0 .not_memory_violation
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
|
||||
|
||||
// Aperture violation requires XNACK_ERROR == 0.
|
||||
s_branch .not_aperture_violation
|
||||
s_branch .not_aperture_violation
|
||||
|
||||
.not_memory_violation:
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT
|
||||
s_cbranch_scc0 .not_aperture_violation
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT
|
||||
s_cbranch_scc0 .not_aperture_violation
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
|
||||
|
||||
.not_aperture_violation:
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
|
||||
s_cbranch_scc0 .not_illegal_instruction
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
|
||||
s_cbranch_scc0 .not_illegal_instruction
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
|
||||
|
||||
.not_illegal_instruction:
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_MATH_EXCP
|
||||
s_cbranch_scc0 .not_math_exception
|
||||
s_getreg_b32 ttmp7, hwreg(HW_REG_MODE)
|
||||
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
|
||||
s_and_b32 ttmp2, ttmp2, ttmp7
|
||||
s_cbranch_scc0 .not_math_exception
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_MATH_EXCP
|
||||
s_cbranch_scc0 .not_math_exception
|
||||
s_getreg_b32 ttmp7, hwreg(HW_REG_MODE)
|
||||
s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
|
||||
s_and_b32 ttmp2, ttmp2, ttmp7
|
||||
s_cbranch_scc0 .not_math_exception
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
|
||||
|
||||
.not_math_exception:
|
||||
s_bfe_u32 ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
|
||||
s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
|
||||
s_cbranch_scc0 .not_abort_trap
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
|
||||
s_bfe_u32 ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
|
||||
s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
|
||||
s_cbranch_scc0 .not_abort_trap
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
|
||||
|
||||
.not_abort_trap:
|
||||
// If no other exception was flagged then report a generic error.
|
||||
s_andn2_b32 ttmp2, ttmp3, DOORBELL_ID_MASK
|
||||
s_cbranch_scc1 .send_interrupt
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
s_andn2_b32 ttmp2, ttmp3, DOORBELL_ID_MASK
|
||||
s_cbranch_scc1 .send_interrupt
|
||||
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
|
||||
.send_interrupt:
|
||||
// m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
|
||||
s_mov_b32 ttmp2, m0
|
||||
s_mov_b32 m0, ttmp3
|
||||
s_nop 0x0 // Manually inserted wait states
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
s_waitcnt lgkmcnt(0) // Wait for the message to go out.
|
||||
s_mov_b32 m0, ttmp2
|
||||
s_mov_b32 ttmp2, m0
|
||||
s_mov_b32 m0, ttmp3
|
||||
s_nop 0x0 // Manually inserted wait states
|
||||
s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
s_waitcnt lgkmcnt(0) // Wait for the message to go out.
|
||||
s_mov_b32 m0, ttmp2
|
||||
|
||||
// Parking the wave requires saving the original pc in the preserved ttmps.
|
||||
// Register layout before parking the wave:
|
||||
@@ -750,33 +750,33 @@ trap_entry:
|
||||
// ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor < 4) || (.amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor < 3) || (.amdgcn.gfx_generation_number == 11)
|
||||
// Save the PC
|
||||
s_mov_b32 ttmp7, ttmp0
|
||||
s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
|
||||
s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
|
||||
s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
|
||||
s_or_b32 ttmp11, ttmp11, ttmp1
|
||||
s_mov_b32 ttmp7, ttmp0
|
||||
s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
|
||||
s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
|
||||
s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
|
||||
s_or_b32 ttmp11, ttmp11, ttmp1
|
||||
|
||||
// Park the wave
|
||||
s_getpc_b64 [ttmp0, ttmp1]
|
||||
s_add_u32 ttmp0, ttmp0, .parked - .
|
||||
s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
s_getpc_b64 [ttmp0, ttmp1]
|
||||
s_add_u32 ttmp0, ttmp0, .parked - .
|
||||
s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
.endif
|
||||
|
||||
.halt_wave:
|
||||
// Halt the wavefront upon restoring STATUS below.
|
||||
s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
|
||||
s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_HALT_SHIFT
|
||||
s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
|
||||
s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_HALT_SHIFT
|
||||
// Set WAVE.SKIP_EXPORT as a maker so the debugger knows the trap handler was
|
||||
// entered and has decided to halt the wavee.
|
||||
s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT
|
||||
s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT
|
||||
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor >= 4)
|
||||
s_bitcmp1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT
|
||||
s_cbranch_scc1 .ttmps_initialized
|
||||
s_mov_b32 ttmp4, 0
|
||||
s_mov_b32 ttmp5, 0
|
||||
s_bitset0_b32 ttmp6, TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT
|
||||
s_bitset1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT
|
||||
s_bitcmp1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT
|
||||
s_cbranch_scc1 .ttmps_initialized
|
||||
s_mov_b32 ttmp4, 0
|
||||
s_mov_b32 ttmp5, 0
|
||||
s_bitset0_b32 ttmp6, TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT
|
||||
s_bitset1_b32 ttmp11, TTMP11_TTMPS_SETUP_SHIFT
|
||||
.ttmps_initialized:
|
||||
.endif
|
||||
|
||||
@@ -784,32 +784,32 @@ trap_entry:
|
||||
// Restore SQ_WAVE_IB_STS.
|
||||
.if .amdgcn.gfx_generation_number == 9
|
||||
.if .amdgcn.gfx_generation_minor < 4
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
.else
|
||||
s_lshr_b32 ttmp2, ttmp13, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
s_lshr_b32 ttmp2, ttmp13, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
.endif
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
.elseif .amdgcn.gfx_generation_number == 10 && .amdgcn.gfx_generation_minor < 3
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
|
||||
s_or_b32 ttmp2, ttmp2, ttmp3
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_lshr_b32 ttmp2, ttmp11, (TTMP_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
|
||||
s_or_b32 ttmp2, ttmp2, ttmp3
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
.endif
|
||||
|
||||
// Restore SQ_WAVE_STATUS.
|
||||
s_and_b64 exec, exec, exec // restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), ttmp12
|
||||
s_and_b64 exec, exec, exec // restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
s_and_b64 vcc, vcc, vcc // restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
s_setreg_b32 hwreg(HW_REG_STATUS), ttmp12
|
||||
|
||||
// Return to original (possibly modified) PC.
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
s_rfe_b64 [ttmp0, ttmp1]
|
||||
|
||||
.parked:
|
||||
s_trap 0x2
|
||||
s_branch .parked
|
||||
s_trap 0x2
|
||||
s_branch .parked
|
||||
|
||||
// For gfx11, add padding instructions so we can ensure instruction cache
|
||||
// prefetch always has something to load.
|
||||
|
||||
+1
-1
@@ -139,7 +139,7 @@ trap_entry:
|
||||
// - EXCP_FLAG_PRIV.ADDR_WATCH && TRAP_CTL.WATCH -> WAVE_TRAP
|
||||
// - (EXCP_FLAG_USER[ALU] & TRAP_CTRL[ALU]) != 0 -> WAVE_MATH_ERROR
|
||||
.check_exceptions:
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)
|
||||
s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)
|
||||
s_getreg_b32 ttmp13, hwreg(HW_REG_TRAP_CTRL)
|
||||
|
||||
s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário