SWDEV-552573 - restore asm file before clang formatting (#802)
* Restore asm file before clang formatting * Surround inline assembly in clang-format off
Цей коміт міститься в:
@@ -17,22 +17,25 @@
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
// clang-format off
|
||||
namespace amd::pal {
|
||||
|
||||
#define RUNTIME_KERNEL(...) #__VA_ARGS__
|
||||
|
||||
const char* SchedulerSourceCode = RUNTIME_KERNEL(
|
||||
\n extern void __amd_scheduler(__global void*, __global void*, uint);
|
||||
\n __kernel void __amd_rocclr_scheduler(__global void* queue, __global void* params,
|
||||
uint paramIdx) {
|
||||
\n
|
||||
extern void __amd_scheduler(__global void*, __global void*, uint);
|
||||
\n
|
||||
__kernel void __amd_rocclr_scheduler(__global void* queue, __global void* params, uint paramIdx) {
|
||||
__amd_scheduler(queue, params, paramIdx);
|
||||
}
|
||||
\n);
|
||||
|
||||
const char* SchedulerSourceCode20 = RUNTIME_KERNEL(
|
||||
\n extern void __amd_scheduler_pal(__global void*, __global void*, uint);
|
||||
\n __kernel void __amd_rocclr_scheduler(__global void* queue, __global void* params,
|
||||
\n
|
||||
extern void __amd_scheduler_pal(__global void*, __global void*, uint);
|
||||
\n
|
||||
__kernel void __amd_rocclr_scheduler(__global void* queue, __global void* params,
|
||||
uint paramIdx) {
|
||||
__amd_scheduler_pal(queue, params, paramIdx);
|
||||
}
|
||||
@@ -59,166 +62,149 @@ const char* SchedulerSourceCode20 = RUNTIME_KERNEL(
|
||||
*/
|
||||
|
||||
const char* TrapHandlerCode = RUNTIME_KERNEL(
|
||||
\n.if.amdgcn.gfx_generation_number == 11
|
||||
\n.set SQ_WAVE_PC_HI_ADDRESS_MASK, 0xFFFF
|
||||
\n.set SQ_WAVE_PC_HI_HT_SHIFT, 24
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT, 16
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SIZE, 8
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_BFE,
|
||||
(SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
|
||||
\n.set SQ_WAVE_STATUS_HALT_SHIFT, 13
|
||||
\n.set SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT, 18
|
||||
\n.set SQ_WAVE_STATUS_HALT_BFE, (SQ_WAVE_STATUS_HALT_SHIFT | (1 << 16))
|
||||
\n.set SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 8
|
||||
\n.set SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT, 11
|
||||
\n.set SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT, 28
|
||||
\n.set SQ_WAVE_TRAPSTS_MATH_EXCP, 0x7F
|
||||
\n.set SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT, 26
|
||||
\n.set SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT, 22
|
||||
\n.set SQ_WAVE_MODE_EXCP_EN_SHIFT, 12
|
||||
\n.set SQ_WAVE_MODE_EXCP_EN_SIZE, 8
|
||||
\n.set TRAP_ID_ABORT, 2
|
||||
\n.set TRAP_ID_DEBUGTRAP, 3
|
||||
\n.set DOORBELL_ID_SIZE, 10
|
||||
\n.set DOORBELL_ID_MASK, ((1 << DOORBELL_ID_SIZE) - 1)
|
||||
\n.set EC_QUEUE_WAVE_ABORT_M0, (1 << (DOORBELL_ID_SIZE + 0))
|
||||
\n.set EC_QUEUE_WAVE_TRAP_M0, (1 << (DOORBELL_ID_SIZE + 1))
|
||||
\n.set EC_QUEUE_WAVE_MATH_ERROR_M0, (1 << (DOORBELL_ID_SIZE + 2))
|
||||
\n.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0, (1 << (DOORBELL_ID_SIZE + 3))
|
||||
\n.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0, (1 << (DOORBELL_ID_SIZE + 4))
|
||||
\n.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0, (1 << (DOORBELL_ID_SIZE + 5))
|
||||
\n.if .amdgcn.gfx_generation_number == 11
|
||||
\n.set SQ_WAVE_PC_HI_ADDRESS_MASK , 0xFFFF
|
||||
\n.set SQ_WAVE_PC_HI_HT_SHIFT , 24
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT , 16
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 8
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
|
||||
\n.set SQ_WAVE_STATUS_HALT_SHIFT , 13
|
||||
\n.set SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT , 18
|
||||
\n.set SQ_WAVE_STATUS_HALT_BFE , (SQ_WAVE_STATUS_HALT_SHIFT | (1 << 16))
|
||||
\n.set SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT , 8
|
||||
\n.set SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT , 11
|
||||
\n.set SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT , 28
|
||||
\n.set SQ_WAVE_TRAPSTS_MATH_EXCP , 0x7F
|
||||
\n.set SQ_WAVE_TRAPSTS_PERF_SNAPSHOT_SHIFT , 26
|
||||
\n.set SQ_WAVE_TRAPSTS_HOST_TRAP_SHIFT , 22
|
||||
\n.set SQ_WAVE_MODE_EXCP_EN_SHIFT , 12
|
||||
\n.set SQ_WAVE_MODE_EXCP_EN_SIZE , 8
|
||||
\n.set TRAP_ID_ABORT , 2
|
||||
\n.set TRAP_ID_DEBUGTRAP , 3
|
||||
\n.set DOORBELL_ID_SIZE , 10
|
||||
\n.set DOORBELL_ID_MASK , ((1 << DOORBELL_ID_SIZE) - 1)
|
||||
\n.set EC_QUEUE_WAVE_ABORT_M0 , (1 << (DOORBELL_ID_SIZE + 0))
|
||||
\n.set EC_QUEUE_WAVE_TRAP_M0 , (1 << (DOORBELL_ID_SIZE + 1))
|
||||
\n.set EC_QUEUE_WAVE_MATH_ERROR_M0 , (1 << (DOORBELL_ID_SIZE + 2))
|
||||
\n.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0 , (1 << (DOORBELL_ID_SIZE + 3))
|
||||
\n.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 4))
|
||||
\n.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 5))
|
||||
\n
|
||||
\n.set TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT, 31
|
||||
\n.set TTMP6_WAVE_STOPPED_SHIFT, 30
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_SHIFT, 29
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_MASK, (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SHIFT, 25
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SIZE, 4
|
||||
\n.set TTMP6_SAVED_TRAP_ID_MASK,
|
||||
(((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
|
||||
\n.set TTMP6_SAVED_TRAP_ID_BFE, (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
|
||||
\n.set TTMP6_SPI_TTMPS_SETUP_DISABLED_SHIFT , 31
|
||||
\n.set TTMP6_WAVE_STOPPED_SHIFT , 30
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_SHIFT , 29
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_MASK , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SHIFT , 25
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SIZE , 4
|
||||
\n.set TTMP6_SAVED_TRAP_ID_MASK , (((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
|
||||
\n.set TTMP6_SAVED_TRAP_ID_BFE , (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
|
||||
\n
|
||||
\n.set TTMP_PC_HI_SHIFT, 7
|
||||
\n.set TTMP_DEBUG_ENABLED_SHIFT,
|
||||
23
|
||||
\n.set TTMP_PC_HI_SHIFT , 7
|
||||
\n.set TTMP_DEBUG_ENABLED_SHIFT , 23
|
||||
\n
|
||||
\n // ABI between first and second level trap handler:
|
||||
\n // ttmp0 = PC[31:0]
|
||||
\n // ttmp6 = 0[6:0], DispatchPktIndx[24:0]
|
||||
\n // ttmp8 = WorkgroupIdX
|
||||
\n // ttmp9 = WorkgroupIdY
|
||||
\n // ttmp10 = WorkgroupIdZ
|
||||
\n // ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]
|
||||
\n // ttmp12 = SQ_WAVE_STATUS
|
||||
\n // ttmp14 = TMA[31:0]
|
||||
\n // ttmp15 = TMA[63:32]
|
||||
\n// ABI between first and second level trap handler:
|
||||
\n// ttmp0 = PC[31:0]
|
||||
\n// ttmp6 = 0[6:0], DispatchPktIndx[24:0]
|
||||
\n// ttmp8 = WorkgroupIdX
|
||||
\n// ttmp9 = WorkgroupIdY
|
||||
\n// ttmp10 = WorkgroupIdZ
|
||||
\n// ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], WaveIdInWG[5:0]
|
||||
\n// ttmp12 = SQ_WAVE_STATUS
|
||||
\n// ttmp14 = TMA[31:0]
|
||||
\n// ttmp15 = TMA[63:32]
|
||||
\n
|
||||
\n.globl trap_entry
|
||||
\n.type trap_entry,
|
||||
@function
|
||||
\n.align 256
|
||||
\n .globl trap_entry
|
||||
\n .type trap_entry,@function
|
||||
\n .align 256
|
||||
\n
|
||||
\ntrap_entry :
|
||||
\ntrap_entry:
|
||||
\n // Extract trap_id from ttmp2
|
||||
\n s_bfe_u32 ttmp2, ttmp1,
|
||||
SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_cbranch_scc0.not_s_trap // If trap_id == 0, it's not an s_trap nor host trap
|
||||
\n s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_cbranch_scc0 .not_s_trap // If trap_id == 0, it's not an s_trap nor host trap
|
||||
\n
|
||||
\n // Check if the it was an host trap.
|
||||
\n s_bitcmp1_b32 ttmp1,
|
||||
SQ_WAVE_PC_HI_HT_SHIFT
|
||||
\n s_cbranch_scc1.not_s_trap
|
||||
\n s_bitcmp1_b32 ttmp1, SQ_WAVE_PC_HI_HT_SHIFT
|
||||
\n s_cbranch_scc1 .not_s_trap
|
||||
\n
|
||||
\n // It's an s_trap; advance the PC
|
||||
\n s_add_u32 ttmp0, ttmp0, 0x4
|
||||
\n s_addc_u32 ttmp1, ttmp1,
|
||||
0x0
|
||||
\n s_add_u32 ttmp0, ttmp0, 0x4
|
||||
\n s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
\n
|
||||
\n // If llvm.debugtrap and debugger is not attached.
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
|
||||
\n s_cbranch_scc0.no_skip_debugtrap
|
||||
\n s_bitcmp0_b32 ttmp11,
|
||||
TTMP_DEBUG_ENABLED_SHIFT
|
||||
\n s_cbranch_scc0
|
||||
.no_skip_debugtrap
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
|
||||
\n s_cbranch_scc0 .no_skip_debugtrap
|
||||
\n s_bitcmp0_b32 ttmp11, TTMP_DEBUG_ENABLED_SHIFT
|
||||
\n s_cbranch_scc0 .no_skip_debugtrap
|
||||
\n
|
||||
\n // Ignore llvm.debugtrap.
|
||||
\n s_branch.exit_trap
|
||||
\n s_branch .exit_trap
|
||||
\n
|
||||
\n.not_s_trap :
|
||||
\n.no_skip_debugtrap :
|
||||
\n.not_s_trap:
|
||||
\n.no_skip_debugtrap:
|
||||
\n // Save trap id and halt status in ttmp6.
|
||||
\n s_andn2_b32 ttmp6,
|
||||
ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
|
||||
\n s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_min_u32 ttmp2, ttmp2, 0xF
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
\n s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATUS_HALT_BFE
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6,
|
||||
ttmp2
|
||||
\n s_andn2_b32 ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
|
||||
\n s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_min_u32 ttmp2, ttmp2, 0xF
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
\n s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATUS_HALT_BFE
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
\n
|
||||
\n // Fetch doorbell id for our queue.
|
||||
\n s_sendmsg_rtn_b32 ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
|
||||
\n s_waitcnt lgkmcnt(0)
|
||||
\n s_and_b32 ttmp3, ttmp3,
|
||||
DOORBELL_ID_MASK
|
||||
\n s_sendmsg_rtn_b32 ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
|
||||
\n s_waitcnt lgkmcnt(0)
|
||||
\n s_and_b32 ttmp3, ttmp3, DOORBELL_ID_MASK
|
||||
\n
|
||||
\n // Map trap reason to an exception code.
|
||||
\n s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
\n s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS)
|
||||
\n
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT
|
||||
\n s_cbranch_scc0.not_memory_violation
|
||||
\n s_or_b32 ttmp3, ttmp3,
|
||||
EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_XNACK_ERROR_SHIFT
|
||||
\n s_cbranch_scc0 .not_memory_violation
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
|
||||
\n
|
||||
\n // Aperture violation requires XNACK_ERROR == 0.
|
||||
\n s_branch.not_aperture_violation
|
||||
\n s_branch .not_aperture_violation
|
||||
\n
|
||||
\n.not_memory_violation :
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT
|
||||
\n s_cbranch_scc0.not_aperture_violation
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
|
||||
\n.not_memory_violation:
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT
|
||||
\n s_cbranch_scc0 .not_aperture_violation
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
|
||||
\n
|
||||
\n.not_aperture_violation :
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
|
||||
\n s_cbranch_scc0.not_illegal_instruction
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
|
||||
\n.not_aperture_violation:
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_TRAPSTS_ILLEGAL_INST_SHIFT
|
||||
\n s_cbranch_scc0 .not_illegal_instruction
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
|
||||
\n
|
||||
\n.not_illegal_instruction :
|
||||
\n s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_MATH_EXCP
|
||||
\n s_cbranch_scc0.not_math_exception
|
||||
\n s_getreg_b32 ttmp7, hwreg(HW_REG_MODE)
|
||||
\n s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
|
||||
\n s_and_b32 ttmp2, ttmp2, ttmp7
|
||||
\n s_cbranch_scc0.not_math_exception
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
|
||||
\n.not_illegal_instruction:
|
||||
\n s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_MATH_EXCP
|
||||
\n s_cbranch_scc0 .not_math_exception
|
||||
\n s_getreg_b32 ttmp7, hwreg(HW_REG_MODE)
|
||||
\n s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
|
||||
\n s_and_b32 ttmp2, ttmp2, ttmp7
|
||||
\n s_cbranch_scc0 .not_math_exception
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
|
||||
\n
|
||||
\n.not_math_exception :
|
||||
\n s_bfe_u32 ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
|
||||
\n s_cbranch_scc0.not_abort_trap
|
||||
\n s_or_b32 ttmp3, ttmp3,
|
||||
EC_QUEUE_WAVE_ABORT_M0
|
||||
\n.not_math_exception:
|
||||
\n s_bfe_u32 ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
|
||||
\n s_cbranch_scc0 .not_abort_trap
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
|
||||
\n
|
||||
\n.not_abort_trap :
|
||||
\n.not_abort_trap:
|
||||
\n // If no other exception was flagged then report a generic error.
|
||||
\n s_andn2_b32 ttmp2, ttmp3, DOORBELL_ID_MASK
|
||||
\n s_cbranch_scc1.send_interrupt
|
||||
\n s_or_b32 ttmp3, ttmp3,
|
||||
EC_QUEUE_WAVE_TRAP_M0
|
||||
\n s_andn2_b32 ttmp2, ttmp3, DOORBELL_ID_MASK
|
||||
\n s_cbranch_scc1 .send_interrupt
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n.send_interrupt :
|
||||
\n.send_interrupt:
|
||||
\n // m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
|
||||
\n s_mov_b32 ttmp2, m0
|
||||
\n s_mov_b32 m0,
|
||||
ttmp3
|
||||
\n s_nop 0x0 // Manually inserted wait states
|
||||
\n s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
\n s_waitcnt lgkmcnt(0) // Wait for the message to go out.
|
||||
\n s_mov_b32 m0,
|
||||
ttmp2
|
||||
\n s_mov_b32 ttmp2, m0
|
||||
\n s_mov_b32 m0, ttmp3
|
||||
\n s_nop 0x0 // Manually inserted wait states
|
||||
\n s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
\n s_waitcnt lgkmcnt(0) // Wait for the message to go out.
|
||||
\n s_mov_b32 m0, ttmp2
|
||||
\n
|
||||
\n // Parking the wave requires saving the original pc in the preserved ttmps.
|
||||
\n // Register layout before parking the wave:
|
||||
@@ -231,147 +217,132 @@ const char* TrapHandlerCode = RUNTIME_KERNEL(
|
||||
\n // ttmp7: pc_lo[31:0]
|
||||
\n // ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
|
||||
\n // Save the PC
|
||||
\n s_mov_b32 ttmp7, ttmp0
|
||||
\n s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
|
||||
\n s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
|
||||
\n s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
|
||||
\n s_or_b32 ttmp11, ttmp11,
|
||||
ttmp1
|
||||
\n s_mov_b32 ttmp7, ttmp0
|
||||
\n s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
|
||||
\n s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
|
||||
\n s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
|
||||
\n s_or_b32 ttmp11, ttmp11, ttmp1
|
||||
\n
|
||||
\n // Park the wave
|
||||
\n s_getpc_b64[ttmp0, ttmp1]
|
||||
\n s_add_u32 ttmp0, ttmp0, .parked -.
|
||||
\n s_addc_u32 ttmp1, ttmp1,
|
||||
0x0
|
||||
\n s_getpc_b64 [ttmp0, ttmp1]
|
||||
\n s_add_u32 ttmp0, ttmp0, .parked - .
|
||||
\n s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
\n
|
||||
\n.halt_wave :
|
||||
\n.halt_wave:
|
||||
\n // Halt the wavefront upon restoring STATUS below.
|
||||
\n s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
|
||||
\n s_bitset1_b32 ttmp12,
|
||||
SQ_WAVE_STATUS_HALT_SHIFT
|
||||
\n s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
|
||||
\n s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_HALT_SHIFT
|
||||
\n // Set WAVE.SKIP_EXPORT as a maker so the debugger knows the trap handler was
|
||||
\n // entered and has decided to halt the wavee.
|
||||
\n s_bitset1_b32 ttmp12,
|
||||
SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT
|
||||
\n s_bitset1_b32 ttmp12, SQ_WAVE_STATUS_TRAP_SKIP_EXPORT_SHIFT
|
||||
\n
|
||||
\n.exit_trap :
|
||||
\n.exit_trap:
|
||||
\n // Restore SQ_WAVE_STATUS.
|
||||
\n s_and_b64 exec, exec,
|
||||
exec // restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
\n s_and_b64 vcc, vcc,
|
||||
vcc // restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
\n s_setreg_b32 hwreg(HW_REG_STATUS),
|
||||
ttmp12
|
||||
\n s_and_b64 exec, exec, exec // restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
\n s_and_b64 vcc, vcc, vcc // restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
\n s_setreg_b32 hwreg(HW_REG_STATUS), ttmp12
|
||||
\n
|
||||
\n // Return to original (possibly modified) PC.
|
||||
\n s_rfe_b64[ttmp0, ttmp1]
|
||||
\n s_rfe_b64 [ttmp0, ttmp1]
|
||||
\n
|
||||
\n.parked :
|
||||
\n s_trap 0x2
|
||||
\n s_branch.parked
|
||||
\n.parked:
|
||||
\n s_trap 0x2
|
||||
\n s_branch .parked
|
||||
\n
|
||||
\n // For gfx11, add padding instructions so we can ensure instruction cache
|
||||
\n // prefetch always has something to load.
|
||||
\n.rept(256 - ((.- trap_entry) % 64)) / 4
|
||||
\n s_code_end
|
||||
\n// For gfx11, add padding instructions so we can ensure instruction cache
|
||||
\n// prefetch always has something to load.
|
||||
\n.rept (256 - ((. - trap_entry) % 64)) / 4
|
||||
\n s_code_end
|
||||
\n.endr
|
||||
\n.elseif.amdgcn.gfx_generation_number == 12
|
||||
\n.set DOORBELL_ID_SIZE, 10
|
||||
\n.set DOORBELL_ID_MASK, ((1 << DOORBELL_ID_SIZE) - 1)
|
||||
\n.set EC_QUEUE_WAVE_ABORT_M0, (1 << (DOORBELL_ID_SIZE + 0))
|
||||
\n.set EC_QUEUE_WAVE_TRAP_M0, (1 << (DOORBELL_ID_SIZE + 1))
|
||||
\n.set EC_QUEUE_WAVE_MATH_ERROR_M0, (1 << (DOORBELL_ID_SIZE + 2))
|
||||
\n.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0, (1 << (DOORBELL_ID_SIZE + 3))
|
||||
\n.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0, (1 << (DOORBELL_ID_SIZE + 4))
|
||||
\n.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0, (1 << (DOORBELL_ID_SIZE + 5))
|
||||
\n.elseif .amdgcn.gfx_generation_number == 12
|
||||
\n.set DOORBELL_ID_SIZE , 10
|
||||
\n.set DOORBELL_ID_MASK , ((1 << DOORBELL_ID_SIZE) - 1)
|
||||
\n.set EC_QUEUE_WAVE_ABORT_M0 , (1 << (DOORBELL_ID_SIZE + 0))
|
||||
\n.set EC_QUEUE_WAVE_TRAP_M0 , (1 << (DOORBELL_ID_SIZE + 1))
|
||||
\n.set EC_QUEUE_WAVE_MATH_ERROR_M0 , (1 << (DOORBELL_ID_SIZE + 2))
|
||||
\n.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0 , (1 << (DOORBELL_ID_SIZE + 3))
|
||||
\n.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 4))
|
||||
\n.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 5))
|
||||
\n
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK, (1 << 4) - 1
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT, 4
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT, 6
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT, 7
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT, 8
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_SHIFT, 9
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_SHIFT, 11
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT, 12
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK , (1 << 4) - 1
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT , 4
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT , 6
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT , 7
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT , 8
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_SHIFT , 9
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_SHIFT , 11
|
||||
\n.set SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT , 12
|
||||
\n
|
||||
\n.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT, 0
|
||||
\n.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE, 7
|
||||
\n.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT , 0
|
||||
\n.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE , 7
|
||||
\n
|
||||
\n.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_MASK, ((1 << 7) - 1)
|
||||
\n.set SQ_WAVE_TRAP_CTRL_ADDR_WATCH_SHIFT, 7
|
||||
\n.set SQ_WAVE_TRAP_CTRL_WAVE_END_SHIFT, 8
|
||||
\n.set SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST, 9
|
||||
\n.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_MASK , ((1 << 7) - 1)
|
||||
\n.set SQ_WAVE_TRAP_CTRL_ADDR_WATCH_SHIFT , 7
|
||||
\n.set SQ_WAVE_TRAP_CTRL_WAVE_END_SHIFT , 8
|
||||
\n.set SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST , 9
|
||||
\n
|
||||
\n.set SQ_WAVE_PC_HI_ADDRESS_MASK, 0xFFFF
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_BFE,
|
||||
(SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT, 28
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SIZE, 4
|
||||
\n.set SQ_WAVE_STATE_PRIV_HALT_BFE, (SQ_WAVE_STATE_PRIV_HALT_SHIFT | (1 << 16))
|
||||
\n.set SQ_WAVE_STATE_PRIV_HALT_SHIFT, 14
|
||||
\n.set SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT, 2
|
||||
\n.set TRAP_ID_ABORT, 2
|
||||
\n.set TRAP_ID_DEBUGTRAP, 3
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_MASK, (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_SHIFT, 29
|
||||
\n.set TTMP6_SAVED_TRAP_ID_BFE, (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
|
||||
\n.set TTMP6_SAVED_TRAP_ID_MASK,
|
||||
(((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SHIFT, 25
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SIZE, 4
|
||||
\n.set TTMP6_WAVE_STOPPED_SHIFT, 30
|
||||
\n.set TTMP8_DEBUG_FLAG_SHIFT, 31
|
||||
\n.set TTMP11_DEBUG_ENABLED_SHIFT, 23
|
||||
\n.set TTMP_PC_HI_SHIFT,
|
||||
7
|
||||
\n.set SQ_WAVE_PC_HI_ADDRESS_MASK , 0xFFFF
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT , 28
|
||||
\n.set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 4
|
||||
\n.set SQ_WAVE_STATE_PRIV_HALT_BFE , (SQ_WAVE_STATE_PRIV_HALT_SHIFT | (1 << 16))
|
||||
\n.set SQ_WAVE_STATE_PRIV_HALT_SHIFT , 14
|
||||
\n.set SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT , 2
|
||||
\n.set TRAP_ID_ABORT , 2
|
||||
\n.set TRAP_ID_DEBUGTRAP , 3
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_MASK , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
|
||||
\n.set TTMP6_SAVED_STATUS_HALT_SHIFT , 29
|
||||
\n.set TTMP6_SAVED_TRAP_ID_BFE , (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
|
||||
\n.set TTMP6_SAVED_TRAP_ID_MASK , (((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SHIFT , 25
|
||||
\n.set TTMP6_SAVED_TRAP_ID_SIZE , 4
|
||||
\n.set TTMP6_WAVE_STOPPED_SHIFT , 30
|
||||
\n.set TTMP8_DEBUG_FLAG_SHIFT , 31
|
||||
\n.set TTMP11_DEBUG_ENABLED_SHIFT , 23
|
||||
\n.set TTMP_PC_HI_SHIFT , 7
|
||||
\n
|
||||
\n // ABI between first and second level trap handler:
|
||||
\n // { ttmp1, ttmp0 } = TrapID[3:0], zeros, PC[47:0]
|
||||
\n // ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], 0[5:0]
|
||||
\n // ttmp12 = SQ_WAVE_STATE_PRIV
|
||||
\n // ttmp14 = TMA[31:0]
|
||||
\n // ttmp15 = TMA[63:32]
|
||||
\n// ABI between first and second level trap handler:
|
||||
\n// { ttmp1, ttmp0 } = TrapID[3:0], zeros, PC[47:0]
|
||||
\n// ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], 0[5:0]
|
||||
\n// ttmp12 = SQ_WAVE_STATE_PRIV
|
||||
\n// ttmp14 = TMA[31:0]
|
||||
\n// ttmp15 = TMA[63:32]
|
||||
\n
|
||||
\n.globl trap_entry
|
||||
\n.type trap_entry,
|
||||
@function
|
||||
\n.align 256
|
||||
\n .globl trap_entry
|
||||
\n .type trap_entry,@function
|
||||
\n .align 256
|
||||
\n
|
||||
\ntrap_entry :
|
||||
\ntrap_entry:
|
||||
\n // Clear ttmp3 as it will contain the exception code.
|
||||
\n s_mov_b32 ttmp3,
|
||||
0
|
||||
\n s_mov_b32 ttmp3, 0
|
||||
\n
|
||||
\n // Branch if not a trap (an exception instead).
|
||||
\n s_bfe_u32 ttmp2, ttmp1,
|
||||
SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_cbranch_scc0.check_exceptions
|
||||
\n s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_cbranch_scc0 .check_exceptions
|
||||
\n
|
||||
\n // If caused by s_trap then advance PC, then figure out the trap ID:
|
||||
\n // - if trapID is DEBUGTRAP and debugger is attach, report WAVE_TRAP,
|
||||
\n // - if trapID is ABORTTRAP, report WAVE_ABORT,
|
||||
\n // - report WAVE_TRAP for any other trap ID.
|
||||
\n s_add_u32 ttmp0, ttmp0, 0x4
|
||||
\n s_addc_u32 ttmp1, ttmp1,
|
||||
0x0
|
||||
\n s_add_u32 ttmp0, ttmp0, 0x4
|
||||
\n s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
\n
|
||||
\n // If llvm.debugtrap and debugger is not attached.
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
|
||||
\n s_cbranch_scc0.not_debug_trap
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
|
||||
\n s_cbranch_scc0 .not_debug_trap
|
||||
\n
|
||||
\n s_bitcmp1_b32 ttmp11, TTMP11_DEBUG_ENABLED_SHIFT
|
||||
\n s_cbranch_scc0.check_exceptions
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n s_bitcmp1_b32 ttmp11, TTMP11_DEBUG_ENABLED_SHIFT
|
||||
\n s_cbranch_scc0 .check_exceptions
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n.not_debug_trap :
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
|
||||
\n s_cbranch_scc0.not_abort_trap
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
|
||||
\n s_branch.check_exceptions
|
||||
\n.not_debug_trap:
|
||||
\n s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
|
||||
\n s_cbranch_scc0 .not_abort_trap
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
|
||||
\n s_branch .check_exceptions
|
||||
\n
|
||||
\n.not_abort_trap :
|
||||
\n s_or_b32 ttmp3, ttmp3,
|
||||
EC_QUEUE_WAVE_TRAP_M0
|
||||
\n.not_abort_trap:
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n // We need to explititly look for all exceptions we want to report to the
|
||||
\n // host:
|
||||
@@ -385,98 +356,89 @@ const char* TrapHandlerCode = RUNTIME_KERNEL(
|
||||
\n // - TRAP_CTRL.TRAP_AFTER_INST -> WAVE_TRAP
|
||||
\n // - EXCP_FLAG_PRIV.ADDR_WATCH && TRAP_CTL.WATCH -> WAVE_TRAP
|
||||
\n // - (EXCP_FLAG_USER[ALU] & TRAP_CTRL[ALU]) != 0 -> WAVE_MATH_ERROR
|
||||
\n.check_exceptions :
|
||||
\n s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)
|
||||
\n s_getreg_b32 ttmp13, hwreg(HW_REG_TRAP_CTRL)
|
||||
\n.check_exceptions:
|
||||
\n s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)
|
||||
\n s_getreg_b32 ttmp13, hwreg(HW_REG_TRAP_CTRL)
|
||||
\n
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT
|
||||
\n s_cbranch_scc0.not_memory_violation
|
||||
\n s_or_b32 ttmp3, ttmp3,
|
||||
EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT
|
||||
\n s_cbranch_scc0 .not_memory_violation
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
|
||||
\n
|
||||
\n // Aperture violation requires XNACK_ERROR == 0.
|
||||
\n s_branch.not_aperture_violation
|
||||
\n s_branch .not_aperture_violation
|
||||
\n
|
||||
\n.not_memory_violation :
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT
|
||||
\n s_cbranch_scc0.not_aperture_violation
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
|
||||
\n.not_memory_violation:
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT
|
||||
\n s_cbranch_scc0 .not_aperture_violation
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
|
||||
\n
|
||||
\n.not_aperture_violation :
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
|
||||
\n s_cbranch_scc0.not_illegal_instruction
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
|
||||
\n.not_aperture_violation:
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
|
||||
\n s_cbranch_scc0 .not_illegal_instruction
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
|
||||
\n
|
||||
\n.not_illegal_instruction :
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
|
||||
\n s_cbranch_scc0.not_wave_end
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n.not_illegal_instruction:
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
|
||||
\n s_cbranch_scc0 .not_wave_end
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n.not_wave_start :
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_SHIFT
|
||||
\n s_cbranch_scc0.not_wave_end
|
||||
\n s_bitcmp1_b32 ttmp13, SQ_WAVE_TRAP_CTRL_WAVE_END_SHIFT
|
||||
\n s_cbranch_scc0.not_wave_end
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n.not_wave_start:
|
||||
\n s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_SHIFT
|
||||
\n s_cbranch_scc0 .not_wave_end
|
||||
\n s_bitcmp1_b32 ttmp13, SQ_WAVE_TRAP_CTRL_WAVE_END_SHIFT
|
||||
\n s_cbranch_scc0 .not_wave_end
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n.not_wave_end :
|
||||
\n s_bitcmp1_b32 ttmp13, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST
|
||||
\n s_cbranch_scc0.not_trap_after_inst
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n.not_wave_end:
|
||||
\n s_bitcmp1_b32 ttmp13, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST
|
||||
\n s_cbranch_scc0 .not_trap_after_inst
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n.not_trap_after_inst :
|
||||
\n s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
|
||||
\n s_cbranch_scc0.not_addr_watch
|
||||
\n s_bitcmp1_b32 ttmp13, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_SHIFT
|
||||
\n s_cbranch_scc0.not_addr_watch
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n.not_trap_after_inst:
|
||||
\n s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
|
||||
\n s_cbranch_scc0 .not_addr_watch
|
||||
\n s_bitcmp1_b32 ttmp13, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_SHIFT
|
||||
\n s_cbranch_scc0 .not_addr_watch
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
|
||||
\n
|
||||
\n.not_addr_watch :
|
||||
\n s_getreg_b32 ttmp2,
|
||||
hwreg(HW_REG_EXCP_FLAG_USER, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT,
|
||||
SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE)
|
||||
\n s_and_b32 ttmp13,
|
||||
ttmp13, SQ_WAVE_TRAP_CTRL_MATH_EXCP_MASK
|
||||
\n s_and_b32 ttmp2, ttmp2, ttmp13
|
||||
\n s_cbranch_scc0.not_math_exception
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
|
||||
\n.not_addr_watch:
|
||||
\n s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_USER, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE)
|
||||
\n s_and_b32 ttmp13, ttmp13, SQ_WAVE_TRAP_CTRL_MATH_EXCP_MASK
|
||||
\n s_and_b32 ttmp2, ttmp2, ttmp13
|
||||
\n s_cbranch_scc0 .not_math_exception
|
||||
\n s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
|
||||
\n
|
||||
\n.not_math_exception :
|
||||
\n s_cmp_eq_u32 ttmp3,
|
||||
0
|
||||
\n.not_math_exception:
|
||||
\n s_cmp_eq_u32 ttmp3, 0
|
||||
\n // This was not a s_trap we are interested in or an exception, return to
|
||||
\n // the user code.
|
||||
\n s_cbranch_scc1.exit_trap
|
||||
\n s_cbranch_scc1 .exit_trap
|
||||
\n
|
||||
\n.send_interrupt :
|
||||
\n.send_interrupt:
|
||||
\n // Fetch doorbell id for our queue.
|
||||
\n s_sendmsg_rtn_b32 ttmp2, sendmsg(MSG_RTN_GET_DOORBELL)
|
||||
\n s_wait_kmcnt 0
|
||||
\n s_and_b32 ttmp2, ttmp2, DOORBELL_ID_MASK
|
||||
\n s_or_b32 ttmp3, ttmp2,
|
||||
ttmp3
|
||||
\n s_sendmsg_rtn_b32 ttmp2, sendmsg(MSG_RTN_GET_DOORBELL)
|
||||
\n s_wait_kmcnt 0
|
||||
\n s_and_b32 ttmp2, ttmp2, DOORBELL_ID_MASK
|
||||
\n s_or_b32 ttmp3, ttmp2, ttmp3
|
||||
\n
|
||||
\n // Save trap id and halt status in ttmp6.
|
||||
\n s_andn2_b32 ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
|
||||
\n s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_min_u32 ttmp2, ttmp2, 0xF
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
\n s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATE_PRIV_HALT_BFE
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6,
|
||||
ttmp2
|
||||
\n s_andn2_b32 ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
|
||||
\n s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
|
||||
\n s_min_u32 ttmp2, ttmp2, 0xF
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
\n s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATE_PRIV_HALT_BFE
|
||||
\n s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
|
||||
\n s_or_b32 ttmp6, ttmp6, ttmp2
|
||||
\n
|
||||
\n // m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
|
||||
\n s_mov_b32 ttmp2, m0
|
||||
\n s_mov_b32 m0,
|
||||
ttmp3
|
||||
\n s_nop 0x0 // Manually inserted wait states
|
||||
\n s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
\n s_mov_b32 ttmp2, m0
|
||||
\n s_mov_b32 m0, ttmp3
|
||||
\n s_nop 0x0 // Manually inserted wait states
|
||||
\n s_sendmsg sendmsg(MSG_INTERRUPT)
|
||||
\n // Wait for the message to go out.
|
||||
\n s_wait_kmcnt 0
|
||||
\n s_mov_b32 m0,
|
||||
ttmp2
|
||||
\n s_wait_kmcnt 0
|
||||
\n s_mov_b32 m0, ttmp2
|
||||
\n
|
||||
\n // Parking the wave requires saving the original pc in the preserved ttmps.
|
||||
\n // Register layout before parking the wave:
|
||||
@@ -490,57 +452,50 @@ const char* TrapHandlerCode = RUNTIME_KERNEL(
|
||||
\n // ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
|
||||
\n //
|
||||
\n // Save the PC
|
||||
\n s_mov_b32 ttmp10, ttmp0
|
||||
\n s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
|
||||
\n s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
|
||||
\n s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
|
||||
\n s_or_b32 ttmp11, ttmp11,
|
||||
ttmp1
|
||||
\n s_mov_b32 ttmp10, ttmp0
|
||||
\n s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
|
||||
\n s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
|
||||
\n s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
|
||||
\n s_or_b32 ttmp11, ttmp11, ttmp1
|
||||
\n
|
||||
\n // Park the wave
|
||||
\n s_getpc_b64[ttmp0, ttmp1]
|
||||
\n s_add_u32 ttmp0, ttmp0, .parked -.
|
||||
\n s_addc_u32 ttmp1, ttmp1,
|
||||
0x0
|
||||
\n s_getpc_b64 [ttmp0, ttmp1]
|
||||
\n s_add_u32 ttmp0, ttmp0, .parked - .
|
||||
\n s_addc_u32 ttmp1, ttmp1, 0x0
|
||||
\n
|
||||
\n.halt_wave :
|
||||
\n.halt_wave:
|
||||
\n // Halt the wavefront upon restoring STATUS below.
|
||||
\n s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
|
||||
\n s_bitset1_b32 ttmp12,
|
||||
SQ_WAVE_STATE_PRIV_HALT_SHIFT
|
||||
\n s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
|
||||
\n s_bitset1_b32 ttmp12, SQ_WAVE_STATE_PRIV_HALT_SHIFT
|
||||
\n
|
||||
\n // Initialize TTMP registers
|
||||
\n s_bitcmp1_b32 ttmp8, TTMP8_DEBUG_FLAG_SHIFT
|
||||
\n s_cbranch_scc1.ttmps_initialized
|
||||
\n s_mov_b32 ttmp4, 0
|
||||
\n s_mov_b32 ttmp5, 0
|
||||
\n s_bitset1_b32 ttmp8,
|
||||
TTMP8_DEBUG_FLAG_SHIFT
|
||||
\n.ttmps_initialized :
|
||||
\n s_bitcmp1_b32 ttmp8, TTMP8_DEBUG_FLAG_SHIFT
|
||||
\n s_cbranch_scc1 .ttmps_initialized
|
||||
\n s_mov_b32 ttmp4, 0
|
||||
\n s_mov_b32 ttmp5, 0
|
||||
\n s_bitset1_b32 ttmp8, TTMP8_DEBUG_FLAG_SHIFT
|
||||
\n.ttmps_initialized:
|
||||
\n
|
||||
\n.exit_trap :
|
||||
\n.exit_trap:
|
||||
\n // Restore SQ_WAVE_STATUS.
|
||||
\n s_and_b64 exec, exec,
|
||||
exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
\n s_and_b64 vcc, vcc,
|
||||
vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
\n s_setreg_b32 hwreg(HW_REG_STATE_PRIV, 0, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT), ttmp12
|
||||
\n s_lshr_b32 ttmp12, ttmp12,
|
||||
(SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1)
|
||||
\n s_setreg_b32 hwreg(HW_REG_STATE_PRIV, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1,
|
||||
32 - SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT - 1),
|
||||
ttmp12
|
||||
\n s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
\n s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
|
||||
\n s_setreg_b32 hwreg(HW_REG_STATE_PRIV, 0, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT), ttmp12
|
||||
\n s_lshr_b32 ttmp12, ttmp12, (SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1)
|
||||
\n s_setreg_b32 hwreg(HW_REG_STATE_PRIV, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT + 1, 32 - SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_SHIFT - 1), ttmp12
|
||||
\n
|
||||
\n // Return to original (possibly modified) PC.
|
||||
\n s_rfe_b64[ttmp0, ttmp1]
|
||||
\n s_rfe_b64 [ttmp0, ttmp1]
|
||||
\n
|
||||
\n.parked :
|
||||
\n s_trap 0x2
|
||||
\n s_branch.parked
|
||||
\n.parked:
|
||||
\n s_trap 0x2
|
||||
\n s_branch .parked
|
||||
\n
|
||||
\n // Add s_code_end padding so instruction prefetch always has something to read.
|
||||
\n.rept(256 - ((.- trap_entry) % 64)) / 4
|
||||
\n s_code_end
|
||||
\n// Add s_code_end padding so instruction prefetch always has something to read.
|
||||
\n.rept (256 - ((. - trap_entry) % 64)) / 4
|
||||
\n s_code_end
|
||||
\n.endr
|
||||
\n.endif \n);
|
||||
\n.endif
|
||||
\n);
|
||||
} // namespace amd::pal
|
||||
// clang-format on
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача