Add GFX12 trap handler
Given the differences between previous architectures and gfx12, this patch implements the gfx12 2nd level trap handler in a separate source file, and adjusts the build system.

Change-Id: I65192ffbbcd66a4f78d2d0c3fb1739a92cac95d4
Signed-off-by: Lancelot SIX <lancelot.six@amd.com>
Signed-off-by: Chris Freehill <cfreehil@amd.com>
Этот коммит содержится в:
коммит произвёл
Chris Freehill
родитель
24463635f9
Коммит
855015377c
@@ -293,8 +293,7 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
|
||||
{kCodeTrapHandlerV2_1010, sizeof(kCodeTrapHandlerV2_1010), 2, 4},// gfx1010
|
||||
{kCodeTrapHandlerV2_10, sizeof(kCodeTrapHandlerV2_10), 2, 4}, // gfx10
|
||||
{kCodeTrapHandlerV2_11, sizeof(kCodeTrapHandlerV2_11), 2, 4}, // gfx11
|
||||
// GFX12_TODO: Using one for GFX11 for now.
|
||||
{kCodeTrapHandlerV2_11, sizeof(kCodeTrapHandlerV2_11), 2, 4}, // gfx12
|
||||
{kCodeTrapHandlerV2_12, sizeof(kCodeTrapHandlerV2_12), 2, 4}, // gfx12
|
||||
}},
|
||||
{"CopyAligned",
|
||||
{
|
||||
|
||||
@@ -46,8 +46,9 @@ cmake_minimum_required ( VERSION 3.7 )
|
||||
find_package(Clang REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )
|
||||
find_package(LLVM REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )
|
||||
|
||||
set (TARGET_DEVS "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100")
|
||||
set (POSTFIX "9;940;941;942;1010;10;11")
|
||||
set (TARGET_DEVS "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1200")
|
||||
set (POSTFIX "9;940;941;942;1010;10;11;12")
|
||||
set (SOURCE_SUFFIX ";;;;;;;_gfx12")
|
||||
|
||||
if(${CMAKE_VERBOSE_MAKEFILE})
|
||||
get_property(clang_path TARGET clang PROPERTY LOCATION)
|
||||
@@ -92,11 +93,11 @@ endfunction(gen_kernel_bc)
|
||||
##==========================================
|
||||
## Find device code object name and forward to custom command
|
||||
##==========================================
|
||||
function(build_kernel TRAP_HANDLER_NAME TARGET_ID POSTFIX)
|
||||
function(build_kernel TRAP_HANDLER_NAME TARGET_ID POSTFIX SOURCE_SUFFIX)
|
||||
|
||||
## generate trap handler object code files
|
||||
set (CODE_OBJECT_FILE "${TRAP_HANDLER_NAME}_${POSTFIX}")
|
||||
set (TRAP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/trap_handler.s")
|
||||
set (TRAP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/trap_handler${SOURCE_SUFFIX}.s")
|
||||
gen_kernel_bc(${TARGET_ID} ${TRAP_FILE} ${CODE_OBJECT_FILE})
|
||||
|
||||
## Build a list of code object file names
|
||||
@@ -117,10 +118,11 @@ function(build_kernel_for_devices TRAP_HANDLER_NAME)
|
||||
foreach(ind RANGE ${dev_count})
|
||||
list(GET TARGET_DEVS ${ind} dev)
|
||||
list(GET POSTFIX ${ind} post)
|
||||
list(GET SOURCE_SUFFIX ${ind} suffix)
|
||||
if(${CMAKE_VERBOSE_MAKEFILE})
|
||||
message("\n Generating: ${dev} ...")
|
||||
endif()
|
||||
build_kernel(${TRAP_HANDLER_NAME} ${dev} ${post})
|
||||
build_kernel(${TRAP_HANDLER_NAME} ${dev} ${post} "${suffix}")
|
||||
endforeach(ind)
|
||||
|
||||
set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)
|
||||
|
||||
@@ -0,0 +1,228 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/// Trap Handler V2 source
///
/// Assemble-time constants. Every symbol is listed after the symbols its
/// expression depends on, and related symbols are grouped together.

// Interrupt payload layout: a doorbell-id field of DOORBELL_ID_SIZE bits at
// bit 0, followed by one bit per exception code.
.set DOORBELL_ID_SIZE                          , 10
.set DOORBELL_ID_MASK                          , ((1 << DOORBELL_ID_SIZE) - 1)
.set EC_QUEUE_WAVE_ABORT_M0                    , (1 << (DOORBELL_ID_SIZE + 0))
.set EC_QUEUE_WAVE_TRAP_M0                     , (1 << (DOORBELL_ID_SIZE + 1))
.set EC_QUEUE_WAVE_MATH_ERROR_M0               , (1 << (DOORBELL_ID_SIZE + 2))
.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0      , (1 << (DOORBELL_ID_SIZE + 3))
.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0         , (1 << (DOORBELL_ID_SIZE + 4))
.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0       , (1 << (DOORBELL_ID_SIZE + 5))

// Bit positions inside the privileged exception-flag register.
.set SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT      , 4
.set SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT , 6
.set SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT           , 7
.set SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT  , 8

// Math-exception bit fields (offset, width) in the user exception-flag
// register and in the trap-control register.
.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT    , 0
.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE     , 6
.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_SHIFT         , 0
.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_SIZE          , 6

// PC_HI word: address bits in the low half, trap id in the top nibble.
// The *_BFE symbols pack a field as (offset | (width << 16)).
.set SQ_WAVE_PC_HI_ADDRESS_MASK                , 0xFFFF
.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT               , 28
.set SQ_WAVE_PC_HI_TRAP_ID_SIZE                , 4
.set SQ_WAVE_PC_HI_TRAP_ID_BFE                 , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))

// HALT bit of the privileged wave state (single-bit field).
.set SQ_WAVE_STATE_PRIV_HALT_SHIFT             , 14
.set SQ_WAVE_STATE_PRIV_HALT_BFE               , (SQ_WAVE_STATE_PRIV_HALT_SHIFT | (1 << 16))

// Trap ids that the handler compares against.
.set TRAP_ID_ABORT                             , 2
.set TRAP_ID_DEBUGTRAP                         , 3

// Fields kept in ttmp6: saved trap id, saved halt status, wave-stopped flag.
.set TTMP6_SAVED_TRAP_ID_SHIFT                 , 25
.set TTMP6_SAVED_TRAP_ID_SIZE                  , 4
.set TTMP6_SAVED_TRAP_ID_MASK                  , (((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
.set TTMP6_SAVED_TRAP_ID_BFE                   , (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
.set TTMP6_SAVED_STATUS_HALT_SHIFT             , 29
.set TTMP6_SAVED_STATUS_HALT_MASK              , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
.set TTMP6_WAVE_STOPPED_SHIFT                  , 30

// Single-bit flags and field offsets in the remaining ttmp registers.
.set TTMP8_DEBUG_FLAG_SHIFT                    , 31
.set TTMP11_DEBUG_ENABLED_SHIFT                , 23
.set TTMP_PC_HI_SHIFT                          , 7
|
||||
|
||||
// ABI between first and second level trap handler:
//   { ttmp1, ttmp0 } = TrapID[3:0], zeros, PC[47:0]
//   ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], 0[5:0]
//   ttmp12 = SQ_WAVE_STATE_PRIV
//   ttmp14 = TMA[31:0]
//   ttmp15 = TMA[63:32]
//
// Second-level trap handler entry point.
//
// Flow:
//   1. Extract the trap id. For an s_trap (trap id != 0 and the HT flag is
//      clear) advance the saved PC past the trapping instruction.
//   2. If the trap is llvm.debugtrap and the DebugEnabled bit of ttmp11 is
//      clear, return to the wave without reporting anything.
//   3. Otherwise record the trap id and halt status in ttmp6, build the
//      interrupt payload (doorbell id | exception bits) and send it.
//   4. Save the original PC in ttmp10/ttmp11, point the return PC at
//      .parked, set the HALT bit and return.
//
// ttmp2, ttmp3 and ttmp10 are used as scratch. None of them is an input
// according to the ABI above, so each must be written before it is read.

trap_entry:
  // Branch if not a trap (an exception instead).
  s_bfe_u32            ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
  s_cbranch_scc0       .no_skip_debugtrap

  s_getreg_b32         ttmp3, hwreg(HW_REG_EXCP_FLAG_PRIV)
  s_bitcmp1_b32        ttmp3, SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT

  // If caused by s_trap then advance PC.
  s_cbranch_scc1       .not_s_trap
  s_add_u32            ttmp0, ttmp0, 0x4
  s_addc_u32           ttmp1, ttmp1, 0x0

.not_s_trap:
  // If llvm.debugtrap and debugger is not attached.
  s_cmp_eq_u32         ttmp2, TRAP_ID_DEBUGTRAP
  s_cbranch_scc0       .no_skip_debugtrap

  s_bitcmp0_b32        ttmp11, TTMP11_DEBUG_ENABLED_SHIFT
  s_cbranch_scc0       .no_skip_debugtrap

  // Ignore llvm.debugtrap.
  s_branch             .exit_trap

.no_skip_debugtrap:
  // Save trap id and halt status in ttmp6.
  s_andn2_b32          ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
  s_min_u32            ttmp2, ttmp2, 0xF
  s_lshl_b32           ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
  s_or_b32             ttmp6, ttmp6, ttmp2
  s_bfe_u32            ttmp2, ttmp12, SQ_WAVE_STATE_PRIV_HALT_BFE
  s_lshl_b32           ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
  s_or_b32             ttmp6, ttmp6, ttmp2

  // Load the trap status into ttmp2.
  // This label is also reached straight from trap_entry on the exception
  // path, which skips the read of EXCP_FLAG_PRIV into ttmp3. Read the
  // register here so that both paths test real exception flags.
  s_getreg_b32         ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)

  // Fetch doorbell id for our queue.
  s_sendmsg_rtn_b32    ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
  s_wait_kmcnt         0
  s_and_b32            ttmp3, ttmp3, DOORBELL_ID_MASK

  s_bitcmp1_b32        ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT
  s_cbranch_scc0       .not_memory_violation
  s_or_b32             ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0

  // Aperture violation requires XNACK_ERROR == 0.
  s_branch             .not_aperture_violation

.not_memory_violation:
  s_bitcmp1_b32        ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT
  s_cbranch_scc0       .not_aperture_violation
  s_or_b32             ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0

.not_aperture_violation:
  s_bitcmp1_b32        ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
  s_cbranch_scc0       .not_illegal_instruction
  s_or_b32             ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0

.not_illegal_instruction:
  // Report a math error only if a raised math exception is also enabled in
  // TRAP_CTRL.
  s_getreg_b32         ttmp2, hwreg(HW_REG_EXCP_FLAG_USER, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE)
  // s_getreg_b32 does not update SCC, so compare explicitly. Without this
  // the branch below would reuse SCC from the illegal-instruction check.
  s_cmp_lg_u32         ttmp2, 0x0
  s_cbranch_scc0       .not_math_exception
  s_getreg_b32         ttmp10, hwreg(HW_REG_TRAP_CTRL, SQ_WAVE_TRAP_CTRL_MATH_EXCP_SHIFT, SQ_WAVE_TRAP_CTRL_MATH_EXCP_SIZE)
  s_and_b32            ttmp2, ttmp2, ttmp10

  s_cbranch_scc0       .not_math_exception
  s_or_b32             ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0

.not_math_exception:
  s_bfe_u32            ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
  s_cmp_eq_u32         ttmp2, TRAP_ID_ABORT
  s_cbranch_scc0       .not_abort_trap
  s_or_b32             ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0

.not_abort_trap:
  // If no other exception was flagged then report a generic error.
  s_andn2_b32          ttmp2, ttmp3, DOORBELL_ID_MASK
  s_cbranch_scc1       .send_interrupt
  s_or_b32             ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0

.send_interrupt:
  // m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
  // The wave's own m0 is kept in ttmp2 while the message is sent.
  s_mov_b32            ttmp2, m0
  s_mov_b32            m0, ttmp3
  s_nop                0x0 // Manually inserted wait states
  s_sendmsg            sendmsg(MSG_INTERRUPT)
  // Wait for the message to go out.
  s_wait_kmcnt         0
  s_mov_b32            m0, ttmp2

  // Parking the wave requires saving the original pc in the preserved ttmps.
  // Register layout before parking the wave:
  //
  // ttmp10: ?[31:0]
  // ttmp11: 1st_level_ttmp11[31:23] 0[15:0] 1st_level_ttmp11[6:0]
  //
  // After parking the wave:
  //
  // ttmp10: pc_lo[31:0]
  // ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
  //
  // Save the PC
  s_mov_b32            ttmp10, ttmp0
  s_and_b32            ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
  s_lshl_b32           ttmp1, ttmp1, TTMP_PC_HI_SHIFT
  s_andn2_b32          ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
  s_or_b32             ttmp11, ttmp11, ttmp1

  // Park the wave: make the return PC point at .parked.
  s_getpc_b64          [ttmp0, ttmp1]
  s_add_u32            ttmp0, ttmp0, .parked - .
  s_addc_u32           ttmp1, ttmp1, 0x0

.halt_wave:
  // Halt the wavefront upon restoring STATUS below.
  s_bitset1_b32        ttmp6, TTMP6_WAVE_STOPPED_SHIFT
  s_bitset1_b32        ttmp12, SQ_WAVE_STATE_PRIV_HALT_SHIFT

  // Initialize TTMP registers: if the ttmp8 debug flag is not set yet,
  // zero ttmp4/ttmp5 and set the flag.
  s_bitcmp1_b32        ttmp8, TTMP8_DEBUG_FLAG_SHIFT
  s_cbranch_scc1       .ttmps_initialized
  s_mov_b32            ttmp4, 0
  s_mov_b32            ttmp5, 0
  s_bitset1_b32        ttmp8, TTMP8_DEBUG_FLAG_SHIFT
.ttmps_initialized:

.exit_trap:
  // Restore SQ_WAVE_STATUS.
  s_and_b64            exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
  s_and_b64            vcc, vcc, vcc    // Restore STATUS.VCCZ, not writable by s_setreg_b32
  s_setreg_b32         hwreg(HW_REG_STATE_PRIV), ttmp12

  // Return to original (possibly modified) PC.
  s_rfe_b64            [ttmp0, ttmp1]

.parked:
  // Return target installed above for a parked wave.
  s_trap               0x2
  s_branch             .parked

// Add s_code_end padding so instruction prefetch always has something to read.
.rept (256 - ((. - trap_entry) % 64)) / 4
  s_code_end
.endr
|
||||
Ссылка в новой задаче
Block a user