Given the differences between previous architectures and gfx12, this
patch implements the gfx12 2nd level trap handler in a separate source
file, and adjusts the build system.

Change-Id: I65192ffbbcd66a4f78d2d0c3fb1739a92cac95d4
Signed-off-by: Lancelot SIX <lancelot.six@amd.com>
Signed-off-by: Chris Freehill <cfreehil@amd.com>
Этот коммит содержится в:
Lancelot SIX
2023-12-08 17:48:10 +00:00
коммит произвёл Chris Freehill
родитель 24463635f9
Коммит 855015377c
3 изменённых файлов: 236 добавлений и 7 удалений
+1 -2
Просмотреть файл
@@ -293,8 +293,7 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
{kCodeTrapHandlerV2_1010, sizeof(kCodeTrapHandlerV2_1010), 2, 4},// gfx1010
{kCodeTrapHandlerV2_10, sizeof(kCodeTrapHandlerV2_10), 2, 4}, // gfx10
{kCodeTrapHandlerV2_11, sizeof(kCodeTrapHandlerV2_11), 2, 4}, // gfx11
// GFX12_TODO: Using one for GFX11 for now.
{kCodeTrapHandlerV2_11, sizeof(kCodeTrapHandlerV2_11), 2, 4}, // gfx12
{kCodeTrapHandlerV2_12, sizeof(kCodeTrapHandlerV2_12), 2, 4}, // gfx12
}},
{"CopyAligned",
{
+7 -5
Просмотреть файл
@@ -46,8 +46,9 @@ cmake_minimum_required ( VERSION 3.7 )
find_package(Clang REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )
find_package(LLVM REQUIRED HINTS ${CMAKE_PREFIX_PATH}/llvm PATHS /opt/rocm/llvm )
set (TARGET_DEVS "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100")
set (POSTFIX "9;940;941;942;1010;10;11")
set (TARGET_DEVS "gfx900;gfx940;gfx941;gfx942;gfx1010;gfx1030;gfx1100;gfx1200")
set (POSTFIX "9;940;941;942;1010;10;11;12")
set (SOURCE_SUFFIX ";;;;;;;_gfx12")
if(${CMAKE_VERBOSE_MAKEFILE})
get_property(clang_path TARGET clang PROPERTY LOCATION)
@@ -92,11 +93,11 @@ endfunction(gen_kernel_bc)
##==========================================
## Find device code object name and forward to custom command
##==========================================
function(build_kernel TRAP_HANDLER_NAME TARGET_ID POSTFIX)
function(build_kernel TRAP_HANDLER_NAME TARGET_ID POSTFIX SOURCE_SUFFIX)
## generate trap handler object code files
set (CODE_OBJECT_FILE "${TRAP_HANDLER_NAME}_${POSTFIX}")
set (TRAP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/trap_handler.s")
set (TRAP_FILE "${CMAKE_CURRENT_SOURCE_DIR}/trap_handler${SOURCE_SUFFIX}.s")
gen_kernel_bc(${TARGET_ID} ${TRAP_FILE} ${CODE_OBJECT_FILE})
## Build a list of code object file names
@@ -117,10 +118,11 @@ function(build_kernel_for_devices TRAP_HANDLER_NAME)
foreach(ind RANGE ${dev_count})
list(GET TARGET_DEVS ${ind} dev)
list(GET POSTFIX ${ind} post)
list(GET SOURCE_SUFFIX ${ind} suffix)
if(${CMAKE_VERBOSE_MAKEFILE})
message("\n Generating: ${dev} ...")
endif()
build_kernel(${TRAP_HANDLER_NAME} ${dev} ${post})
build_kernel(${TRAP_HANDLER_NAME} ${dev} ${post} "${suffix}")
endforeach(ind)
set(HSACO_TARG_LIST ${HSACO_TARG_LIST} PARENT_SCOPE)
+228
Просмотреть файл
@@ -0,0 +1,228 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
/// Trap Handler V2 source
// Assembly-time constants used by the handler below. Note that several
// definitions reference symbols that are only defined on a later line
// (e.g. DOORBELL_ID_MASK uses DOORBELL_ID_SIZE), so the order of the
// .set directives is not the order of dependency.

// Interrupt payload layout: the doorbell id occupies the low
// DOORBELL_ID_SIZE bits, and one bit per exception kind is placed above it.
.set DOORBELL_ID_MASK , ((1 << DOORBELL_ID_SIZE) - 1)
.set DOORBELL_ID_SIZE , 10
.set EC_QUEUE_WAVE_ABORT_M0 , (1 << (DOORBELL_ID_SIZE + 0))
.set EC_QUEUE_WAVE_APERTURE_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 5))
.set EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0 , (1 << (DOORBELL_ID_SIZE + 3))
.set EC_QUEUE_WAVE_MATH_ERROR_M0 , (1 << (DOORBELL_ID_SIZE + 2))
.set EC_QUEUE_WAVE_MEMORY_VIOLATION_M0 , (1 << (DOORBELL_ID_SIZE + 4))
.set EC_QUEUE_WAVE_TRAP_M0 , (1 << (DOORBELL_ID_SIZE + 1))
// Bit positions tested in the value read from hwreg(HW_REG_EXCP_FLAG_PRIV).
.set SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT , 4
.set SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT , 7
.set SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT , 6
.set SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT , 8
// Offset/width of the math-exception field read from hwreg(HW_REG_EXCP_FLAG_USER).
.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT , 0
.set SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE , 6
// Offset/width of the math-exception field read from hwreg(HW_REG_TRAP_CTRL).
.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_SHIFT , 0
.set SQ_WAVE_TRAP_CTRL_MATH_EXCP_SIZE , 6
// Layout of ttmp1 (PC_HI) on entry: trap id in bits [31:28], PC[47:32] in
// bits [15:0]. The *_BFE constants are s_bfe_u32 operands built as
// (bit offset | (field width << 16)).
.set SQ_WAVE_PC_HI_ADDRESS_MASK , 0xFFFF
.set SQ_WAVE_PC_HI_TRAP_ID_BFE , (SQ_WAVE_PC_HI_TRAP_ID_SHIFT | (SQ_WAVE_PC_HI_TRAP_ID_SIZE << 16))
.set SQ_WAVE_PC_HI_TRAP_ID_SHIFT , 28
.set SQ_WAVE_PC_HI_TRAP_ID_SIZE , 4
// HALT bit of the SQ_WAVE_STATE_PRIV value held in ttmp12 (1-bit field at bit 14).
.set SQ_WAVE_STATE_PRIV_HALT_BFE , (SQ_WAVE_STATE_PRIV_HALT_SHIFT | (1 << 16))
.set SQ_WAVE_STATE_PRIV_HALT_SHIFT , 14
// Trap ids the handler compares against.
.set TRAP_ID_ABORT , 2
.set TRAP_ID_DEBUGTRAP , 3
// Fields the handler keeps in ttmp6: saved trap id in bits [28:25], saved
// halt status in bit 29, "wave stopped" marker in bit 30.
.set TTMP6_SAVED_STATUS_HALT_MASK , (1 << TTMP6_SAVED_STATUS_HALT_SHIFT)
.set TTMP6_SAVED_STATUS_HALT_SHIFT , 29
.set TTMP6_SAVED_TRAP_ID_BFE , (TTMP6_SAVED_TRAP_ID_SHIFT | (TTMP6_SAVED_TRAP_ID_SIZE << 16))
.set TTMP6_SAVED_TRAP_ID_MASK , (((1 << TTMP6_SAVED_TRAP_ID_SIZE) - 1) << TTMP6_SAVED_TRAP_ID_SHIFT)
.set TTMP6_SAVED_TRAP_ID_SHIFT , 25
.set TTMP6_SAVED_TRAP_ID_SIZE , 4
.set TTMP6_WAVE_STOPPED_SHIFT , 30
// Bit 31 of ttmp8: tested/set by the handler to record that ttmp4/ttmp5
// have been zero-initialized.
.set TTMP8_DEBUG_FLAG_SHIFT , 31
// Bit 23 of ttmp11: DebugEnabled flag passed in by the first-level handler.
.set TTMP11_DEBUG_ENABLED_SHIFT , 23
// Bit offset inside ttmp11 at which PC[47:32] is stored when the wave is parked.
.set TTMP_PC_HI_SHIFT , 7
// Entry point of the second-level trap handler.
//
// ABI between first and second level trap handler:
// { ttmp1, ttmp0 } = TrapID[3:0], zeros, PC[47:0]
// ttmp11 = 0[7:0], DebugEnabled[0], 0[15:0], NoScratch[0], 0[5:0]
// ttmp12 = SQ_WAVE_STATE_PRIV
// ttmp14 = TMA[31:0]
// ttmp15 = TMA[63:32]
//
// This prologue decides whether the event can be ignored (llvm.debugtrap with
// no debugger attached) or must be reported via .no_skip_debugtrap.
trap_entry:
// Branch if not a trap (an exception instead).
// ttmp2 = trap id extracted from ttmp1; s_bfe_u32 sets SCC when it is non-zero.
s_bfe_u32 ttmp2, ttmp1, SQ_WAVE_PC_HI_TRAP_ID_BFE
s_cbranch_scc0 .no_skip_debugtrap
// Trap path only: the exception path branches past this read, so code at
// .no_skip_debugtrap must not assume ttmp3 holds EXCP_FLAG_PRIV.
s_getreg_b32 ttmp3, hwreg(HW_REG_EXCP_FLAG_PRIV)
s_bitcmp1_b32 ttmp3, SQ_WAVE_EXCP_FLAG_PRIV_HT_SHIFT
// If caused by s_trap then advance PC.
// (HT bit set means the trap did not come from an s_trap instruction, so
// the PC is left unchanged; otherwise skip the 4-byte s_trap.)
s_cbranch_scc1 .not_s_trap
s_add_u32 ttmp0, ttmp0, 0x4
s_addc_u32 ttmp1, ttmp1, 0x0
.not_s_trap:
// If llvm.debugtrap and debugger is not attached.
s_cmp_eq_u32 ttmp2, TRAP_ID_DEBUGTRAP
s_cbranch_scc0 .no_skip_debugtrap
// s_bitcmp0_b32 sets SCC when the DebugEnabled bit is clear; SCC == 0 means
// a debugger is attached and the trap must be reported.
s_bitcmp0_b32 ttmp11, TTMP11_DEBUG_ENABLED_SHIFT
s_cbranch_scc0 .no_skip_debugtrap
// Ignore llvm.debugtrap.
s_branch .exit_trap
// Classify the trap/exception and build the interrupt payload in ttmp3:
//   ttmp3 = doorbell id | one or more EC_QUEUE_WAVE_*_M0 bits.
// Reached both from the trap path and directly from the first branch in
// trap_entry (exception path), so nothing here may rely on registers that
// are only written on the trap path.
// Uses ttmp2 and ttmp10 as scratch; records trap id / halt status in ttmp6.
.no_skip_debugtrap:
// Save trap id and halt status in ttmp6.
s_andn2_b32 ttmp6, ttmp6, (TTMP6_SAVED_TRAP_ID_MASK | TTMP6_SAVED_STATUS_HALT_MASK)
s_min_u32 ttmp2, ttmp2, 0xF
s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_TRAP_ID_SHIFT
s_or_b32 ttmp6, ttmp6, ttmp2
s_bfe_u32 ttmp2, ttmp12, SQ_WAVE_STATE_PRIV_HALT_BFE
s_lshl_b32 ttmp2, ttmp2, TTMP6_SAVED_STATUS_HALT_SHIFT
s_or_b32 ttmp6, ttmp6, ttmp2
// Load the privileged exception flags into ttmp2. They are read from the
// hardware register here rather than copied from ttmp3, because ttmp3 is
// only loaded on the trap path and is undefined on the exception path.
s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_PRIV)
// Fetch doorbell id for our queue.
s_sendmsg_rtn_b32 ttmp3, sendmsg(MSG_RTN_GET_DOORBELL)
s_wait_kmcnt 0
s_and_b32 ttmp3, ttmp3, DOORBELL_ID_MASK
// Memory violation is reported when XNACK_ERROR is set.
s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_XNACK_ERROR_SHIFT
s_cbranch_scc0 .not_memory_violation
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MEMORY_VIOLATION_M0
// Aperture violation requires XNACK_ERROR == 0.
s_branch .not_aperture_violation
.not_memory_violation:
s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_MEMVIOL_SHIFT
s_cbranch_scc0 .not_aperture_violation
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_APERTURE_VIOLATION_M0
.not_aperture_violation:
s_bitcmp1_b32 ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
s_cbranch_scc0 .not_illegal_instruction
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ILLEGAL_INSTRUCTION_M0
.not_illegal_instruction:
// Math error: a flag raised in EXCP_FLAG_USER whose corresponding bit is
// also set in TRAP_CTRL (presumably the per-exception trap enables).
s_getreg_b32 ttmp2, hwreg(HW_REG_EXCP_FLAG_USER, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SHIFT, SQ_WAVE_EXCP_FLAG_USER_MATH_EXCP_SIZE)
// s_getreg_b32 does not update SCC, so compare explicitly; branching on the
// stale SCC would make this check depend on the illegal-instruction result.
s_cmp_lg_u32 ttmp2, 0x0
s_cbranch_scc0 .not_math_exception
s_getreg_b32 ttmp10, hwreg(HW_REG_TRAP_CTRL, SQ_WAVE_TRAP_CTRL_MATH_EXCP_SHIFT, SQ_WAVE_TRAP_CTRL_MATH_EXCP_SIZE)
s_and_b32 ttmp2, ttmp2, ttmp10
s_cbranch_scc0 .not_math_exception
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_MATH_ERROR_M0
.not_math_exception:
// Report an abort when the trap id saved in ttmp6 is TRAP_ID_ABORT.
s_bfe_u32 ttmp2, ttmp6, TTMP6_SAVED_TRAP_ID_BFE
s_cmp_eq_u32 ttmp2, TRAP_ID_ABORT
s_cbranch_scc0 .not_abort_trap
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_ABORT_M0
.not_abort_trap:
// If no other exception was flagged then report a generic error.
// (s_andn2_b32 sets SCC when any bit above the doorbell id is set.)
s_andn2_b32 ttmp2, ttmp3, DOORBELL_ID_MASK
s_cbranch_scc1 .send_interrupt
s_or_b32 ttmp3, ttmp3, EC_QUEUE_WAVE_TRAP_M0
// Send the interrupt built in ttmp3, park the wave's PC in ttmp10/ttmp11,
// mark the wave halted, and return from the trap.
.send_interrupt:
// m0 = interrupt data = (exception_code << DOORBELL_ID_SIZE) | doorbell_id
// The wave's own m0 is kept in ttmp2 while the message is sent.
s_mov_b32 ttmp2, m0
s_mov_b32 m0, ttmp3
s_nop 0x0 // Manually inserted wait states
s_sendmsg sendmsg(MSG_INTERRUPT)
// Wait for the message to go out.
s_wait_kmcnt 0
// Restore the wave's original m0.
s_mov_b32 m0, ttmp2
// Parking the wave requires saving the original pc in the preserved ttmps.
// Register layout before parking the wave:
//
// ttmp10: ?[31:0]
// ttmp11: 1st_level_ttmp11[31:23] 0[15:0] 1st_level_ttmp11[6:0]
//
// After parking the wave:
//
// ttmp10: pc_lo[31:0]
// ttmp11: 1st_level_ttmp11[31:23] pc_hi[15:0] 1st_level_ttmp11[6:0]
//
// Save the PC
s_mov_b32 ttmp10, ttmp0
// Keep only PC[47:32] (drops the trap id bits) and move it to ttmp11[22:7].
s_and_b32 ttmp1, ttmp1, SQ_WAVE_PC_HI_ADDRESS_MASK
s_lshl_b32 ttmp1, ttmp1, TTMP_PC_HI_SHIFT
s_andn2_b32 ttmp11, ttmp11, (SQ_WAVE_PC_HI_ADDRESS_MASK << TTMP_PC_HI_SHIFT)
s_or_b32 ttmp11, ttmp11, ttmp1
// Park the wave
// Replace the return PC in { ttmp1, ttmp0 } with the address of .parked,
// computed PC-relative from the instruction following s_getpc_b64.
s_getpc_b64 [ttmp0, ttmp1]
s_add_u32 ttmp0, ttmp0, .parked - .
s_addc_u32 ttmp1, ttmp1, 0x0
// NOTE(review): no branch in the visible code targets .halt_wave; it is
// only reached by fall-through.
.halt_wave:
// Halt the wavefront upon restoring STATUS below.
// The HALT bit is set in ttmp12, which is written back to STATE_PRIV at
// .exit_trap; ttmp6 records that the wave was stopped by the handler.
s_bitset1_b32 ttmp6, TTMP6_WAVE_STOPPED_SHIFT
s_bitset1_b32 ttmp12, SQ_WAVE_STATE_PRIV_HALT_SHIFT
// Initialize TTMP registers
// Done once: skipped when the ttmp8 flag bit is already set, otherwise
// ttmp4/ttmp5 are zeroed and the flag is set.
s_bitcmp1_b32 ttmp8, TTMP8_DEBUG_FLAG_SHIFT
s_cbranch_scc1 .ttmps_initialized
s_mov_b32 ttmp4, 0
s_mov_b32 ttmp5, 0
s_bitset1_b32 ttmp8, TTMP8_DEBUG_FLAG_SHIFT
.ttmps_initialized:
// Also reached directly from trap_entry when an llvm.debugtrap is ignored;
// in that case ttmp12 and the PC have not been modified for parking.
.exit_trap:
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATE_PRIV), ttmp12
// Return to original (possibly modified) PC.
s_rfe_b64 [ttmp0, ttmp1]
// Target of the parked PC. If a parked wave is resumed it executes
// s_trap 0x2 (the same value as TRAP_ID_ABORT) and loops back here.
.parked:
s_trap 0x2
s_branch .parked
// Add s_code_end padding so instruction prefetch always has something to read.
// The repeat count is (256 - (handler size in bytes % 64)) / 4; presumably
// each s_code_end is 4 bytes, giving between 192 and 256 bytes of padding.
.rept (256 - ((. - trap_entry) % 64)) / 4
s_code_end
.endr