kfdtest: consolidate LoopIsa
1. Initializing registers before using them is best practice. Although the use case here does not care whether the registers are initialized, some emulators complain about the "read_before_write" behavior. Initialize the registers that are used to silence these complaints. 2. Update the s_waitcnt handling for gfx12 (use s_wait_dscnt and s_wait_kmcnt instead of s_waitcnt lgkmcnt). Change-Id: I462b2b0b5017dd2876a5954169d3b6b2f1c2a75b Signed-off-by: Lang Yu <lang.yu@amd.com>
이 커밋은 다음에 포함됨:
@@ -691,6 +691,14 @@ const char *LoopIsa =
|
||||
R"(
|
||||
s_movk_i32 s0, 0x0008
|
||||
s_movk_i32 s1, 0x00ff
|
||||
s_mov_b32 s4, 0
|
||||
s_mov_b32 s5, 0
|
||||
s_mov_b32 s6, 0
|
||||
s_mov_b32 s7, 0
|
||||
s_mov_b32 s12, 0
|
||||
s_mov_b32 s13, 0
|
||||
s_mov_b32 s14, 0
|
||||
s_mov_b32 s15, 0
|
||||
v_mov_b32 v0, 0
|
||||
v_mov_b32 v1, 0
|
||||
v_mov_b32 v2, 0
|
||||
@@ -717,7 +725,12 @@ const char *LoopIsa =
|
||||
s_cbranch_scc1 END_OF_PGM
|
||||
v_add_f32 v0, 2.0, v0
|
||||
v_cvt_f32_i32 v17, s1
|
||||
s_waitcnt lgkmcnt(0)
|
||||
.if (.amdgcn.gfx_generation_number >= 12)
|
||||
s_wait_dscnt 0
|
||||
s_wait_kmcnt 0
|
||||
.else
|
||||
s_waitcnt lgkmcnt(0)
|
||||
.endif
|
||||
v_add_f32 v18, s8, v17
|
||||
v_add_f32 v19, s9, v17
|
||||
v_add_f32 v20, s10, v17
|
||||
|
||||
새 이슈에서 참조
사용자 차단