kfdtest: Update shaders to compile on gfx940
gfx940 changed the semantics of the glc and slc coherency options
on vector stores and loads. This means that shaders that use
those bits no longer compile on gfx940.
Add assembler conditional directives (.if/.else/.endif) to those shaders so that
gfx940 uses the new coherency bits (nt, sc0, sc1) in place of glc and slc.
Also add gfx940 to ASMTest so that compilation is tested.
Note: One of the tests enabled by this patch on gfx940,
KFDEvictTest.QueueTest, does not pass on gfx940 emulators.
Signed-off-by: David Francis <David.Francis@amd.com>
Change-Id: I942f9d2536e9eb5510c4d5af30df6ff1a95c8cf7
[ROCm/ROCR-Runtime commit: 30da9a3cf9]
Этот коммит содержится в:
коммит произвёл
Graham Sider
родитель
543fe60c96
Коммит
78f489fb95
@@ -43,6 +43,7 @@ static const std::vector<uint32_t> TargetList = {
|
||||
0x090009,
|
||||
0x09000a,
|
||||
0x09000c,
|
||||
0x090400,
|
||||
0x0a0100,
|
||||
0x0a0101,
|
||||
0x0a0102,
|
||||
|
||||
@@ -95,9 +95,15 @@ const char *CopyDwordIsa = R"(
|
||||
v_mov_b32 v1, s1
|
||||
v_mov_b32 v2, s2
|
||||
v_mov_b32 v3, s3
|
||||
flat_load_dword v4, v[0:1] glc slc
|
||||
s_waitcnt 0
|
||||
flat_store_dword v[2:3], v4 glc slc
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4 && .amdgcn.gfx_generation_stepping == 0)
|
||||
flat_load_dword v4, v[0:1] nt sc1 sc0
|
||||
s_waitcnt 0
|
||||
flat_store_dword v[2:3], v4 nt sc1 sc0
|
||||
.else
|
||||
flat_load_dword v4, v[0:1] glc slc
|
||||
s_waitcnt 0
|
||||
flat_store_dword v[2:3], v4 glc slc
|
||||
.endif
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
@@ -112,7 +118,10 @@ const char *AtomicIncIsa = R"(
|
||||
.text
|
||||
v_mov_b32 v0, s0
|
||||
v_mov_b32 v1, s1
|
||||
.if (.amdgcn.gfx_generation_number >= 8)
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4 && .amdgcn.gfx_generation_stepping == 0)
|
||||
v_mov_b32 v2, 1
|
||||
flat_atomic_add v3, v[0:1], v2 nt sc1 sc0
|
||||
.elseif (.amdgcn.gfx_generation_number >= 8)
|
||||
v_mov_b32 v2, 1
|
||||
flat_atomic_add v3, v[0:1], v2 glc slc
|
||||
.else
|
||||
@@ -153,9 +162,15 @@ const char *ScratchCopyDwordIsa = R"(
|
||||
s_mov_b32 flat_scratch_hi, 0
|
||||
.endif
|
||||
// Copy a dword between the passed addresses
|
||||
flat_load_dword v4, v[0:1] slc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
flat_store_dword v[2:3], v4 slc
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4 && .amdgcn.gfx_generation_stepping == 0)
|
||||
flat_load_dword v4, v[0:1] nt sc1 sc0
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
flat_store_dword v[2:3], v4 nt sc1 sc0
|
||||
.else
|
||||
flat_load_dword v4, v[0:1] slc
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0)
|
||||
flat_store_dword v[2:3], v4 slc
|
||||
.endif
|
||||
s_endpgm
|
||||
)";
|
||||
|
||||
@@ -179,6 +194,8 @@ const char *PollMemoryIsa = R"(
|
||||
s_cbranch_scc0 LOOP
|
||||
.if (.amdgcn.gfx_generation_number >= 10)
|
||||
flat_store_dword v[0:1], v2 slc
|
||||
.elseif (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4 && .amdgcn.gfx_generation_stepping == 0)
|
||||
flat_store_dword v[0:1], v2 nt sc1 sc0
|
||||
.else
|
||||
s_store_dword s18, s[2:3], 0x0 glc
|
||||
.endif
|
||||
@@ -524,7 +541,11 @@ const char *ReadMemoryIsa = SHADER_MACROS R"(
|
||||
V_ADD_CO_CI_U32 v3, v3, 0 // v[2:3] = s[0:1] + v0 * 8
|
||||
|
||||
// Load 64bit local buffer address stored at v[2:3] to v[6:7]
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4 && .amdgcn.gfx_generation_stepping == 0)
|
||||
flat_load_dwordx2 v[6:7], v[2:3] nt sc1 sc0
|
||||
.else
|
||||
flat_load_dwordx2 v[6:7], v[2:3] slc
|
||||
.endif
|
||||
s_waitcnt vmcnt(0) & lgkmcnt(0) // wait for memory reads to finish
|
||||
v_mov_b32 v8, 0x5678
|
||||
s_movk_i32 s8, 0x5678
|
||||
@@ -542,7 +563,11 @@ const char *ReadMemoryIsa = SHADER_MACROS R"(
|
||||
v_mov_b32 v12, v6
|
||||
v_mov_b32 v13, v7
|
||||
L_LOOP_READ:
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc
|
||||
.if (.amdgcn.gfx_generation_number == 9 && .amdgcn.gfx_generation_minor == 4 && .amdgcn.gfx_generation_stepping == 0)
|
||||
flat_load_dwordx2 v[14:15], v[12:13] nt sc1 sc0
|
||||
.else
|
||||
flat_load_dwordx2 v[14:15], v[12:13] slc
|
||||
.endif
|
||||
V_ADD_CO_U32 v9, v9, v10
|
||||
V_ADD_CO_U32 v12, v12, v10
|
||||
V_ADD_CO_CI_U32 v13, v13, 0
|
||||
|
||||
Ссылка в новой задаче
Block a user