Add binary shaders for gfx10
Change-Id: Iaf586a15a2f2aebc266da5148aa8637b092c1002
[ROCm/ROCR-Runtime commit: d1c5a079cd]
This commit is contained in:
Committed by
Chris Freehill
Parent
e44fecc07c
Commit
e729948e41
@@ -127,6 +127,13 @@ static const unsigned int kCodeTrapHandler9[] = {
|
||||
.set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 26
|
||||
.set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15
|
||||
.set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000
|
||||
.elseif .amdgcn.gfx_generation_number == 10
|
||||
.set TTMP11_SAVE_REPLAY_W64H_SHIFT , 31
|
||||
.set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 24
|
||||
.set SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT , 25
|
||||
.set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15
|
||||
.set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x3F8000
|
||||
.set SQ_WAVE_IB_STS_REPLAY_W64H_MASK , 0x2000000
|
||||
.else
|
||||
.error "unsupported target"
|
||||
.endif
|
||||
@@ -217,6 +224,14 @@ static const unsigned int kCodeTrapHandler9[] = {
|
||||
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
.endif
|
||||
// gfx10 only: rebuild the IB_STS hardware register from fields held in ttmp11.
// With the gfx10 .set values in this file the two shift amounts below are
// (24 - 15) = 9 and (31 - 25) = 6.
.if .amdgcn.gfx_generation_number == 10
|
||||
// Move the RCNT/FIRST_REPLAY field from its ttmp11 position down to its IB_STS position.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
|
||||
// Keep only that field; park it in ttmp3 because ttmp2 is reused next.
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
|
||||
// Move the REPLAY_W64H bit from its ttmp11 position down to its IB_STS position.
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
|
||||
// Keep only the REPLAY_W64H bit.
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
|
||||
// Combine both fields into a single value.
s_or_b32 ttmp2, ttmp2, ttmp3
|
||||
// Write the combined value to HW_REG_IB_STS.
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
|
||||
.endif
|
||||
|
||||
// Restore SQ_WAVE_STATUS.
|
||||
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
|
||||
@@ -296,6 +311,77 @@ static const unsigned int kCodeFill8[] = {
|
||||
0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
|
||||
};
|
||||
|
||||
// Shader code words for the "CopyAligned" kernel, gfx10 variant (per the "10"
// suffix and the commit subject "Add binary shaders for gfx10").
// GpuAgent::AssembleShader lists this array as the fourth ("compute_10") entry
// of the "CopyAligned" row, together with sizeof(kCodeCopyAligned10) and the
// values 32 and 12.
// NOTE(review): the meaning of 32 and 12 is not visible here — presumably
// register counts; confirm against the ASICShader definition.
// NOTE(review): presumably assembler output; regenerate from the shader source
// rather than editing individual words.
static const unsigned int kCodeCopyAligned10[] = {
|
||||
0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xF4080300, 0xFA000020,
|
||||
0xF4080400, 0xFA000030, 0xF4080500, 0xFA000040, 0xF4000600, 0xFA000050,
|
||||
0xBF8CC07F, 0x8F028602, 0xD70F6A00, 0x00020002, 0x7E060205, 0xD70F6A02,
|
||||
0x00020004, 0xD5286A03, 0x01A90103, 0x7E0A0207, 0xD70F6A04, 0x00020006,
|
||||
0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001102, 0xBF86000F, 0x87FE6A7E,
|
||||
0xDC200000, 0x017D0002, 0xBF8C3F70, 0xD70F6A02, 0x00020418, 0xD5286A03,
|
||||
0x01A90103, 0xDC600000, 0x007D0104, 0xD70F6A04, 0x00020818, 0xD5286A05,
|
||||
0x01A90105, 0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209,
|
||||
0xD70F6A02, 0x00020208, 0xD5286A03, 0x01A90103, 0x7E0A020B, 0xD70F6A04,
|
||||
0x0002020A, 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001902, 0xBF86000E,
|
||||
0xDC380000, 0x087D0002, 0xD70F6A02, 0x00020419, 0xD5286A03, 0x01A90103,
|
||||
0xBF8C3F70, 0xDC780000, 0x007D0804, 0xD70F6A04, 0x00020819, 0xD5286A05,
|
||||
0x01A90105, 0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD70F6A02,
|
||||
0x0002020C, 0xD5286A03, 0x01A90103, 0x7E0A020F, 0xD70F6A04, 0x0002020E,
|
||||
0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00002102, 0xBF86000F, 0x87FE6A7E,
|
||||
0xDC300000, 0x017D0002, 0xD70F6A02, 0x00020419, 0xD5286A03, 0x01A90103,
|
||||
0xBF8C3F70, 0xDC700000, 0x007D0104, 0xD70F6A04, 0x00020819, 0xD5286A05,
|
||||
0x01A90105, 0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD70F6A02, 0x00020010,
|
||||
0xD5286A03, 0x01A90103, 0x7E0A0213, 0xD70F6A04, 0x00020012, 0xD5286A05,
|
||||
0x01A90105, 0xD4E1006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000,
|
||||
0x017D0002, 0xBF8C3F70, 0xDC600000, 0x007D0104, 0xBF810000,
|
||||
};
|
||||
|
||||
// Shader code words for the "CopyMisaligned" kernel, gfx10 variant (per the
// "10" suffix and the commit subject "Add binary shaders for gfx10").
// GpuAgent::AssembleShader lists this array as the fourth ("compute_10") entry
// of the "CopyMisaligned" row, together with sizeof(kCodeCopyMisaligned10) and
// the values 23 and 10.
// NOTE(review): the meaning of 23 and 10 is not visible here — presumably
// register counts; confirm against the ASICShader definition.
// NOTE(review): presumably assembler output; regenerate from the shader source
// rather than editing individual words.
static const unsigned int kCodeCopyMisaligned10[] = {
|
||||
0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xF4080300, 0xFA000020,
|
||||
0xF4000400, 0xFA000030, 0xBF8CC07F, 0x8F028602, 0xD70F6A00, 0x00020002,
|
||||
0x7E060205, 0xD70F6A02, 0x00020004, 0xD5286A03, 0x01A90103, 0x7E0A0207,
|
||||
0xD70F6A04, 0x00020006, 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001102,
|
||||
0xBF860032, 0xDC200000, 0x067D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
|
||||
0x01A90103, 0xDC200000, 0x077D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
|
||||
0x01A90103, 0xDC200000, 0x087D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
|
||||
0x01A90103, 0xDC200000, 0x097D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
|
||||
0x01A90103, 0xBF8C3F70, 0xDC600000, 0x007D0604, 0xD70F6A04, 0x00020810,
|
||||
0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0704, 0xD70F6A04, 0x00020810,
|
||||
0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0804, 0xD70F6A04, 0x00020810,
|
||||
0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0904, 0xD70F6A04, 0x00020810,
|
||||
0xD5286A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD70F6A02, 0x00020008,
|
||||
0xD5286A03, 0x01A90103, 0x7E0A020B, 0xD70F6A04, 0x0002000A, 0xD5286A05,
|
||||
0x01A90105, 0xD4E1006A, 0x00001902, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
|
||||
0x017D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, 0x01A90103, 0xBF8C3F70,
|
||||
0xDC600000, 0x007D0104, 0xD70F6A04, 0x00020810, 0xD5286A05, 0x01A90105,
|
||||
0xBF82FFEE, 0xBF810000,
|
||||
};
|
||||
|
||||
// Shader code words for the "Fill" kernel, gfx10 variant (per the "10" suffix
// and the commit subject "Add binary shaders for gfx10").
// GpuAgent::AssembleShader lists this array as the fourth ("compute_10") entry
// of the "Fill" row, together with sizeof(kCodeFill10) and the values 19 and 8.
// NOTE(review): the meaning of 19 and 8 is not visible here — presumably
// register counts; confirm against the ASICShader definition.
// NOTE(review): presumably assembler output; regenerate from the shader source
// rather than editing individual words.
static const unsigned int kCodeFill10[] = {
|
||||
0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xBF8CC07F, 0x8F028602,
|
||||
0xD70F6A00, 0x00020002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A,
|
||||
0x8F0C840B, 0x34020084, 0x7E060205, 0xD70F6A02, 0x00020204, 0xD5286A03,
|
||||
0x01A90103, 0xD4E1006A, 0x00000D02, 0xBF860007, 0xDC780000, 0x007D0402,
|
||||
0xD70F6A02, 0x0002040C, 0xD5286A03, 0x01A90103, 0xBF82FFF6, 0x8F0C820B,
|
||||
0x34020082, 0x7E060207, 0xD70F6A02, 0x00020206, 0xD5286A03, 0x01A90103,
|
||||
0xD4E1006A, 0x00001102, 0xBF860008, 0x87FE6A7E, 0xDC700000, 0x007D0402,
|
||||
0xD70F6A02, 0x0002040C, 0xD5286A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
|
||||
};
|
||||
|
||||
// Trap handler code words, gfx10 variant (per the "10" suffix and the commit
// subject "Add binary shaders for gfx10").
// GpuAgent::AssembleShader lists this array as the fourth ("compute_10") entry
// of a shader-table row, after kCodeTrapHandler8 and kCodeTrapHandler9, with
// sizeof(kCodeTrapHandler10) and the values 2 and 4.
// NOTE(review): the meaning of 2 and 4 is not visible here — presumably
// register counts; confirm against the ASICShader definition.
// The literal words 0x003F8000 and 0x02000000 near the end equal the gfx10
// values of SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK and
// SQ_WAVE_IB_STS_REPLAY_W64H_MASK from the trap handler assembly, so this array
// presumably is the assembled form of that source for generation 10; regenerate
// it from the assembly rather than editing individual words.
static const unsigned int kCodeTrapHandler10[] = {
|
||||
0xB96EF803, 0x8770FF6E, 0x10000100, 0xBF06FF70, 0x00000100, 0xBEF003FF,
|
||||
0x20000000, 0xBF85000E, 0x8770FF6E, 0x00000800, 0xBEF003F4, 0xBF85000A,
|
||||
0x93EEFF6D, 0x00080010, 0xBF84002C, 0xBF06826E, 0xBEF003FF, 0x80000000,
|
||||
0xBF850003, 0x806C846C, 0x826D806D, 0xBF820025, 0xBEFE03FF, 0x80000000,
|
||||
0xBF90000A, 0xBF800007, 0xBF0C9F7E, 0xBF84FFFD, 0x876EFF7E, 0x000003FF,
|
||||
0x8F6E836E, 0xF4051BBD, 0xDC000000, 0xBF8CC07F, 0xF4051BB7, 0xFA0000C0,
|
||||
0xBF8CC07F, 0xBEF10380, 0xF6811C37, 0xFA000008, 0xBF8CC07F, 0x88707170,
|
||||
0xBF85000E, 0xF4051C37, 0xFA000010, 0xBF8CC07F, 0x87F07070, 0xBF840009,
|
||||
0xF4011BB7, 0xFA000018, 0xBF8CC07F, 0xF4411BB8, 0xFA000000, 0xBF8CC07F,
|
||||
0xBEFC0380, 0xBF800000, 0xBF900001, 0x8878FF78, 0x00002000, 0x906E8977,
|
||||
0x876FFF6E, 0x003F8000, 0x906E8677, 0x876EFF6E, 0x02000000, 0x886E6F6E,
|
||||
0xB9EEF807, 0x87FE7E7E, 0x87EA6A6A, 0xB9F8F802, 0xBE80226C,
|
||||
};
|
||||
|
||||
} // namespace amd
|
||||
|
||||
#endif // header guard
|
||||
|
||||
@@ -173,6 +173,7 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
|
||||
ASICShader compute_7;
|
||||
ASICShader compute_8;
|
||||
ASICShader compute_9;
|
||||
ASICShader compute_10;
|
||||
};
|
||||
|
||||
std::map<std::string, CompiledShader> compiled_shaders = {
|
||||
@@ -181,24 +182,28 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
|
||||
{NULL, 0, 0, 0},
|
||||
{kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4},
|
||||
{kCodeTrapHandler9, sizeof(kCodeTrapHandler9), 2, 4},
|
||||
{kCodeTrapHandler10, sizeof(kCodeTrapHandler10), 2, 4},
|
||||
}},
|
||||
{"CopyAligned",
|
||||
{
|
||||
{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},
|
||||
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},
|
||||
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},
|
||||
{kCodeCopyAligned10, sizeof(kCodeCopyAligned10), 32, 12},
|
||||
}},
|
||||
{"CopyMisaligned",
|
||||
{
|
||||
{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},
|
||||
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},
|
||||
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},
|
||||
{kCodeCopyMisaligned10, sizeof(kCodeCopyMisaligned10), 23, 10},
|
||||
}},
|
||||
{"Fill",
|
||||
{
|
||||
{kCodeFill7, sizeof(kCodeFill7), 19, 8},
|
||||
{kCodeFill8, sizeof(kCodeFill8), 19, 8},
|
||||
{kCodeFill8, sizeof(kCodeFill8), 19, 8},
|
||||
{kCodeFill10, sizeof(kCodeFill10), 19, 8},
|
||||
}}};
|
||||
|
||||
auto compiled_shader_it = compiled_shaders.find(func_name);
|
||||
@@ -217,6 +222,9 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
|
||||
case 9:
|
||||
asic_shader = &compiled_shader_it->second.compute_9;
|
||||
break;
|
||||
case 10:
|
||||
asic_shader = &compiled_shader_it->second.compute_10;
|
||||
break;
|
||||
default:
|
||||
assert(false && "Precompiled shader unavailable for target");
|
||||
}
|
||||
|
||||
Reference in new issue
Block a user