Change-Id: Iaf586a15a2f2aebc266da5148aa8637b092c1002


[ROCm/ROCR-Runtime commit: d1c5a079cd]
Этот коммит содержится в:
Jay Cornwall
2019-09-12 14:41:14 -04:00
коммит произвёл Chris Freehill
родитель e44fecc07c
Коммит e729948e41
2 изменённых файлов: 94 добавлений и 0 удалений
+86
Просмотреть файл
@@ -127,6 +127,13 @@ static const unsigned int kCodeTrapHandler9[] = {
.set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 26
.set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15
.set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000
.elseif .amdgcn.gfx_generation_number == 10
.set TTMP11_SAVE_REPLAY_W64H_SHIFT , 31
.set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 24
.set SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT , 25
.set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15
.set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x3F8000
.set SQ_WAVE_IB_STS_REPLAY_W64H_MASK , 0x2000000
.else
.error "unsupported target"
.endif
@@ -217,6 +224,14 @@ static const unsigned int kCodeTrapHandler9[] = {
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
.endif
.if .amdgcn.gfx_generation_number == 10
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
s_or_b32 ttmp2, ttmp2, ttmp3
s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
.endif
// Restore SQ_WAVE_STATUS.
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
@@ -296,6 +311,77 @@ static const unsigned int kCodeFill8[] = {
0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
};
// Precompiled code words for the "CopyAligned" shader, generation-10 variant.
// GpuAgent::AssembleShader registers this array (with its sizeof) as the
// compute_10 entry of the "CopyAligned" row and selects it for target case 10.
// NOTE(review): presumably pre-assembled GFX10 machine code; the words are
// opaque here and must be regenerated from the shader source, not hand-edited.
static const unsigned int kCodeCopyAligned10[] = {
0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xF4080300, 0xFA000020,
0xF4080400, 0xFA000030, 0xF4080500, 0xFA000040, 0xF4000600, 0xFA000050,
0xBF8CC07F, 0x8F028602, 0xD70F6A00, 0x00020002, 0x7E060205, 0xD70F6A02,
0x00020004, 0xD5286A03, 0x01A90103, 0x7E0A0207, 0xD70F6A04, 0x00020006,
0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001102, 0xBF86000F, 0x87FE6A7E,
0xDC200000, 0x017D0002, 0xBF8C3F70, 0xD70F6A02, 0x00020418, 0xD5286A03,
0x01A90103, 0xDC600000, 0x007D0104, 0xD70F6A04, 0x00020818, 0xD5286A05,
0x01A90105, 0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209,
0xD70F6A02, 0x00020208, 0xD5286A03, 0x01A90103, 0x7E0A020B, 0xD70F6A04,
0x0002020A, 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001902, 0xBF86000E,
0xDC380000, 0x087D0002, 0xD70F6A02, 0x00020419, 0xD5286A03, 0x01A90103,
0xBF8C3F70, 0xDC780000, 0x007D0804, 0xD70F6A04, 0x00020819, 0xD5286A05,
0x01A90105, 0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD70F6A02,
0x0002020C, 0xD5286A03, 0x01A90103, 0x7E0A020F, 0xD70F6A04, 0x0002020E,
0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00002102, 0xBF86000F, 0x87FE6A7E,
0xDC300000, 0x017D0002, 0xD70F6A02, 0x00020419, 0xD5286A03, 0x01A90103,
0xBF8C3F70, 0xDC700000, 0x007D0104, 0xD70F6A04, 0x00020819, 0xD5286A05,
0x01A90105, 0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD70F6A02, 0x00020010,
0xD5286A03, 0x01A90103, 0x7E0A0213, 0xD70F6A04, 0x00020012, 0xD5286A05,
0x01A90105, 0xD4E1006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000,
0x017D0002, 0xBF8C3F70, 0xDC600000, 0x007D0104, 0xBF810000,
};
// Precompiled code words for the "CopyMisaligned" shader, generation-10
// variant. GpuAgent::AssembleShader registers this array (with its sizeof) as
// the compute_10 entry of the "CopyMisaligned" row and selects it for target
// case 10.
// NOTE(review): presumably pre-assembled GFX10 machine code; the words are
// opaque here and must be regenerated from the shader source, not hand-edited.
static const unsigned int kCodeCopyMisaligned10[] = {
0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xF4080300, 0xFA000020,
0xF4000400, 0xFA000030, 0xBF8CC07F, 0x8F028602, 0xD70F6A00, 0x00020002,
0x7E060205, 0xD70F6A02, 0x00020004, 0xD5286A03, 0x01A90103, 0x7E0A0207,
0xD70F6A04, 0x00020006, 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001102,
0xBF860032, 0xDC200000, 0x067D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
0x01A90103, 0xDC200000, 0x077D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
0x01A90103, 0xDC200000, 0x087D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
0x01A90103, 0xDC200000, 0x097D0002, 0xD70F6A02, 0x00020410, 0xD5286A03,
0x01A90103, 0xBF8C3F70, 0xDC600000, 0x007D0604, 0xD70F6A04, 0x00020810,
0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0704, 0xD70F6A04, 0x00020810,
0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0804, 0xD70F6A04, 0x00020810,
0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0904, 0xD70F6A04, 0x00020810,
0xD5286A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD70F6A02, 0x00020008,
0xD5286A03, 0x01A90103, 0x7E0A020B, 0xD70F6A04, 0x0002000A, 0xD5286A05,
0x01A90105, 0xD4E1006A, 0x00001902, 0xBF86000F, 0x87FE6A7E, 0xDC200000,
0x017D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, 0x01A90103, 0xBF8C3F70,
0xDC600000, 0x007D0104, 0xD70F6A04, 0x00020810, 0xD5286A05, 0x01A90105,
0xBF82FFEE, 0xBF810000,
};
// Precompiled code words for the "Fill" shader, generation-10 variant.
// GpuAgent::AssembleShader registers this array (with its sizeof) as the
// compute_10 entry of the "Fill" row and selects it for target case 10.
// NOTE(review): presumably pre-assembled GFX10 machine code; the words are
// opaque here and must be regenerated from the shader source, not hand-edited.
static const unsigned int kCodeFill10[] = {
0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xBF8CC07F, 0x8F028602,
0xD70F6A00, 0x00020002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A,
0x8F0C840B, 0x34020084, 0x7E060205, 0xD70F6A02, 0x00020204, 0xD5286A03,
0x01A90103, 0xD4E1006A, 0x00000D02, 0xBF860007, 0xDC780000, 0x007D0402,
0xD70F6A02, 0x0002040C, 0xD5286A03, 0x01A90103, 0xBF82FFF6, 0x8F0C820B,
0x34020082, 0x7E060207, 0xD70F6A02, 0x00020206, 0xD5286A03, 0x01A90103,
0xD4E1006A, 0x00001102, 0xBF860008, 0x87FE6A7E, 0xDC700000, 0x007D0402,
0xD70F6A02, 0x0002040C, 0xD5286A03, 0x01A90103, 0xBF82FFF5, 0xBF810000,
};
// Precompiled code words for the "TrapHandler" shader, generation-10 variant.
// GpuAgent::AssembleShader registers this array (with its sizeof) as the
// compute_10 entry of the trap-handler row and selects it for target case 10.
// NOTE(review): presumably the assembled form of the trap handler assembly
// listing built with .amdgcn.gfx_generation_number == 10 (the branch that
// also restores the REPLAY_W64H field of IB_STS) — confirm against the
// assembler output before relying on this; regenerate rather than hand-edit.
static const unsigned int kCodeTrapHandler10[] = {
0xB96EF803, 0x8770FF6E, 0x10000100, 0xBF06FF70, 0x00000100, 0xBEF003FF,
0x20000000, 0xBF85000E, 0x8770FF6E, 0x00000800, 0xBEF003F4, 0xBF85000A,
0x93EEFF6D, 0x00080010, 0xBF84002C, 0xBF06826E, 0xBEF003FF, 0x80000000,
0xBF850003, 0x806C846C, 0x826D806D, 0xBF820025, 0xBEFE03FF, 0x80000000,
0xBF90000A, 0xBF800007, 0xBF0C9F7E, 0xBF84FFFD, 0x876EFF7E, 0x000003FF,
0x8F6E836E, 0xF4051BBD, 0xDC000000, 0xBF8CC07F, 0xF4051BB7, 0xFA0000C0,
0xBF8CC07F, 0xBEF10380, 0xF6811C37, 0xFA000008, 0xBF8CC07F, 0x88707170,
0xBF85000E, 0xF4051C37, 0xFA000010, 0xBF8CC07F, 0x87F07070, 0xBF840009,
0xF4011BB7, 0xFA000018, 0xBF8CC07F, 0xF4411BB8, 0xFA000000, 0xBF8CC07F,
0xBEFC0380, 0xBF800000, 0xBF900001, 0x8878FF78, 0x00002000, 0x906E8977,
0x876FFF6E, 0x003F8000, 0x906E8677, 0x876EFF6E, 0x02000000, 0x886E6F6E,
0xB9EEF807, 0x87FE7E7E, 0x87EA6A6A, 0xB9F8F802, 0xBE80226C,
};
} // namespace amd
#endif // header guard
+8
Просмотреть файл
@@ -173,6 +173,7 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
ASICShader compute_7;
ASICShader compute_8;
ASICShader compute_9;
ASICShader compute_10;
};
std::map<std::string, CompiledShader> compiled_shaders = {
@@ -181,24 +182,28 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
{NULL, 0, 0, 0},
{kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4},
{kCodeTrapHandler9, sizeof(kCodeTrapHandler9), 2, 4},
{kCodeTrapHandler10, sizeof(kCodeTrapHandler10), 2, 4},
}},
{"CopyAligned",
{
{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},
{kCodeCopyAligned10, sizeof(kCodeCopyAligned10), 32, 12},
}},
{"CopyMisaligned",
{
{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},
{kCodeCopyMisaligned10, sizeof(kCodeCopyMisaligned10), 23, 10},
}},
{"Fill",
{
{kCodeFill7, sizeof(kCodeFill7), 19, 8},
{kCodeFill8, sizeof(kCodeFill8), 19, 8},
{kCodeFill8, sizeof(kCodeFill8), 19, 8},
{kCodeFill10, sizeof(kCodeFill10), 19, 8},
}}};
auto compiled_shader_it = compiled_shaders.find(func_name);
@@ -217,6 +222,9 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar
case 9:
asic_shader = &compiled_shader_it->second.compute_9;
break;
case 10:
asic_shader = &compiled_shader_it->second.compute_10;
break;
default:
assert(false && "Precompiled shader unavailable for target");
}