From e729948e4114e36eb0ca0ca810e5a9132d4e197f Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Thu, 12 Sep 2019 14:41:14 -0400 Subject: [PATCH] Add binary shaders for gfx10 Change-Id: Iaf586a15a2f2aebc266da5148aa8637b092c1002 [ROCm/ROCR-Runtime commit: d1c5a079cdccc2e50decba1b7b6ad21405700d28] --- .../hsa-runtime/core/inc/amd_gpu_shaders.h | 86 +++++++++++++++++++ .../core/runtime/amd_gpu_agent.cpp | 8 ++ 2 files changed, 94 insertions(+) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h index 59ba7dc4ad..089390ed6f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h @@ -127,6 +127,13 @@ static const unsigned int kCodeTrapHandler9[] = { .set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 26 .set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15 .set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x1F8000 + .elseif .amdgcn.gfx_generation_number == 10 + .set TTMP11_SAVE_REPLAY_W64H_SHIFT , 31 + .set TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT , 24 + .set SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT , 25 + .set SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT , 15 + .set SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK , 0x3F8000 + .set SQ_WAVE_IB_STS_REPLAY_W64H_MASK , 0x2000000 .else .error "unsupported target" .endif @@ -217,6 +224,14 @@ static const unsigned int kCodeTrapHandler9[] = { s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 .endif + .if .amdgcn.gfx_generation_number == 10 + s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK + s_or_b32 ttmp2, ttmp2, ttmp3 + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + .endif // Restore SQ_WAVE_STATUS. s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 @@ -296,6 +311,77 @@ static const unsigned int kCodeFill8[] = { 0x00001902, 0xD11C6A03, 0x01A90103, 0xBF82FFF5, 0xBF810000, }; +static const unsigned int kCodeCopyAligned10[] = { + 0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xF4080300, 0xFA000020, + 0xF4080400, 0xFA000030, 0xF4080500, 0xFA000040, 0xF4000600, 0xFA000050, + 0xBF8CC07F, 0x8F028602, 0xD70F6A00, 0x00020002, 0x7E060205, 0xD70F6A02, + 0x00020004, 0xD5286A03, 0x01A90103, 0x7E0A0207, 0xD70F6A04, 0x00020006, + 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001102, 0xBF86000F, 0x87FE6A7E, + 0xDC200000, 0x017D0002, 0xBF8C3F70, 0xD70F6A02, 0x00020418, 0xD5286A03, + 0x01A90103, 0xDC600000, 0x007D0104, 0xD70F6A04, 0x00020818, 0xD5286A05, + 0x01A90105, 0xBF82FFEE, 0xBEFE04C1, 0x8F198418, 0x34020084, 0x7E060209, + 0xD70F6A02, 0x00020208, 0xD5286A03, 0x01A90103, 0x7E0A020B, 0xD70F6A04, + 0x0002020A, 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001902, 0xBF86000E, + 0xDC380000, 0x087D0002, 0xD70F6A02, 0x00020419, 0xD5286A03, 0x01A90103, + 0xBF8C3F70, 0xDC780000, 0x007D0804, 0xD70F6A04, 0x00020819, 0xD5286A05, + 0x01A90105, 0xBF82FFEF, 0x8F198218, 0x34020082, 0x7E06020D, 0xD70F6A02, + 0x0002020C, 0xD5286A03, 0x01A90103, 0x7E0A020F, 0xD70F6A04, 0x0002020E, + 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00002102, 0xBF86000F, 0x87FE6A7E, + 0xDC300000, 0x017D0002, 0xD70F6A02, 0x00020419, 0xD5286A03, 0x01A90103, + 0xBF8C3F70, 0xDC700000, 0x007D0104, 0xD70F6A04, 0x00020819, 0xD5286A05, + 0x01A90105, 0xBF82FFEE, 0xBEFE04C1, 0x7E060211, 0xD70F6A02, 0x00020010, + 0xD5286A03, 0x01A90103, 0x7E0A0213, 0xD70F6A04, 0x00020012, 0xD5286A05, + 0x01A90105, 0xD4E1006A, 0x00002902, 0xBF860006, 0x87FE6A7E, 0xDC200000, + 0x017D0002, 0xBF8C3F70, 0xDC600000, 0x007D0104, 0xBF810000, +}; + +static const unsigned int kCodeCopyMisaligned10[] = { + 0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xF4080300, 0xFA000020, + 0xF4000400, 0xFA000030, 0xBF8CC07F, 0x8F028602, 0xD70F6A00, 0x00020002, + 0x7E060205, 0xD70F6A02, 0x00020004, 0xD5286A03, 0x01A90103, 0x7E0A0207, + 0xD70F6A04, 0x00020006, 0xD5286A05, 0x01A90105, 0xD4E1006A, 0x00001102, + 0xBF860032, 0xDC200000, 0x067D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, + 0x01A90103, 0xDC200000, 0x077D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, + 0x01A90103, 0xDC200000, 0x087D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, + 0x01A90103, 0xDC200000, 0x097D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, + 0x01A90103, 0xBF8C3F70, 0xDC600000, 0x007D0604, 0xD70F6A04, 0x00020810, + 0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0704, 0xD70F6A04, 0x00020810, + 0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0804, 0xD70F6A04, 0x00020810, + 0xD5286A05, 0x01A90105, 0xDC600000, 0x007D0904, 0xD70F6A04, 0x00020810, + 0xD5286A05, 0x01A90105, 0xBF82FFCB, 0x7E060209, 0xD70F6A02, 0x00020008, + 0xD5286A03, 0x01A90103, 0x7E0A020B, 0xD70F6A04, 0x0002000A, 0xD5286A05, + 0x01A90105, 0xD4E1006A, 0x00001902, 0xBF86000F, 0x87FE6A7E, 0xDC200000, + 0x017D0002, 0xD70F6A02, 0x00020410, 0xD5286A03, 0x01A90103, 0xBF8C3F70, + 0xDC600000, 0x007D0104, 0xD70F6A04, 0x00020810, 0xD5286A05, 0x01A90105, + 0xBF82FFEE, 0xBF810000, +}; + +static const unsigned int kCodeFill10[] = { + 0xF4080100, 0xFA000000, 0xF4080200, 0xFA000010, 0xBF8CC07F, 0x8F028602, + 0xD70F6A00, 0x00020002, 0x7E08020A, 0x7E0A020A, 0x7E0C020A, 0x7E0E020A, + 0x8F0C840B, 0x34020084, 0x7E060205, 0xD70F6A02, 0x00020204, 0xD5286A03, + 0x01A90103, 0xD4E1006A, 0x00000D02, 0xBF860007, 0xDC780000, 0x007D0402, + 0xD70F6A02, 0x0002040C, 0xD5286A03, 0x01A90103, 0xBF82FFF6, 0x8F0C820B, + 0x34020082, 0x7E060207, 0xD70F6A02, 0x00020206, 0xD5286A03, 0x01A90103, + 0xD4E1006A, 0x00001102, 0xBF860008, 0x87FE6A7E, 0xDC700000, 0x007D0402, + 0xD70F6A02, 0x0002040C, 0xD5286A03, 0x01A90103, 0xBF82FFF5, 0xBF810000, +}; + +static const unsigned int kCodeTrapHandler10[] = { + 0xB96EF803, 0x8770FF6E, 0x10000100, 0xBF06FF70, 0x00000100, 0xBEF003FF, + 0x20000000, 0xBF85000E, 0x8770FF6E, 0x00000800, 0xBEF003F4, 0xBF85000A, + 0x93EEFF6D, 0x00080010, 0xBF84002C, 0xBF06826E, 0xBEF003FF, 0x80000000, + 0xBF850003, 0x806C846C, 0x826D806D, 0xBF820025, 0xBEFE03FF, 0x80000000, + 0xBF90000A, 0xBF800007, 0xBF0C9F7E, 0xBF84FFFD, 0x876EFF7E, 0x000003FF, + 0x8F6E836E, 0xF4051BBD, 0xDC000000, 0xBF8CC07F, 0xF4051BB7, 0xFA0000C0, + 0xBF8CC07F, 0xBEF10380, 0xF6811C37, 0xFA000008, 0xBF8CC07F, 0x88707170, + 0xBF85000E, 0xF4051C37, 0xFA000010, 0xBF8CC07F, 0x87F07070, 0xBF840009, + 0xF4011BB7, 0xFA000018, 0xBF8CC07F, 0xF4411BB8, 0xFA000000, 0xBF8CC07F, + 0xBEFC0380, 0xBF800000, 0xBF900001, 0x8878FF78, 0x00002000, 0x906E8977, + 0x876FFF6E, 0x003F8000, 0x906E8677, 0x876EFF6E, 0x02000000, 0x886E6F6E, + 0xB9EEF807, 0x87FE7E7E, 0x87EA6A6A, 0xB9F8F802, 0xBE80226C, +}; + } // namespace amd #endif // header guard diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 78c10ec230..3a8074662f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -173,6 +173,7 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar ASICShader compute_7; ASICShader compute_8; ASICShader compute_9; + ASICShader compute_10; }; std::map compiled_shaders = { @@ -181,24 +182,28 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar {NULL, 0, 0, 0}, {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}, {kCodeTrapHandler9, sizeof(kCodeTrapHandler9), 2, 4}, + {kCodeTrapHandler10, sizeof(kCodeTrapHandler10), 2, 4}, }}, {"CopyAligned", { {kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12}, {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}, {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}, + {kCodeCopyAligned10, sizeof(kCodeCopyAligned10), 32, 12}, }}, {"CopyMisaligned", { {kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10}, {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}, {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}, + {kCodeCopyMisaligned10, sizeof(kCodeCopyMisaligned10), 23, 10}, }}, {"Fill", { {kCodeFill7, sizeof(kCodeFill7), 19, 8}, {kCodeFill8, sizeof(kCodeFill8), 19, 8}, {kCodeFill8, sizeof(kCodeFill8), 19, 8}, + {kCodeFill10, sizeof(kCodeFill10), 19, 8}, }}}; auto compiled_shader_it = compiled_shaders.find(func_name); @@ -217,6 +222,9 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar case 9: asic_shader = &compiled_shader_it->second.compute_9; break; + case 10: + asic_shader = &compiled_shader_it->second.compute_10; + break; default: assert(false && "Precompiled shader unavailable for target"); }