Fix gfx9 trap handler to retrieve correct return address
The trap protocol changed between gfx8 and gfx9: the return address
is held in trap temporaries [0,1] on gfx9, rather than in [4,5] as on gfx8.
Unfortunately SP3 changes the meaning of the ttmp register aliases
in gfx9, further confusing the issue.
To be cleaned up later, once an LLVM assembly build is introduced to the runtime.
Change-Id: I84ea9bf3736f060dd95d0361f9d5a0f9a3576178
[ROCm/ROCR-Runtime commit: f0a1c7c4c6]
This commit is contained in:
@@ -108,6 +108,15 @@ static const unsigned int kCodeTrapHandler8[] = {
|
||||
0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70,
|
||||
};
|
||||
|
||||
static const unsigned int kCodeTrapHandler9[] = {
|
||||
0xc0061b80, 0x000000c0, 0xbf8cc07f, 0xbefe0181, 0x806e886e, 0x826f806f,
|
||||
0x7e00026e, 0x7e02026f, 0x7e0402ff, 0x80000000, 0x7e060280, 0xdd800000,
|
||||
0x007f0200, 0xbf8c0f70, 0x7dd40500, 0xbf870011, 0xc0061c37, 0x00000008,
|
||||
0xbf8cc07f, 0x86f07070, 0xbf84000c, 0x806e906e, 0x826f806f, 0xc0021bb7,
|
||||
0x00000000, 0xbf8cc07f, 0x7e000270, 0x7e020271, 0x7e04026e, 0xdc700000,
|
||||
0x007f0200, 0xbf8c0f70, 0xbf900001, 0xbf8d0001, 0xbe801f6c, 0x00000000,
|
||||
};
|
||||
|
||||
static const unsigned int kCodeCopyAligned8[] = {
|
||||
0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020,
|
||||
0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050,
|
||||
|
||||
@@ -175,20 +175,34 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
struct CompiledShader {
|
||||
ASICShader compute_7;
|
||||
ASICShader compute_8;
|
||||
ASICShader compute_9;
|
||||
};
|
||||
|
||||
std::map<std::string, CompiledShader> compiled_shaders = {
|
||||
{"TrapHandler",
|
||||
{{NULL, 0, 0, 0}, {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}}},
|
||||
{
|
||||
{NULL, 0, 0, 0},
|
||||
{kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4},
|
||||
{kCodeTrapHandler9, sizeof(kCodeTrapHandler9), 2, 4},
|
||||
}},
|
||||
{"CopyAligned",
|
||||
{{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},
|
||||
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}}},
|
||||
{
|
||||
{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12},
|
||||
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},
|
||||
{kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12},
|
||||
}},
|
||||
{"CopyMisaligned",
|
||||
{{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},
|
||||
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}}},
|
||||
{
|
||||
{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10},
|
||||
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},
|
||||
{kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10},
|
||||
}},
|
||||
{"Fill",
|
||||
{{kCodeFill7, sizeof(kCodeFill7), 19, 8},
|
||||
{kCodeFill8, sizeof(kCodeFill8), 19, 8}}}};
|
||||
{
|
||||
{kCodeFill7, sizeof(kCodeFill7), 19, 8},
|
||||
{kCodeFill8, sizeof(kCodeFill8), 19, 8},
|
||||
{kCodeFill8, sizeof(kCodeFill8), 19, 8},
|
||||
}}};
|
||||
|
||||
auto compiled_shader_it = compiled_shaders.find(func_name);
|
||||
assert(compiled_shader_it != compiled_shaders.end() &&
|
||||
@@ -201,9 +215,11 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name,
|
||||
asic_shader = &compiled_shader_it->second.compute_7;
|
||||
break;
|
||||
case 8:
|
||||
case 9: // ISA-compatible with 8
|
||||
asic_shader = &compiled_shader_it->second.compute_8;
|
||||
break;
|
||||
case 9:
|
||||
asic_shader = &compiled_shader_it->second.compute_9;
|
||||
break;
|
||||
default:
|
||||
assert(false && "Precompiled shader unavailable for target");
|
||||
}
|
||||
|
||||
Reference in new issue
Block a user