From 2c71b68fdbc0dc912f467d251f3e60251d4d4eca Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 5 Apr 2017 17:30:13 -0500 Subject: [PATCH] Fix gfx9 trap handler to retrieve correct return address The trap protocol changed between gfx8 and gfx9. The return address is in trap temporaries [0,1] on gfx9 rather than [4,5] on gfx8. Unfortunately SP3 changes the meaning of the ttmp register aliases in gfx9, further confusing the issue. Clean up later when LLVM assembly build is introduced to the runtime. Change-Id: I84ea9bf3736f060dd95d0361f9d5a0f9a3576178 [ROCm/ROCR-Runtime commit: f0a1c7c4c691aeea86761f26ce15f2954c1daf7e] --- .../hsa-runtime/core/inc/amd_gpu_shaders.h | 9 ++++++ .../core/runtime/amd_gpu_agent.cpp | 32 ++++++++++++++----- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h index 2aa074981f..da0dec47af 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_shaders.h @@ -108,6 +108,15 @@ static const unsigned int kCodeTrapHandler8[] = { 0x00000200, 0xBF8C0F70, 0xBF900001, 0xBF8D0001, 0xBE801F70, }; +static const unsigned int kCodeTrapHandler9[] = { + 0xc0061b80, 0x000000c0, 0xbf8cc07f, 0xbefe0181, 0x806e886e, 0x826f806f, + 0x7e00026e, 0x7e02026f, 0x7e0402ff, 0x80000000, 0x7e060280, 0xdd800000, + 0x007f0200, 0xbf8c0f70, 0x7dd40500, 0xbf870011, 0xc0061c37, 0x00000008, + 0xbf8cc07f, 0x86f07070, 0xbf84000c, 0x806e906e, 0x826f806f, 0xc0021bb7, + 0x00000000, 0xbf8cc07f, 0x7e000270, 0x7e020271, 0x7e04026e, 0xdc700000, + 0x007f0200, 0xbf8c0f70, 0xbf900001, 0xbf8d0001, 0xbe801f6c, 0x00000000, +}; + static const unsigned int kCodeCopyAligned8[] = { 0xC00A0100, 0x00000000, 0xC00A0200, 0x00000010, 0xC00A0300, 0x00000020, 0xC00A0400, 0x00000030, 0xC00A0500, 0x00000040, 0xC0020600, 0x00000050, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 82175997d3..13097f2744 100755 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -175,20 +175,34 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, struct CompiledShader { ASICShader compute_7; ASICShader compute_8; + ASICShader compute_9; }; std::map compiled_shaders = { {"TrapHandler", - {{NULL, 0, 0, 0}, {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}}}, + { + {NULL, 0, 0, 0}, + {kCodeTrapHandler8, sizeof(kCodeTrapHandler8), 2, 4}, + {kCodeTrapHandler9, sizeof(kCodeTrapHandler9), 2, 4}, + }}, {"CopyAligned", - {{kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12}, - {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}}}, + { + {kCodeCopyAligned7, sizeof(kCodeCopyAligned7), 32, 12}, + {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}, + {kCodeCopyAligned8, sizeof(kCodeCopyAligned8), 32, 12}, + }}, {"CopyMisaligned", - {{kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10}, - {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}}}, + { + {kCodeCopyMisaligned7, sizeof(kCodeCopyMisaligned7), 23, 10}, + {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}, + {kCodeCopyMisaligned8, sizeof(kCodeCopyMisaligned8), 23, 10}, + }}, {"Fill", - {{kCodeFill7, sizeof(kCodeFill7), 19, 8}, - {kCodeFill8, sizeof(kCodeFill8), 19, 8}}}}; + { + {kCodeFill7, sizeof(kCodeFill7), 19, 8}, + {kCodeFill8, sizeof(kCodeFill8), 19, 8}, + {kCodeFill8, sizeof(kCodeFill8), 19, 8}, + }}}; auto compiled_shader_it = compiled_shaders.find(func_name); assert(compiled_shader_it != compiled_shaders.end() && @@ -201,9 +215,11 @@ void GpuAgent::AssembleShader(const char* src_sp3, const char* func_name, asic_shader = &compiled_shader_it->second.compute_7; break; case 8: - case 9: // ISA-compatible with 8 asic_shader = &compiled_shader_it->second.compute_8; break; + case 9: + asic_shader = &compiled_shader_it->second.compute_9; + break; default: assert(false && "Precompiled shader unavailable for target"); }