diff --git a/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp b/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp index b49a73199d..df87480523 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_elf_image.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // diff --git a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp index 8e4b3051d9..ed87533591 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // @@ -53,6 +53,9 @@ #include #include +#ifdef SP3_STATIC_LIB +#include "sp3.h" +#endif // SP3_STATIC_LIB #ifndef _WIN32 #define _alloca alloca @@ -1230,7 +1233,241 @@ namespace code { void AmdHsaCode::PrintDisassembly(std::ostream& out, const unsigned char *isa, size_t size, uint32_t isa_offset) { + #ifdef SP3_STATIC_LIB + // Default ASIC is CI; replaced below when GetNoteIsa() reports a known major version. 
+ std::string asic = "CI"; + std::string vendor_name, architecture_name; + uint32_t major_version, minor_version, stepping; + if (GetNoteIsa(vendor_name, architecture_name, &major_version, &minor_version, &stepping)) { + if (major_version == 7) { + asic = "CI"; + } else if (major_version == 8) { + asic = "VI"; + } else if (major_version == 9) { + asic = "GREENLAND"; + } else { + assert(!"unknown compute capability"); + } + } + + struct sp3_context *dis_state = sp3_new(); + sp3_setasic(dis_state, asic.c_str()); + + sp3_vma *dis_vma = sp3_vm_new_ptr(0, size / 4, (const uint32_t*)isa); + + std::vector comments(HsaText()->size() / 4, 0); + for (size_t i = 0; i < SymbolCount(); ++i) { + Symbol* sym = GetSymbol(i); + if (sym->IsKernelSymbol() && sym->IsDefinition()) { + comments[sym->SectionOffset() / 4] = COMMENT_AMD_KERNEL_CODE_T_BEGIN; + comments[(sym->SectionOffset() + 252) / 4] = COMMENT_AMD_KERNEL_CODE_T_END; + amd_kernel_code_t kernel_code; + HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t)); + comments[(kernel_code.kernel_code_entry_byte_offset + sym->SectionOffset()) / 4] = COMMENT_KERNEL_ISA_BEGIN; + } + } + sp3_vma *comment_vma = sp3_vm_new_ptr(0, comments.size(), (const uint32_t*)comments.data()); + sp3_setcomments(dis_state, comment_vma, CommentTopCallBack, CommentRightCallBack, this); + + // When isa_offset == 0, disassemble the full hsatext section. + // Otherwise, disassemble only from this offset until the endpgm instruction. + char *text = sp3_disasm( + dis_state, + dis_vma, + isa_offset / 4, + nullptr, + SP3_SHTYPE_CS, + nullptr, + (unsigned)(size / 4), + isa_offset == 0 ? 
SP3DIS_FORCEVALID | SP3DIS_COMMENTS : SP3DIS_COMMENTS); + + enum class IsaState { + UNKNOWN, + AMD_KERNEL_CODE_T_BEGIN, + AMD_KERNEL_CODE_T, + AMD_KERNEL_CODE_T_END, + ISA_BEGIN, + ISA, + PADDING, + }; + + std::string line; + char *text_ptr = text; + IsaState state = IsaState::UNKNOWN; + + uint32_t offset = 0; + uint32_t padding_end = 0; + std::string padding; + + while (text_ptr && text_ptr[0] != '\0') { + line.clear(); + while (text_ptr[0] != '\0' && text_ptr[0] != '\n') { + line.push_back(text_ptr[0]); + ++text_ptr; + } + ltrim(line); + if (text_ptr[0] == '\n') { + ++text_ptr; + } + switch (state) { + case IsaState::UNKNOWN: + assert(line != "// amd_kernel_code_t end"); + padding.clear(); + if (line == "// amd_kernel_code_t begin") { + state = IsaState::AMD_KERNEL_CODE_T_BEGIN; + } else if (line == "// isa begin") { + state = IsaState::ISA_BEGIN; + } else if (line == "end") { + out << line << std::endl; + } else if (line.find("v_cndmask_b32 v0, s0, v0, vcc") != std::string::npos) { + padding += " " + line + "\n"; + offset = ParseInstructionOffset(line); + padding_end = ParseInstructionOffset(line); + state = IsaState::PADDING; + } else if (line != "shader (null)") { + out << " " << line << std::endl; + } + break; + + case IsaState::AMD_KERNEL_CODE_T_BEGIN: + assert(line != "// amd_kernel_code_t begin"); + assert(line != "// amd_kernel_code_t end"); + assert(line != "// isa begin"); + assert(line != "end"); + padding.clear(); + offset = ParseInstructionOffset(line); + state = IsaState::AMD_KERNEL_CODE_T; + break; + + case IsaState::AMD_KERNEL_CODE_T: + assert(line != "// amd_kernel_code_t begin"); + assert(line != "// isa begin"); + assert(line != "end"); + assert(padding.empty()); + if (line == "// amd_kernel_code_t end") { + state = IsaState::AMD_KERNEL_CODE_T_END; + } + break; + + case IsaState::AMD_KERNEL_CODE_T_END: + assert(line != "// amd_kernel_code_t begin"); + assert(line != "// amd_kernel_code_t end"); + assert(line != "// isa begin"); + assert(line != 
"end"); + assert(padding.empty()); + for (size_t i = 0; i < SymbolCount(); ++i) { + Symbol* sym = GetSymbol(i); + if (sym->IsKernelSymbol() && sym->IsDefinition() && sym->SectionOffset() == offset) { + std::ostream::fmtflags flags = out.flags(); + char fill = out.fill(); + out << " //" << std::endl; + out << " // amd_kernel_code_t for " << sym->Name() + << " (" << std::hex << std::setw(12) << std::setfill('0') << std::right << offset + << " - " << std::setw(12) << (offset + 256) << ')' << std::endl; + out << " //" << std::endl; + out << std::setfill(fill); + out.flags(flags); + break; + } + } + state = IsaState::UNKNOWN; + break; + + case IsaState::ISA_BEGIN: + assert(line != "// amd_kernel_code_t begin"); + assert(line != "// amd_kernel_code_t end"); + assert(line != "// isa begin"); + padding.clear(); + offset = ParseInstructionOffset(line); + for (size_t i = 0; i < SymbolCount(); ++i) { + Symbol* sym = GetSymbol(i); + if (sym->IsKernelSymbol() && sym->IsDefinition()) { + amd_kernel_code_t kernel_code; + HsaText()->getData(sym->SectionOffset(), &kernel_code, sizeof(amd_kernel_code_t)); + if ((sym->SectionOffset() + kernel_code.kernel_code_entry_byte_offset) == offset) { + out << " //" << std::endl; + out << " // " << sym->Name() << ':' << std::endl; + out << " //" << std::endl; + break; + } + } + } + if (line == "end") { + out << line << std::endl; + state = IsaState::UNKNOWN; + } else { + out << " " << line << std::endl; + state = IsaState::ISA; + } + break; + + case IsaState::ISA: + assert(line != "// amd_kernel_code_t end"); + if (!padding.empty()) { + out << padding; + out.flush(); + padding.clear(); + } + if (line == "// amd_kernel_code_t begin") { + state = IsaState::AMD_KERNEL_CODE_T_BEGIN; + } else if (line == "// isa begin") { + state = IsaState::ISA_BEGIN; + } else if (line == "end") { + out << line << std::endl; + state = IsaState::UNKNOWN; + } else if (line.find("v_cndmask_b32 v0, s0, v0, vcc") != std::string::npos) { + padding += " " + line + "\n"; + 
offset = ParseInstructionOffset(line); + padding_end = offset; + state = IsaState::PADDING; + } else { + out << " " << line << std::endl; + } + break; + + case IsaState::PADDING: + assert(line != "// amd_kernel_code_t end"); + if (line.find("v_cndmask_b32 v0, s0, v0, vcc") != std::string::npos) { + padding += " " + line + "\n"; + padding_end = ParseInstructionOffset(line); + } else if (line == "// amd_kernel_code_t begin" || line == "// isa begin" || line == "end") { + padding.clear(); + std::ostream::fmtflags flags = out.flags(); + char fill = out.fill(); + out << " //" << std::endl; + out << " // padding (" + << std::hex << std::setw(12) << std::setfill('0') << std::right << offset + << " - " << std::setw(12) << (padding_end + 4) << ')' << std::endl; + out << " //" << std::endl; + out << std::setfill(fill); + out.flags(flags); + if (line == "// amd_kernel_code_t begin") { + state = IsaState::AMD_KERNEL_CODE_T_BEGIN; + } else if (line == "// isa begin") { + state = IsaState::ISA_BEGIN; + } else if (line == "end") { + out << line << std::endl; + state = IsaState::UNKNOWN; + } + } else { + padding += " " + line + "\n"; + state = IsaState::ISA; + } + break; + + default: + assert(false); + break; + } + } + + sp3_free(text); + sp3_close(dis_state); + sp3_vm_free(dis_vma); + sp3_vm_free(comment_vma); + #else PrintRawData(out, isa, size); + #endif // SP3_STATIC_LIB out << std::dec; } diff --git a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.cpp b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.cpp index 66955333de..82482c11c6 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // diff --git a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.hpp b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.hpp index 30d07e2df9..1d084da8b5 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.hpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_hsa_code_util.hpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // diff --git a/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.cpp b/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.cpp index 7547697831..004c9cc626 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // diff --git a/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.hpp b/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.hpp index 1bfa1ad5d2..6bc7632d1f 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.hpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_hsa_locks.hpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // diff --git a/runtime/hsa-runtime/libamdhsacode/amd_options.cpp b/runtime/hsa-runtime/libamdhsacode/amd_options.cpp index 2c72f2d018..5e462ab1c2 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_options.cpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_options.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // diff --git a/runtime/hsa-runtime/libamdhsacode/amd_options.hpp b/runtime/hsa-runtime/libamdhsacode/amd_options.hpp index 25d812250b..ffeb61fadc 100644 --- a/runtime/hsa-runtime/libamdhsacode/amd_options.hpp +++ b/runtime/hsa-runtime/libamdhsacode/amd_options.hpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // diff --git a/runtime/hsa-runtime/loader/executable.cpp b/runtime/hsa-runtime/loader/executable.cpp index b66e4ca1ac..e1438212c6 100644 --- a/runtime/hsa-runtime/loader/executable.cpp +++ b/runtime/hsa-runtime/loader/executable.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -54,19 +54,6 @@ using namespace amd::hsa; using namespace amd::hsa::common; -namespace { - -bool IsBasePm4(hsa_profile_t profile) { - if (profile == HSA_PROFILE_FULL) { return false; } - char *emulate_aql = getenv("HSA_EMULATE_AQL"); - if (nullptr == emulate_aql) { return false; } - char *tools_lib = getenv("HSA_TOOLS_LIB"); - if (nullptr == tools_lib) { return false; } - return "1" == std::string(emulate_aql) && 0 != std::string(tools_lib).size(); -} - -} // namespace anonymous - namespace amd { namespace hsa { namespace loader { @@ -116,6 +103,23 @@ hsa_status_t AmdHsaCodeLoader::IterateExecutables( return HSA_STATUS_SUCCESS; } +uint64_t AmdHsaCodeLoader::FindHostAddress(uint64_t device_address) +{ + if (device_address == 0) { + return 0; + } + std::lock_guard lock(executables_mutex); + for (auto &exec : executables) { + if (exec != nullptr) { + uint64_t host_address = exec->FindHostAddress(device_address); + if (host_address != 0) { + return host_address; + } + } + } + return 0; +} + //===----------------------------------------------------------------------===// // SymbolImpl. // //===----------------------------------------------------------------------===// @@ -669,6 +673,23 @@ hsa_status_t ExecutableImpl::IterateLoadedCodeObjects( return HSA_STATUS_SUCCESS; } +uint64_t ExecutableImpl::FindHostAddress(uint64_t device_address) +{ + for (auto &obj : loaded_code_objects) { + assert(obj); + for (auto &seg : obj->LoadedSegments()) { + assert(seg); + uint64_t paddr = (uint64_t)(uintptr_t)seg->Address(seg->VAddr()); + if (paddr <= device_address && device_address < paddr + seg->Size()) { + void *haddr = context_->SegmentHostAddress( + seg->ElfSegment(), seg->Agent(), seg->Ptr(), device_address - paddr); + return nullptr == haddr ? 
0 : (uint64_t)(uintptr_t)haddr; + } + } + } + return 0; +} + #define HSAERRCHECK(hsc) \ if (hsc != HSA_STATUS_SUCCESS) { \ assert(false); \ @@ -742,6 +763,18 @@ hsa_status_t ExecutableImpl::LoadCodeObject( if (majorVersion != 1 && majorVersion != 2) { return HSA_STATUS_ERROR_INVALID_CODE_OBJECT; } + uint32_t codeHsailMajor; + uint32_t codeHsailMinor; + hsa_profile_t codeProfile; + hsa_machine_model_t codeMachineModel; + hsa_default_float_rounding_mode_t codeRoundingMode; + if (!code->GetNoteHsail(&codeHsailMajor, &codeHsailMinor, &codeProfile, &codeMachineModel, &codeRoundingMode)) { + codeProfile = HSA_PROFILE_FULL; + } + if (profile_ != codeProfile) { + return HSA_STATUS_ERROR_INCOMPATIBLE_ARGUMENTS; + } + hsa_status_t status; objects.push_back(new LoadedCodeObjectImpl(this, agent, code->ElfData(), code->ElfSize())); @@ -861,6 +894,13 @@ hsa_status_t ExecutableImpl::LoadDefinitionSymbol(hsa_agent_t agent, code::Symbo bool is_dynamic_callstack = AMD_HSA_BITS_GET(akc.kernel_code_properties, AMD_KERNEL_CODE_PROPERTIES_IS_DYNAMIC_CALLSTACK) ? true : false; + uint64_t size = sym->Size(); + + if (!size && sym->SectionOffset() < sym->GetSection()->size()) { + // ORCA Runtime relies on the symbol size being equal to the size of the kernel ISA. If the symbol size is 0 in the ELF, + // compute it as (section size - symbol section offset). 
+ size = sym->GetSection()->size() - sym->SectionOffset(); + } KernelSymbol *kernel_symbol = new KernelSymbol(true, sym->Name(), sym->Linkage(), @@ -870,27 +910,13 @@ hsa_status_t ExecutableImpl::LoadDefinitionSymbol(hsa_agent_t agent, code::Symbo group_segment_size, private_segment_size, is_dynamic_callstack, - sym->Size(), + size, 256, address); kernel_symbol->debug_info.elf_raw = code->ElfData(); kernel_symbol->debug_info.elf_size = code->ElfSize(); kernel_symbol->debug_info.kernel_name = kernel_symbol->name.c_str(); kernel_symbol->debug_info.owning_segment = (void*)SymbolSegment(agent, sym)->Address(sym->GetSection()->addr()); - kernel_symbol->debug_info.profile = profile_; - - // \todo kzhuravl 11/17/15 This is a temporary rt hack: needs to be - // removed when large bar is supported. - if (IsBasePm4(profile_)) { - kernel_symbol->debug_info.gpuva = kernel_symbol->address; - Segment *kernel_symbol_segment = SymbolSegment(agent, sym); - kernel_symbol->address = - (uint64_t) (uintptr_t) context_->SegmentHostAddress( - kernel_symbol_segment->ElfSegment(), - kernel_symbol_segment->Agent(), - kernel_symbol_segment->Ptr(), - kernel_symbol_segment->Offset(sym->VAddr())); - } symbol = kernel_symbol; // \todo kzhuravl 10/15/15 This is a debugger backdoor: needs to be diff --git a/runtime/hsa-runtime/loader/executable.hpp b/runtime/hsa-runtime/loader/executable.hpp index d4e19cfbb8..d6b8a73093 100644 --- a/runtime/hsa-runtime/loader/executable.hpp +++ b/runtime/hsa-runtime/loader/executable.hpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -393,6 +393,8 @@ public: void *data), void *data); + uint64_t FindHostAddress(uint64_t device_address) override; + Context* context() { return context_; } size_t id() { return id_; } @@ -456,6 +458,8 @@ public: hsa_executable_t executable, void *data), void *data) override; + + uint64_t FindHostAddress(uint64_t device_address) override; }; } // namespace loader diff --git a/runtime/hsa-runtime/loader/loaders.cpp b/runtime/hsa-runtime/loader/loaders.cpp index e3345db19b..f01d48d276 100644 --- a/runtime/hsa-runtime/loader/loaders.cpp +++ b/runtime/hsa-runtime/loader/loaders.cpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. // // Developed by: // @@ -87,6 +87,10 @@ namespace loader { gfx803.handle = 803; gfx804.handle = 804; gfx810.handle = 810; +#if defined(GFX9_BUILD) + gfx900.handle = 900; + gfx901.handle = 901; +#endif // GFX9_BUILD } hsa_isa_t OfflineLoaderContext::IsaFromName(const char *name) @@ -108,6 +112,12 @@ namespace loader { return gfx804; } else if (sname == "AMD:AMDGPU:8:1:0") { return gfx810; +#if defined(GFX9_BUILD) + } else if (sname == "AMD:AMDGPU:9:0:0") { + return gfx900; + } else if (sname == "AMD:AMDGPU:9:0:1") { + return gfx901; +#endif // GFX9_BUILD } else { assert(0); return invalid; diff --git a/runtime/hsa-runtime/loader/loaders.hpp b/runtime/hsa-runtime/loader/loaders.hpp index 9a1df578d9..85a9ed2ece 100644 --- a/runtime/hsa-runtime/loader/loaders.hpp +++ b/runtime/hsa-runtime/loader/loaders.hpp @@ -3,7 +3,7 @@ // The University of Illinois/NCSA // Open Source License (NCSA) // -// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2014-2016, Advanced Micro Devices, Inc. All rights reserved. 
// // Developed by: // @@ -55,7 +55,11 @@ namespace loader { private: hsa_isa_t invalid; hsa_isa_t gfx700, gfx701, gfx800, gfx801, gfx802, gfx803, gfx804, gfx810; +#if defined(GFX9_BUILD) + hsa_isa_t gfx900, gfx901; +#else hsa_isa_t reserved; +#endif // GFX9_BUILD std::ostream& out; typedef std::set PointerSet; PointerSet pointers;