Comhaid
rocm-systems/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-codeobj/disassembly.cpp
T
Giovanni Lenzi Baraldi 2cd198a7e7 Gbaraldi/threadtrace2 (#724)
* Added first ATT API

* Finalizing thread trace API

* Fixing more rebase conflicts

* Added codeobj disassembly sample

* Fixing merge issues with rebase [2]

* Adding ATT packets

* Implemented thread trace intercept

* Moved codeobj parser to same repo as rocprofiler

* Moved thread trace to new API

* Fixing merge conflicts

* Fixing more merge conflicts

* Adding thread trace packet reuse

* Merged aql_profile_v2 headers

* Linked ATT sample to aqlprofile

* Updated decoder to include non-loaded codeobjs

* Implemented ISA decoder into ATT sample

* Added marker_id to vaddr

* Updating aql_profile_v2 API to memcpy

* Updating thread trace API to include 64bit markers. Using the result of ISA matching.

* Added instruction type and cycles summary

* Updated sample with selection of kernel by kernel_object

* Added option to copy from memory kernels

* Moved tool_data in thread_trace to dynamic alloc

* Restoring hsa.cpp

* Fixed ATT sample crash. General improvements.

* Moved codeobj library to outside src/

* Updated license header

* Moved codeobj_capture to camelcase

* Solving some more merge conflicts

* Update samples/advanced_thread_trace/CMakeLists.txt

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update samples/advanced_thread_trace/CMakeLists.txt

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update samples/code_object_isa_decode/CMakeLists.txt

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update source/lib/rocprofiler-sdk/thread_trace/CMakeLists.txt

* Removing unused parameter check

* Adding const to isEmpty

* Removing unused warning

* Adding libdw-dev to requirements

* Running clang-format

* Commenting out new aql calls

* Clang format

* Unused variable fix

* Adding codeobj-decoder coverage

* Commenting out threadtrace

* Update samples/CMakeLists.txt

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* P

* WOverloaded

* Addressing clang-tidy

* Virtual destructor on ttracer class

* Corr id

* Fixing code source format

* Update CMakeLists.txt

* Build fixes

* Update source/lib/rocprofiler-sdk-codeobj/code_object_track.cpp

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Fix shadowing

* Update CMakeLists.txt

* Update samples/CMakeLists.txt

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Ammar ELWazir <ammar.elwazir@amd.com>
Co-authored-by: Ammar ELWazir <aelwazir@amd.com>
Co-authored-by: Benjamin Welton <bewelton@amd.com>

[ROCm/rocprofiler-sdk commit: 69b8a43dc6]
2024-04-08 12:43:02 -07:00

373 línte
14 KiB
C++

// MIT License
//
// Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#if !defined(_GNU_SOURCE) || !defined(_XOPEN_SOURCE)
# define _XOPEN_SOURCE 700
#endif
#include <cxxabi.h>
#include <elf.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

#include <elfutils/libdw.h>
#include <hsa/amd_hsa_elf.h>

#include "lib/rocprofiler-sdk-codeobj/code_printing.hpp"
// Evaluates a comgr call and, when it yields a non-zero amd_comgr_status_s, prints the call
// site, the status code and comgr's textual reason to std::cerr, then throws std::exception.
// The body is wrapped in do { } while(0) so the macro expands to exactly one statement and
// can be used safely in an unbraced if/else; `call` is parenthesized before conversion.
// Usage: THROW_COMGR(amd_comgr_xxx(...));
#define THROW_COMGR(call) \
    do \
    { \
        if(amd_comgr_status_s status = (call)) \
        { \
            const char* reason = ""; \
            amd_comgr_status_string(status, &reason); \
            std::cerr << __FILE__ << ':' << __LINE__ << " code: " << status \
                      << " failed: " << reason << std::endl; \
            throw std::exception(); \
        } \
    } while(0)
// Evaluates a comgr call and, when it yields a non-zero amd_comgr_status_s, prints the call
// site, the status code and comgr's textual reason to std::cerr, then makes the enclosing
// function return AMD_COMGR_STATUS_ERROR.
// The body is wrapped in do { } while(0) so the macro expands to exactly one statement and
// can be used safely in an unbraced if/else; `call` is parenthesized before conversion.
// Usage: RETURN_COMGR(amd_comgr_xxx(...));
#define RETURN_COMGR(call) \
    do \
    { \
        if(amd_comgr_status_s status = (call)) \
        { \
            const char* reason = ""; \
            amd_comgr_status_string(status, &reason); \
            std::cerr << __FILE__ << ':' << __LINE__ << " code: " << status \
                      << " failed: " << reason << std::endl; \
            return AMD_COMGR_STATUS_ERROR; \
        } \
    } while(0)
// Loads the code object referenced by `uri` into `buffer`.
//
// The URI is parsed as `<protocol>://<percent-encoded path>` optionally followed by '#' or
// '?' and a list of `key=value` pairs separated by '&'. The keys `offset` and `size` are
// honoured (parsed with std::stoul, base auto-detected). Only the `file` protocol is
// supported.
//
// Behaviour:
//  - an explicit `size` of 0 leaves `buffer` empty and returns before the protocol is checked
//    or the file is opened;
//  - without `size`, everything from `offset` to the end of the file is read;
//  - throws std::string when the protocol is not `file`, when the file cannot be opened, or
//    when `offset` lies beyond the end of the file. std::stoul may throw on malformed numbers.
CodeObjectBinary::CodeObjectBinary(const std::string& uri)
: m_uri(uri)
{
    const std::string protocol_delim{"://"};

    size_t      protocol_end = m_uri.find(protocol_delim);
    std::string protocol     = m_uri.substr(0, protocol_end);
    if(protocol_end == std::string::npos)
    {
        // No "://" at all: there is no path or query to parse. Park the cursor at the end of
        // the URI instead of letting npos wrap around into the middle of the string; the
        // protocol check below then rejects the URI.
        protocol_end = m_uri.length();
    }
    else
    {
        protocol_end += protocol_delim.length();
    }

    std::transform(protocol.begin(), protocol.end(), protocol.begin(), [](unsigned char c) {
        return static_cast<char>(std::tolower(c));
    });

    std::string path;
    size_t      path_end = m_uri.find_first_of("#?", protocol_end);
    if(path_end != std::string::npos)
    {
        path = m_uri.substr(protocol_end, path_end - protocol_end);
        ++path_end;  // step over the '#'/'?' so the query starts right after it
    }
    else
    {
        path = m_uri.substr(protocol_end);
    }

    /* %-decode the string. */
    std::string decoded_path;
    decoded_path.reserve(path.length());
    for(size_t i = 0; i < path.length(); ++i)
    {
        // Both hex digits must lie inside the string. The unsigned char casts keep
        // std::isxdigit away from negative char values, which are undefined behaviour.
        const bool is_escape = path[i] == '%' && i + 2 < path.length() &&
                               std::isxdigit(static_cast<unsigned char>(path[i + 1])) &&
                               std::isxdigit(static_cast<unsigned char>(path[i + 2]));
        if(is_escape)
        {
            decoded_path += static_cast<char>(std::stoi(path.substr(i + 1, 2), nullptr, 16));
            i += 2;
        }
        else
        {
            decoded_path += path[i];
        }
    }

    /* Tokenize the query/fragment. */
    std::vector<std::string> tokens;
    size_t                   last = path_end;
    size_t                   pos  = 0;
    while((pos = m_uri.find('&', last)) != std::string::npos)
    {
        tokens.emplace_back(m_uri.substr(last, pos - last));
        last = pos + 1;
    }
    if(last != std::string::npos)
    {
        tokens.emplace_back(m_uri.substr(last));
    }

    /* Create a tag-value map from the tokenized query/fragment. */
    std::unordered_map<std::string, std::string> params;
    for(const std::string& token : tokens)
    {
        const size_t delim = token.find('=');
        if(delim != std::string::npos)
        {
            // emplace keeps the first occurrence of a duplicated key
            params.emplace(token.substr(0, delim), token.substr(delim + 1));
        }
    }

    buffer.clear();

    size_t offset = 0;
    size_t size   = 0;

    if(auto offset_it = params.find("offset"); offset_it != params.end())
    {
        offset = std::stoul(offset_it->second, nullptr, 0);
    }

    if(auto size_it = params.find("size"); size_it != params.end())
    {
        size = std::stoul(size_it->second, nullptr, 0);
        // An explicit zero size means "nothing to load".
        if(size == 0) return;
    }

    if(protocol != "file") throw protocol + " protocol not supported!";

    std::ifstream file(decoded_path, std::ios::in | std::ios::binary);
    if(!file || !file.is_open()) throw "could not open " + decoded_path;

    if(size == 0)
    {
        // No size given: measure the file by skipping to its end, then reset the stream state
        // (ignore() leaves eofbit set).
        file.ignore(std::numeric_limits<std::streamsize>::max());
        const size_t bytes = static_cast<size_t>(file.gcount());
        file.clear();

        if(bytes < offset) throw "invalid uri " + decoded_path + " (file size < offset)";
        size = bytes - offset;
    }

    file.seekg(offset, std::ios_base::beg);
    buffer.resize(size);
    // buffer.data() is valid for an empty vector, whereas &buffer[0] is not.
    if(!buffer.empty())
    {
        file.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));
    }
}
// Builds a disassembler for the code object image at `codeobj_data` (`codeobj_size` bytes).
//
// The image is copied into `buffer`; ReadInstruction() and memory_callback() later address
// into that copy. A comgr executable data object (`data`) is created over the copy, its ISA
// name is queried, and a disassembly info handle (`info`) is created for that ISA with
// memory_callback / inst_callback as the read and print hooks.
//
// Throws std::exception (via THROW_COMGR) if any comgr call fails. Because the destructor
// does not run for a partially constructed object, the data handle is released here when a
// later step throws.
DisassemblyInstance::DisassemblyInstance(const char* codeobj_data, uint64_t codeobj_size)
{
    buffer.assign(codeobj_data, codeobj_data + codeobj_size);

    THROW_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data));

    try
    {
        THROW_COMGR(amd_comgr_set_data(data, buffer.size(), buffer.data()));

        // Fixed-size scratch buffer for the ISA name; comgr receives its capacity via isa_size.
        size_t      isa_size = 128;
        std::string input_isa{};
        input_isa.resize(isa_size);
        THROW_COMGR(amd_comgr_get_data_isa_name(data, &isa_size, input_isa.data()));

        THROW_COMGR(amd_comgr_create_disassembly_info(
            input_isa.data(),
            &DisassemblyInstance::memory_callback,
            &DisassemblyInstance::inst_callback,
            [](uint64_t, void*) {},  // address-print callback: intentionally a no-op
            &info));
    } catch(...)
    {
        // `info` was not created on this path; only `data` needs to be given back.
        amd_comgr_release_data(data);
        throw;
    }
}
// Symbol iteration hook passed to amd_comgr_iterate_symbols (see GetKernelMap).
//
// symbol    - symbol currently visited by comgr
// user_data - the DisassemblyInstance whose symbol_map is being filled
//
// Symbols that are not of type AMD_COMGR_SYMBOL_TYPE_FUNC are skipped. For function symbols
// the address, size and name are queried, and the symbol is stored in symbol_map (keyed by
// virtual address) when va2fo() can translate that address to a file offset.
// Returns AMD_COMGR_STATUS_ERROR (via RETURN_COMGR) if a comgr query fails, otherwise
// AMD_COMGR_STATUS_SUCCESS.
amd_comgr_status_t
DisassemblyInstance::symbol_callback(amd_comgr_symbol_t symbol, void* user_data)
{
    amd_comgr_symbol_type_t kind;
    RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_TYPE, &kind));

    if(kind == AMD_COMGR_SYMBOL_TYPE_FUNC)
    {
        uint64_t address     = 0;
        uint64_t byte_size   = 0;
        uint64_t name_length = 0;
        RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_VALUE, &address));
        RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_SIZE, &byte_size));
        RETURN_COMGR(
            amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME_LENGTH, &name_length));

        std::string symbol_name;
        symbol_name.resize(name_length);
        RETURN_COMGR(
            amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME, symbol_name.data()));

        DisassemblyInstance* self = static_cast<DisassemblyInstance*>(user_data);

        // Only symbols that map into a loadable segment are recorded.
        const std::optional<uint64_t> file_offset = self->va2fo(address);
        if(file_offset.has_value())
        {
            self->symbol_map[address] = {symbol_name, *file_offset, address, byte_size};
        }
    }

    return AMD_COMGR_STATUS_SUCCESS;
}
// Rebuilds symbol_map from scratch by iterating every symbol of the comgr data object with
// symbol_callback, then returns a reference to the member map.
// Throws std::exception (via THROW_COMGR) if the iteration reports a failure.
std::map<uint64_t, SymbolInfo>&
DisassemblyInstance::GetKernelMap()
{
    // Drop entries left over from a previous call.
    symbol_map.clear();

    THROW_COMGR(amd_comgr_iterate_symbols(data, &DisassemblyInstance::symbol_callback, this));

    return symbol_map;
}
// Gives the comgr handles back: first the data object, then the disassembly info.
// The status codes returned by comgr are not inspected.
DisassemblyInstance::~DisassemblyInstance()
{
    static_cast<void>(amd_comgr_release_data(data));
    static_cast<void>(amd_comgr_destroy_disassembly_info(info));
}
std::pair<std::string, size_t>
DisassemblyInstance::ReadInstruction(uint64_t faddr)
{
uint64_t size_read;
uint64_t addr_in_buffer = reinterpret_cast<uint64_t>(buffer.data()) + faddr;
THROW_COMGR(amd_comgr_disassemble_instruction(info, addr_in_buffer, (void*) this, &size_read));
return {std::move(this->last_instruction), size_read};
}
// Memory read hook registered with comgr's disassembler.
//
// from      - absolute address to read from; expected to point into instance.buffer
// to        - destination provided by the caller
// size      - number of bytes requested
// user_data - the DisassemblyInstance that owns the buffer
//
// Copies at most `size` bytes, clamped to the end of the buffer, and returns the number of
// bytes copied. An address outside the buffer yields 0 bytes instead of a negative length
// being turned into a huge memcpy.
// NOTE(review): comgr is presumed to treat a 0-byte read as a failed read - confirm.
uint64_t
DisassemblyInstance::memory_callback(uint64_t from, char* to, uint64_t size, void* user_data)
{
    const DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);

    const uint64_t begin = reinterpret_cast<uint64_t>(instance.buffer.data());
    const uint64_t end   = begin + instance.buffer.size();

    // Reject reads that start before the buffer or at/after its end.
    if(from < begin || from >= end) return 0;

    const uint64_t copysize = std::min<uint64_t>(size, end - from);
    std::memcpy(to, reinterpret_cast<const char*>(from), copysize);
    return copysize;
}
void
DisassemblyInstance::inst_callback(const char* instruction, void* user_data)
{
DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
if(!instruction) return;
while(*instruction == '\t' || *instruction == ' ')
instruction++;
instance.last_instruction = instruction;
}
// Validation helper for functions returning std::optional: when `x` is false, prints the
// source location and `msg` to std::cerr and returns std::nullopt from the enclosing
// function. Wrapped in do { } while(0) so it expands to exactly one statement and can be
// used safely in an unbraced if/else.
// Usage: CHECK_VA2FO(condition, "message");
#define CHECK_VA2FO(x, msg) \
    do \
    { \
        if(!(x)) \
        { \
            std::cerr << __FILE__ << ' ' << __LINE__ << ' ' << msg << std::endl; \
            return std::nullopt; \
        } \
    } while(0)
// mem - input argument, start of the elf
// va - input argument, virtual address
// return file offset, if found
std::optional<uint64_t>
DisassemblyInstance::va2fo(uint64_t va)
{
CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
uint8_t* e_ident = (uint8_t*) buffer.data();
CHECK_VA2FO(e_ident, "e_ident is nullptr");
CHECK_VA2FO(e_ident[EI_MAG0] == ELFMAG0 || e_ident[EI_MAG1] == ELFMAG1 ||
e_ident[EI_MAG2] == ELFMAG2 || e_ident[EI_MAG3] == ELFMAG3,
"unexpected ei_mag");
CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
CHECK_VA2FO(e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
e_ident[EI_ABIVERSION] == 3,
"unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
Elf64_Ehdr* ehdr = (Elf64_Ehdr*) buffer.data();
CHECK_VA2FO(ehdr, "ehdr is nullptr");
CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine");
CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr), "buffer is not large enough");
Elf64_Phdr* phdr = (Elf64_Phdr*) ((uint8_t*) buffer.data() + ehdr->e_phoff);
CHECK_VA2FO(phdr, "phdr is nullptr");
for(uint16_t i = 0; i < ehdr->e_phnum; ++i)
{
if(phdr[i].p_type != PT_LOAD) continue;
if(va < phdr[i].p_vaddr || va >= (phdr[i].p_vaddr + phdr[i].p_memsz)) continue;
return va + phdr[i].p_offset - phdr[i].p_vaddr;
}
return std::nullopt;
}
#undef CHECK_VA2FO
// Validation helper for functions whose failure value is a value-initialized return object:
// when `x` is false, prints the source location and `msg` to std::cerr and executes
// `return {}` in the enclosing function. Wrapped in do { } while(0) so it expands to exactly
// one statement and can be used safely in an unbraced if/else.
// Usage: CHECK_VA2FO(condition, "message");
#define CHECK_VA2FO(x, msg) \
    do \
    { \
        if(!(x)) \
        { \
            std::cerr << __FILE__ << ' ' << __LINE__ << ' ' << msg << std::endl; \
            return {}; \
        } \
    } while(0)
std::vector<std::pair<uint64_t, uint64_t>>
DisassemblyInstance::getSegments()
{
CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
uint8_t* e_ident = (uint8_t*) buffer.data();
CHECK_VA2FO(e_ident, "e_ident is nullptr");
CHECK_VA2FO(e_ident[EI_MAG0] == ELFMAG0 || e_ident[EI_MAG1] == ELFMAG1 ||
e_ident[EI_MAG2] == ELFMAG2 || e_ident[EI_MAG3] == ELFMAG3,
"unexpected ei_mag");
CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
CHECK_VA2FO(e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
e_ident[EI_ABIVERSION] == 3,
"unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
Elf64_Ehdr* ehdr = (Elf64_Ehdr*) buffer.data();
CHECK_VA2FO(ehdr, "ehdr is nullptr");
CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine");
CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr), "buffer is not large enough");
Elf64_Phdr* phdr = (Elf64_Phdr*) ((uint8_t*) buffer.data() + ehdr->e_phoff);
CHECK_VA2FO(phdr, "phdr is nullptr");
std::vector<std::pair<uint64_t, uint64_t>> segments;
for(Elf64_Half i = 0; i < ehdr->e_phnum; ++i)
{
if(phdr[i].p_type != PT_LOAD) continue;
segments.push_back({phdr[i].p_vaddr - phdr[i].p_offset, phdr[i].p_memsz});
}
return segments;
}