SWDEV-432445: ATT continuous mode update part2. Added codeobj tracking.

Change-Id: I1b58af70d221bbeb9b4cab960d26357a504045dd


[ROCm/rocprofiler commit: edf93d48ab]
Этот коммит содержится в:
Giovanni LB
2023-11-30 14:16:29 -03:00
коммит произвёл Giovanni Baraldi
родитель 34fd4840d1
Коммит f275cdd602
18 изменённых файлов: 731 добавлений и 289 удалений
+1
Просмотреть файл
@@ -56,6 +56,7 @@ configure_file(trace_view.py att/trace_view.py COPYONLY)
configure_file(stitch.py att/stitch.py COPYONLY)
configure_file(drawing.py att/drawing.py COPYONLY)
configure_file(att_to_csv.py att/att_to_csv.py COPYONLY)
configure_file(service.py att/service.py COPYONLY)
configure_file(ui/index.html att/ui/index.html COPYONLY)
configure_file(ui/logo.svg att/ui/logo.svg COPYONLY)
configure_file(ui/styles.css att/ui/styles.css COPYONLY)
+55 -76
Просмотреть файл
@@ -69,11 +69,8 @@ catch (...) \
return returndata; \
}
code_object_decoder_t::code_object_decoder_t(const char* codeobj_data, uint64_t codeobj_size) {
buffer = std::vector<char>{};
buffer.resize(codeobj_size);
std::memcpy(buffer.data(), codeobj_data, codeobj_size);
CodeObjDecoderComponent::CodeObjDecoderComponent(const char* codeobj_data, uint64_t codeobj_size)
{
m_fd = -1;
#if defined(_GNU_SOURCE) && defined(MFD_ALLOW_SEALING) && defined(MFD_CLOEXEC)
m_fd = ::memfd_create(m_uri.c_str(), MFD_ALLOW_SEALING | MFD_CLOEXEC);
@@ -86,7 +83,7 @@ code_object_decoder_t::code_object_decoder_t(const char* codeobj_data, uint64_t
return;
}
if (size_t size = ::write(m_fd, buffer.data(), buffer.size()); size != buffer.size()) {
if (size_t size = ::write(m_fd, codeobj_data, codeobj_size); size != codeobj_size) {
printf("could not write to the temporary file\n");
return;
}
@@ -130,26 +127,20 @@ code_object_decoder_t::code_object_decoder_t(const char* codeobj_data, uint64_t
// load_symbol_map();
}
try {
disassembly = std::make_unique<DisassemblyInstance>(*this); // Can throw
} catch(std::exception& e) {
return;
}
disassembly = std::make_unique<DisassemblyInstance>(codeobj_data, codeobj_size, std::nullopt); // Can throw
try {
m_symbol_map = disassembly->GetKernelMap(); // Can throw
} catch(std::exception& e) {
return;
}
} catch(...) {}
//disassemble_kernels();
}
code_object_decoder_t::~code_object_decoder_t() {
CodeObjDecoderComponent::~CodeObjDecoderComponent() {
if (m_fd) ::close(m_fd);
}
std::optional<SymbolInfo> code_object_decoder_t::find_symbol(uint64_t vaddr) {
std::optional<SymbolInfo> CodeObjDecoderComponent::find_symbol(uint64_t vaddr) {
/* Load the symbol table. */
auto it = m_symbol_map.upper_bound(vaddr);
if (it == m_symbol_map.begin())
@@ -172,7 +163,7 @@ std::optional<SymbolInfo> code_object_decoder_t::find_symbol(uint64_t vaddr) {
}
std::pair<instruction_instance_t, size_t>
code_object_decoder_t::disassemble_instruction(uint64_t faddr, uint64_t vaddr)
CodeObjDecoderComponent::disassemble_instruction(uint64_t faddr, uint64_t vaddr)
{
if (!disassembly)
throw std::exception();
@@ -191,7 +182,7 @@ code_object_decoder_t::disassemble_instruction(uint64_t faddr, uint64_t vaddr)
return {disassembly->last_instruction, size};
}
void code_object_decoder_t::disassemble_kernel(uint64_t faddr, uint64_t vaddr)
void CodeObjDecoderComponent::disassemble_kernel(uint64_t faddr, uint64_t vaddr)
{
if (!disassembly) return;
auto symbol = find_symbol(vaddr);
@@ -216,17 +207,18 @@ void code_object_decoder_t::disassemble_kernel(uint64_t faddr, uint64_t vaddr)
}
}
void code_object_decoder_t::disassemble_kernels() {
void CodeObjDecoderComponent::disassemble_kernels() {
for (auto& [vaddr, v] : m_symbol_map) disassemble_kernel(v.faddr, vaddr);
}
void code_object_decoder_t::disassemble_single_kernel(uint64_t kaddr) {
void CodeObjDecoderComponent::disassemble_single_kernel(uint64_t kaddr) {
for (auto& [vaddr, v] : m_symbol_map)
if (kaddr >= vaddr && kaddr < vaddr + v.mem_size)
disassemble_kernel(v.faddr, vaddr);
}
CodeobjService::CodeobjService(const char* filepath, uint64_t load_base): load_base(load_base)
CodeobjDecoder::CodeobjDecoder(const char* filepath, uint64_t loadbase, uint64_t mem_size):
loadbase(loadbase), load_end(loadbase + mem_size)
{
if (!filepath) throw "Empty filepath.";
@@ -245,22 +237,23 @@ CodeobjService::CodeobjService(const char* filepath, uint64_t load_base): load_b
file.seekg(0, file.beg);
file.read(buffer.data(), buffer.size());
decoder = std::make_unique<code_object_decoder_t>(buffer.data(), buffer.size());
decoder = std::make_unique<CodeObjDecoderComponent>(buffer.data(), buffer.size());
}
else
{
std::unique_ptr<CodeObjectBinary> binary = std::make_unique<CodeObjectBinary>(filepath);
decoder = std::make_unique<code_object_decoder_t>(binary->buffer.data(), binary->buffer.size());
auto& buffer = binary->buffer;
decoder = std::make_unique<CodeObjDecoderComponent>(buffer.data(), buffer.size());
}
auto elf_segments = decoder->disassembly->getSegments();
}
bool CodeobjService::decode_single(uint64_t vaddr, uint64_t faddr)
bool CodeobjDecoder::add_to_map(uint64_t faddr, uint64_t vaddr, uint64_t voffset)
{
if (!decoder->disassembly) return false;
try
{
decoded_map[vaddr] = decoder->disassemble_instruction(faddr, vaddr-load_base);
decoded_map[vaddr] = decoder->disassemble_instruction(faddr, voffset);
}
catch(std::exception& e)
{
@@ -269,28 +262,27 @@ bool CodeobjService::decode_single(uint64_t vaddr, uint64_t faddr)
return true;
}
std::pair<instruction_instance_t, size_t>& CodeobjService::getDecoded(uint64_t addr)
/**
 * @brief Decodes one instruction addressed by an offset and stores it in the decoded map.
 *
 * @param vaddr   Key under which add_to_map() stores the decoded instruction.
 * @param voffset Offset handed to DisassemblyInstance::va2fo() to obtain the file offset.
 * @return true when the offset translates to a file offset and add_to_map() succeeds,
 *         false when there is no decoder/disassembly instance or the translation fails.
 */
bool CodeobjDecoder::decode_single_at_offset(uint64_t vaddr, uint64_t voffset)
{
    // Both pointers can be empty: other members of this file test `decoder` and
    // `decoder->disassembly` before use, so refuse to decode instead of dereferencing null.
    if (!decoder || !decoder->disassembly)
        return false;

    const auto faddr = decoder->disassembly->va2fo(voffset);
    if (!faddr)
        return false;

    return add_to_map(*faddr, vaddr, voffset);
}
/**
 * @brief Decodes the instruction at an absolute virtual address.
 *
 * @param vaddr Virtual address; it is converted to an offset by subtracting loadbase.
 * @return false when there is no decoder or vaddr lies below loadbase, otherwise the
 *         result of decode_single_at_offset().
 */
bool CodeobjDecoder::decode_single(uint64_t vaddr)
{
    if (!decoder)
        return false;
    if (vaddr < loadbase)
        return false;

    const uint64_t voffset = vaddr - loadbase;
    return decode_single_at_offset(vaddr, voffset);
}
std::pair<instruction_instance_t, size_t>& CodeobjDecoder::getDecoded(uint64_t addr)
{
if (decoded_map.find(addr) != decoded_map.end())
return decoded_map[addr];
std::optional<uint64_t> faddr{};
if (!bNotElfFILE)
{
faddr = DisassemblyInstance::va2fo(decoder->buffer.data(), addr-load_base);
if (!faddr)
bNotElfFILE = true;
}
if (bNotElfFILE && decoder->buffer.size() > 0x100) {
uint64_t f_offset = *reinterpret_cast<uint32_t*>(decoder->buffer.data()+0xb8);
uint64_t v_offset = *reinterpret_cast<uint32_t*>(decoder->buffer.data()+0xc8);
faddr = addr+f_offset-load_base-v_offset;
}
if (!faddr || !decode_single(addr, *faddr))
if (!decode_single(addr))
{
std::cerr << "Invalid addr: " << std::hex << addr << std::dec << std::endl;
throw std::exception();
@@ -299,62 +291,49 @@ std::pair<instruction_instance_t, size_t>& CodeobjService::getDecoded(uint64_t a
return decoded_map[addr];
}
std::unordered_map<uint64_t, std::unique_ptr<CodeobjService>> services{};
std::atomic<uint64_t> shandles{1};
#define PUBLIC_API __attribute__((visibility("default")))
CodeobjTableTranslation table;
extern "C"
{
PUBLIC_API uint64_t createService(const char* filename, uint64_t load_base)
PUBLIC_API int addDecoder(const char* filename, uint32_t id, uint64_t loadbase, uint64_t memsize)
{
C_API_BEGIN
uint64_t handle = shandles.fetch_add(1);
services[handle] = std::make_unique<CodeobjService>(filename, load_base);
return handle;
table.addDecoder(filename, id, loadbase, memsize);
return 0;
C_API_END(0)
C_API_END(1)
}
PUBLIC_API int deleteService(uint64_t handle)
PUBLIC_API int removeDecoder(uint32_t id, uint64_t loadbase)
{
return services.erase(handle);
return table.removeDecoder(id, loadbase) != false;
}
PUBLIC_API const char* getInstruction(uint64_t handle, uint64_t addr)
PUBLIC_API instruction_info_t getInstructionFromAddr(uint64_t vaddr)
{
static instruction_info_t default_info{nullptr, nullptr, 0};
C_API_BEGIN
return services.at(handle)->getInstruction(addr);
return table.get(vaddr);
C_API_END(nullptr)
C_API_END(default_info)
}
PUBLIC_API const char* getCppref(uint64_t handle, uint64_t addr)
{
C_API_BEGIN
return services.at(handle)->getCppref(addr);
C_API_END(nullptr)
}
PUBLIC_API size_t getInstSize(uint64_t handle, uint64_t addr)
PUBLIC_API instruction_info_t getInstructionFromID(uint32_t id, uint64_t offset)
{
static instruction_info_t default_info{nullptr, nullptr, 0};
C_API_BEGIN
return services.at(handle)->getSize(addr);
return table.get(id, offset);
C_API_END(0)
C_API_END(default_info)
}
PUBLIC_API const char* getSymbolName(uint64_t addr)
{
C_API_BEGIN
for (auto& [handle, service] : services)
{
if (!service->inrange(addr)) continue;
return service->getSymbolName(addr);
}
return nullptr;
return table.getSymbolName(addr);
C_API_END(nullptr)
}
}
}
+109 -18
Просмотреть файл
@@ -30,13 +30,15 @@
#include <unordered_map>
#include "disassembly.hpp"
#include "segment.hpp"
class code_object_decoder_t {
class CodeObjDecoderComponent
{
public:
std::optional<SymbolInfo> find_symbol(uint64_t address);
code_object_decoder_t(const char* codeobj_data, uint64_t codeobj_size);
~code_object_decoder_t();
CodeObjDecoderComponent(const char* codeobj_data, uint64_t codeobj_size);
~CodeObjDecoderComponent();
std::pair<instruction_instance_t, size_t>
disassemble_instruction(uint64_t faddr, uint64_t vaddr);
@@ -50,44 +52,133 @@ public:
std::map<uint64_t, SymbolInfo> m_symbol_map{};
std::string m_uri;
std::vector<char> buffer;
std::vector<instruction_instance_t> instructions{};
std::unique_ptr<DisassemblyInstance> disassembly{};
};
class CodeobjService
typedef struct {
const char* inst;
const char* cpp;
size_t size;
} instruction_info_t;
class CodeobjDecoder
{
public:
CodeobjService(const char* filepath, uint64_t load_base);
bool decode_single(uint64_t vaddr, uint64_t faddr);
CodeobjDecoder(const char* filepath, uint64_t loadbase, uint64_t memsize);
bool decode_single(uint64_t vaddr);
bool decode_single_at_offset(uint64_t vaddr, uint64_t voffset);
bool add_to_map(uint64_t faddr, uint64_t vaddr, uint64_t voffset);
std::pair<instruction_instance_t, size_t>& getDecoded(uint64_t addr);
const char* getInstruction(uint64_t addr) { return getDecoded(addr).first.instruction; }
const char* getCppref(uint64_t addr) { return getDecoded(addr).first.cpp_reference; }
size_t getSize(uint64_t addr) { return getDecoded(addr).second; }
uint64_t size() const {
if (!decoder) return 0;
return decoder->buffer.size();
instruction_info_t get(uint64_t addr) {
auto& inst = getDecoded(addr);
return {inst.first.instruction, inst.first.cpp_reference, inst.second};
}
uint64_t begin() const { return load_base; };
uint64_t end() const { return begin() + size(); }
uint64_t begin() const { return loadbase; };
uint64_t end() const { return load_end; }
uint64_t size() const { return load_end-loadbase; }
bool inrange(uint64_t addr) const { return addr >= begin() && addr < end(); }
const char* getSymbolName(uint64_t addr) const {
if (!decoder) return nullptr;
auto it = decoder->m_symbol_map.find(addr-load_base);
auto it = decoder->m_symbol_map.find(addr-loadbase);
if (it != decoder->m_symbol_map.end())
return it->second.name.data();
return nullptr;
}
std::vector<std::pair<uint64_t, uint64_t>> elf_segments{};
private:
const uint64_t load_base;
const uint64_t loadbase;
uint64_t load_end = 0;
std::unordered_map<uint64_t, std::pair<instruction_instance_t, size_t>> decoded_map;
std::unique_ptr<code_object_decoder_t> decoder{nullptr};
bool bNotElfFILE = false;
std::unique_ptr<CodeObjDecoderComponent> decoder{nullptr};
};
/**
 * @brief Maps ID and offsets into instructions
 *
 * Keeps one CodeobjDecoder per numeric id and answers instruction / symbol
 * queries expressed as (id, offset-from-load-base).
 */
class CodeobjList
{
public:
    CodeobjList() = default;

    // The class exposes virtual members, so give it a virtual destructor to keep
    // destruction through a base pointer well defined.
    virtual ~CodeobjList() = default;

    /**
     * @brief Creates a decoder for @p filepath and stores it under @p id.
     *        An existing entry with the same id is replaced.
     */
    virtual void addDecoder(const char* filepath, uint32_t id, uint64_t loadbase, uint64_t memsize)
    {
        decoders[id] = std::make_shared<CodeobjDecoder>(filepath, loadbase, memsize);
    }

    /**
     * @brief Drops the decoder stored under @p id.
     * @return true when an entry was erased.
     */
    virtual bool removeDecoder(uint32_t id)
    {
        return decoders.erase(id) != 0;
    }

    /**
     * @brief Returns the decoded instruction at @p offset from the decoder's begin().
     *        decoders.at() throws std::out_of_range for an unknown id.
     */
    instruction_info_t get(uint32_t id, uint64_t offset)
    {
        auto& decoder = decoders.at(id);
        auto& inst = decoder->getDecoded(decoder->begin() + offset);
        return {inst.first.instruction, inst.first.cpp_reference, inst.second};
    }

    /**
     * @brief Looks up the symbol name at @p offset inside decoder @p id.
     * @return The decoder's answer, or nullptr when begin()+offset is outside its range.
     */
    const char* getSymbolName(uint32_t id, uint64_t offset)
    {
        auto& decoder = decoders.at(id);
        uint64_t vaddr = decoder->begin() + offset;
        if (decoder->inrange(vaddr))
            return decoder->getSymbolName(vaddr);
        return nullptr;
    }

protected:
    std::unordered_map<uint32_t, std::shared_ptr<CodeobjDecoder>> decoders{};
};
/**
* @brief Translates virtual addresses to elf file offsets
*/
class CodeobjTableTranslation : protected CodeobjList
{
using Super = CodeobjList;
public:
CodeobjTableTranslation() = default;
void addDecoder(const char* filepath, uint32_t id, uint64_t loadbase, uint64_t memsize) override
{
this->Super::addDecoder(filepath, id, loadbase, memsize);
auto ptr = decoders.at(id);
table.insert({ptr->begin(), static_cast<uint32_t>(ptr->size()), id, 0});
}
virtual bool removeDecoder(uint32_t id, uint64_t loadbase)
{
return table.remove(loadbase) && this->Super::removeDecoder(id);
}
instruction_info_t get(uint64_t vaddr)
{
auto& addr_range = table.find_codeobj_in_range(vaddr);
return get(addr_range.id, vaddr - addr_range.vbegin);
}
instruction_info_t get(uint32_t id, uint64_t offset) { return this->Super::get(id, offset); }
const char* getSymbolName(uint64_t vaddr)
{
for (auto& [_, decoder] : decoders)
{
if (!decoder->inrange(vaddr)) continue;
return decoder->getSymbolName(vaddr);
}
return nullptr;
}
private:
CodeobjTableTranslator table;
};
+122 -54
Просмотреть файл
@@ -122,58 +122,65 @@ CodeObjectBinary::CodeObjectBinary(const std::string& uri) : m_uri(uri) {
});
buffer = std::vector<char>{};
try {
size_t offset{0}, size{0};
size_t offset{0}, size{0};
if (auto offset_it = params.find("offset"); offset_it != params.end()) {
offset = std::stoul(offset_it->second, nullptr, 0);
}
if (auto size_it = params.find("size"); size_it != params.end()) {
if (!(size = std::stoul(size_it->second, nullptr, 0))) return;
}
if (protocol != "file") throw protocol + " protocol not supported!";
std::ifstream file(decoded_path, std::ios::in | std::ios::binary);
if (!file || !file.is_open()) throw "could not open " + decoded_path;
if (!size) {
file.ignore(std::numeric_limits<std::streamsize>::max());
size_t bytes = file.gcount();
file.clear();
if (bytes < offset)
throw "invalid uri " + decoded_path + " (file size < offset)";
size = bytes - offset;
}
file.seekg(offset, std::ios_base::beg);
buffer.resize(size);
file.read(&buffer[0], size);
} catch (...) {
if (auto offset_it = params.find("offset"); offset_it != params.end()) {
offset = std::stoul(offset_it->second, nullptr, 0);
}
if (auto size_it = params.find("size"); size_it != params.end()) {
if (!(size = std::stoul(size_it->second, nullptr, 0))) return;
}
if (protocol != "file") throw protocol + " protocol not supported!";
std::ifstream file(decoded_path, std::ios::in | std::ios::binary);
if (!file || !file.is_open()) throw "could not open " + decoded_path;
if (!size) {
file.ignore(std::numeric_limits<std::streamsize>::max());
size_t bytes = file.gcount();
file.clear();
if (bytes < offset)
throw "invalid uri " + decoded_path + " (file size < offset)";
size = bytes - offset;
}
file.seekg(offset, std::ios_base::beg);
buffer.resize(size);
file.read(&buffer[0], size);
}
DisassemblyInstance::DisassemblyInstance(code_object_decoder_t& decoder)
: buffer(reinterpret_cast<void*>(decoder.buffer.data())),
size(decoder.buffer.size())
DisassemblyInstance::DisassemblyInstance(
const char* codeobj_data,
uint64_t codeobj_size,
std::optional<std::string> input_isa
)
{
THROW_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data));
THROW_COMGR(amd_comgr_set_data(data, size, decoder.buffer.data()));
buffer = std::vector<char>(codeobj_size, 0);
std::memcpy(buffer.data(), codeobj_data, codeobj_size);
/*std::cout << "checking isa" << std::endl;
char isa_name[128];
size_t isa_size = sizeof(isa_name);
CHECK_COMGR(amd_comgr_get_data_isa_name(data, &isa_size, isa_name));
std::cout << isa_name << std::endl; */
const char* isa_name = "amdgcn-amd-amdhsa--gfx1100";
THROW_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data));
THROW_COMGR(amd_comgr_set_data(data, buffer.size(), buffer.data()));
if (!input_isa)
{
input_isa = "amdgcn-amd-amdhsa--gfx1100";
}
if (!input_isa)
{
input_isa = std::string();
input_isa->resize(128);
size_t isa_size = sizeof(input_isa->size());
THROW_COMGR(amd_comgr_get_data_isa_name(data, &isa_size, input_isa->data()));
}
THROW_COMGR(amd_comgr_create_disassembly_info(
isa_name,
input_isa->data(),
&DisassemblyInstance::memory_callback, &DisassemblyInstance::inst_callback,
[](uint64_t address, void* user_data) {}, &info));
}
static bool IsKernelType(amd_comgr_symbol_type_t type)
@@ -208,7 +215,7 @@ amd_comgr_status_t DisassemblyInstance::symbol_callback(amd_comgr_symbol_t symbo
RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME, name.data()));
DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
std::optional<uint64_t> faddr = va2fo(instance.buffer, vaddr);
std::optional<uint64_t> faddr = instance.va2fo(vaddr);
if (faddr)
instance.symbol_map[vaddr] = {name, *faddr, mem_size};
@@ -230,7 +237,7 @@ DisassemblyInstance::~DisassemblyInstance() {
uint64_t DisassemblyInstance::ReadInstruction(uint64_t faddr, uint64_t vaddr, const char* cpp_line)
{
uint64_t size_read;
uint64_t addr_in_buffer = reinterpret_cast<uint64_t>(buffer) + faddr;
uint64_t addr_in_buffer = reinterpret_cast<uint64_t>(buffer.data()) + faddr;
THROW_COMGR(amd_comgr_disassemble_instruction(info, addr_in_buffer, (void*)this, &size_read));
@@ -242,7 +249,8 @@ uint64_t DisassemblyInstance::ReadInstruction(uint64_t faddr, uint64_t vaddr, co
uint64_t DisassemblyInstance::memory_callback(uint64_t from, char* to, uint64_t size,
void* user_data) {
DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
int64_t copysize = reinterpret_cast<int64_t>(instance.buffer) + instance.size - (int64_t)from;
int64_t copysize = reinterpret_cast<int64_t>(instance.buffer.data())
+ instance.buffer.size() - static_cast<int64_t>(from);
copysize = std::min<int64_t>(size, copysize);
std::memcpy(to, (char*)from, copysize);
return copysize;
@@ -261,11 +269,11 @@ void DisassemblyInstance::inst_callback(const char* instruction, void* user_data
// mem - input argument, start of the elf
// va - input argument, virtual address
// return file offset, if found
std::optional<uint64_t> DisassemblyInstance::va2fo(void *mem, uint64_t va)
std::optional<uint64_t> DisassemblyInstance::va2fo(uint64_t va)
{
CHECK_VA2FO(mem, "mem is nullptr");
/*CHECK_VA2FO(buffer.size(), "buffer is not large enough");
uint8_t *e_ident = (uint8_t*)mem;
uint8_t *e_ident = (uint8_t*)buffer.data();
CHECK_VA2FO(e_ident, "e_ident is nullptr");
CHECK_VA2FO(
@@ -277,20 +285,25 @@ std::optional<uint64_t> DisassemblyInstance::va2fo(void *mem, uint64_t va)
CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
CHECK_VA2FO(e_ident[EI_OSABI] == 64 /*ELFOSABI_AMDGPU_HSA*/, "unexpected ei_osabi");
CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
CHECK_VA2FO(
e_ident[EI_ABIVERSION] == 2 /*ELFABIVERSION_AMDGPU_HSA_V4*/ ||
e_ident[EI_ABIVERSION] == 3 /*ELFABIVERSION_AMDGPU_HSA_V5*/ , "unexpected ei_abiversion");
e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
e_ident[EI_ABIVERSION] == 3, "unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)mem;
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Ehdr), "buffer is not large enough");
CHECK_VA2FO(ehdr, "ehdr is nullptr");
CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine");
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine"); */
CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
Elf64_Phdr *phdr = (Elf64_Phdr*)((uint8_t*)mem + ehdr->e_phoff);
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr), "buffer is not large enough");
Elf64_Phdr *phdr = (Elf64_Phdr*)((uint8_t*)buffer.data() + ehdr->e_phoff);
CHECK_VA2FO(phdr, "phdr is nullptr");
for (uint16_t i = 0; i < ehdr->e_phnum; ++i)
@@ -304,3 +317,58 @@ std::optional<uint64_t> DisassemblyInstance::va2fo(void *mem, uint64_t va)
}
return std::nullopt;
}
#undef CHECK_VA2FO
#define CHECK_VA2FO(x, msg) if (!(x)) { \
std::cerr << __FILE__ << ' ' << __LINE__ << ' ' << msg << std::endl; \
return {}; \
}
std::vector<std::pair<uint64_t, uint64_t>> DisassemblyInstance::getSegments()
{
/*CHECK_VA2FO(buffer.size(), "buffer is not large enough");
uint8_t *e_ident = (uint8_t*)buffer.data();
CHECK_VA2FO(e_ident, "e_ident is nullptr");
CHECK_VA2FO(
e_ident[EI_MAG0] == ELFMAG0 ||
e_ident[EI_MAG1] == ELFMAG1 ||
e_ident[EI_MAG2] == ELFMAG2 ||
e_ident[EI_MAG3] == ELFMAG3, "unexpected ei_mag");
CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
CHECK_VA2FO(
e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
e_ident[EI_ABIVERSION] == 3, "unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Ehdr), "buffer is not large enough");
CHECK_VA2FO(ehdr, "ehdr is nullptr");
CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine"); */
CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr), "buffer is not large enough");
Elf64_Phdr *phdr = (Elf64_Phdr*)((uint8_t*)buffer.data() + ehdr->e_phoff);
CHECK_VA2FO(phdr, "phdr is nullptr");
std::vector<std::pair<uint64_t, uint64_t>> segments;
for (Elf64_Half i = 0; i < ehdr->e_phnum; ++i)
{
if (phdr[i].p_type != PT_LOAD)
continue;
segments.push_back({phdr[i].p_vaddr - phdr[i].p_offset, phdr[i].p_memsz});
}
return segments;
}
+10 -5
Просмотреть файл
@@ -20,9 +20,9 @@
#pragma once
#include <amd_comgr/amd_comgr.h>
#include <string>
#include <vector>
#include <amd_comgr/amd_comgr.h>
#include <memory>
#include <limits>
@@ -48,7 +48,11 @@ struct SymbolInfo
class DisassemblyInstance {
public:
DisassemblyInstance(class code_object_decoder_t& decoder);
DisassemblyInstance(
const char* codeobj_data,
uint64_t codeobj_size,
std::optional<std::string> input_isa
);
~DisassemblyInstance();
uint64_t ReadInstruction(uint64_t faddr, uint64_t vaddr, const char* cpp_line);
@@ -57,10 +61,11 @@ class DisassemblyInstance {
static uint64_t memory_callback(uint64_t from, char* to, uint64_t size, void* user_data);
static void inst_callback(const char* instruction, void* user_data);
static amd_comgr_status_t symbol_callback(amd_comgr_symbol_t symbol, void* user_data);
static std::optional<uint64_t> va2fo(void *mem, uint64_t va);
void* buffer;
int64_t size;
std::optional<uint64_t> va2fo(uint64_t va);
std::vector<std::pair<uint64_t, uint64_t>> getSegments();
std::vector<char> buffer;
instruction_instance_t last_instruction;
amd_comgr_disassembly_info_t info;
amd_comgr_data_t data;
+139
Просмотреть файл
@@ -0,0 +1,139 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include <string>
#include <vector>
#include <iostream>
#include <random>
#include <unordered_set>
#include <algorithm>
/**
 * @brief std::vector whose elements are kept ordered by the key each one
 *        reports through its begin() member.
 *
 * Positions are located with a "last element whose key is <= addr" search;
 * when no element qualifies (or the vector is empty) index 0 is reported.
 */
template<typename Type>
class ordered_vector : public std::vector<Type>
{
    using Super = std::vector<Type>;

public:
    /// Inserts @p elem after the last element with a smaller key (before one with an equal key).
    void insert(const Type& elem)
    {
        size_t pos = lower_bound(elem.begin());
        const bool step_past = !this->empty() && get(pos).begin() < elem.begin();
        if (step_past)
            ++pos;
        this->Super::insert(this->begin() + pos, elem);
    }

    /// Erases the element found for elem's key if it compares equal to @p elem (uses operator!=).
    bool remove(const Type& elem)
    {
        if (this->empty())
            return false;

        const size_t pos = lower_bound(elem.begin());
        const bool mismatch = get(pos) != elem;
        if (mismatch)
            return false;

        this->Super::erase(this->begin() + pos);
        return true;
    }

    /// Erases the element whose key equals @p elem_begin; returns whether one was erased.
    bool remove(uint64_t elem_begin)
    {
        if (this->empty())
            return false;

        const size_t pos = lower_bound(elem_begin);
        const bool mismatch = get(pos).begin() != elem_begin;
        if (mismatch)
            return false;

        this->Super::erase(this->begin() + pos);
        return true;
    }

    /// Index of the last element with key <= @p addr, or 0 when there is none.
    size_t lower_bound(size_t addr) const
    {
        return this->empty() ? 0 : binary_search(addr, 0, this->size() - 1);
    }

    /// Same search restricted to the inclusive index window [s, e]; returns s when s >= e.
    size_t binary_search(size_t addr, size_t s, size_t e) const
    {
        // Narrow the window until at most two candidates remain.
        while (s < e && e - s > 1)
        {
            const size_t mid = (s + e) / 2;
            if (get(mid).begin() <= addr)
                s = mid;
            else
                e = mid;
        }
        if (s < e && get(e).begin() <= addr)
            return e;
        return s;
    }

    const Type& get(size_t i) const { return (*this)[i]; }
};
/**
 * @brief Half-open virtual address range [vbegin, vbegin + size) tagged with an id.
 *
 * Kept as a plain aggregate so it can be brace-initialised in field order.
 * Ranges are ordered by their start address only.
 */
struct address_range_t
{
    uint64_t vbegin;  // first address covered by the range
    uint32_t size;    // number of addresses covered
    uint32_t id;      // identifier carried along with the range
    uint32_t offset;  // not read by this struct; NOTE(review): meaning defined by callers - confirm

    bool operator<(const address_range_t& other) const { return vbegin < other.vbegin; }

    // Compare the distance from vbegin instead of forming vbegin + size, which
    // would wrap around for ranges that end at the top of the 64-bit space.
    bool inrange(uint64_t addr) const { return addr >= vbegin && addr - vbegin < size; }

    uint64_t begin() const { return vbegin; }
};
/**
 * @brief Finds a candidate codeobj for the given vaddr
 *
 * Sorted table of address ranges. The index of the most recent successful
 * lookup is remembered and tried first on the next query.
 */
class CodeobjTableTranslator : protected ordered_vector<address_range_t>
{
    using Super = ordered_vector<address_range_t>;

public:
    CodeobjTableTranslator() { reset(); }

    /**
     * @brief Returns the range containing @p addr.
     * @throws std::string when no stored range contains the address.
     */
    const address_range_t& find_codeobj_in_range(uint64_t addr)
    {
        const bool cache_hit = cached_segment < size() && get(cached_segment).inrange(addr);
        if (!cache_hit)
        {
            const size_t candidate = lower_bound(addr);
            if (candidate >= size() || !get(candidate).inrange(addr))
                throw std::string("segment addr out of range");
            cached_segment = candidate;
        }
        return get(cached_segment);
    }

    /// Start address of the range containing @p addr (same failure mode as find_codeobj_in_range).
    uint64_t find_codeobj_addr_in_range(uint64_t addr)
    {
        const address_range_t& range = find_codeobj_in_range(addr);
        return range.vbegin;
    }

    const address_range_t& get(size_t index) const { return *(data() + index); }

    void insert(const address_range_t& elem) { Super::insert(elem); }

    /// Appends every range of @p arange and then re-sorts the table by start address.
    void insert_list(std::vector<address_range_t> arange)
    {
        for (const address_range_t& range : arange)
            push_back(range);
        std::sort(this->begin(), this->end());
    }

    /// Forgets the cached lookup index (all bits set never passes the `< size()` test of a real table).
    void reset() { cached_segment = ~size_t{0}; }

    void clear()
    {
        reset();
        Super::clear();
    }

    bool remove(uint64_t addr)
    {
        reset();
        return Super::remove(addr);
    }

private:
    size_t cached_segment = ~size_t{0};
};
+98 -55
Просмотреть файл
@@ -1,95 +1,138 @@
#!/usr/bin/env python3
import sys
if sys.version_info[0] < 3:
raise Exception("Must be using Python 3")
import ctypes
from ctypes import *
import os
pluginpath = '/home/giovanni/Desktop/rocprofiler/build/lib/rocprofiler/libatt_plugin.so'
HEADER_OFFSET = 62
HEADER_MASK = 0x3
ID_OFFSET = 30
ID_MASK = (1<<32)-1
OFFSET_MASK = (1<<30)-1
attplugin = ctypes.CDLL(pluginpath)
pluginpath = '../../../lib/rocprofiler/libatt_plugin.so'
filedir = os.path.dirname(os.path.realpath(__file__))
attplugin = CDLL(os.path.join(filedir, pluginpath))
attplugin.createService.restype = ctypes.c_uint64
attplugin.createService.argtypes = [ctypes.c_char_p, ctypes.c_uint64]
attplugin.deleteService.restype = ctypes.c_int
attplugin.deleteService.argtypes = [ctypes.c_uint64]
attplugin.getInstruction.restype = ctypes.c_char_p
attplugin.getInstruction.argtypes = [ctypes.c_uint64, ctypes.c_uint64]
attplugin.getCppref.restype = ctypes.c_char_p
attplugin.getCppref.argtypes = [ctypes.c_uint64, ctypes.c_uint64]
attplugin.getInstSize.restype = ctypes.c_size_t
attplugin.getInstSize.argtypes = [ctypes.c_uint64, ctypes.c_uint64]
attplugin.getSymbolName.restype = ctypes.c_char_p
attplugin.getSymbolName.argtypes = [ctypes.c_uint64]
attplugin.getSymbolName.restype = c_char_p
attplugin.getSymbolName.argtypes = [c_uint64]
class instruction_info_t(ctypes.Structure):
_fields_ = [('inst', c_char_p),
('cpp', c_char_p),
('size', c_size_t)]
attplugin.getInstructionFromAddr.restype = instruction_info_t
attplugin.getInstructionFromAddr.argtypes = [c_uint64]
attplugin.getInstructionFromID.restype = instruction_info_t
attplugin.getInstructionFromID.argtypes = [c_uint32, c_uint64]
attplugin.addDecoder.restype = c_int
attplugin.addDecoder.argtypes = [c_char_p, c_uint32, c_uint64, c_uint64]
attplugin.removeDecoder.restype = c_int
attplugin.removeDecoder.argtypes = [c_uint32, c_uint64]
def IsRawPC(addr):
return addr >> HEADER_OFFSET == 0
def getID(addr):
return (addr >> ID_OFFSET) & ID_MASK
def getOffset(addr):
return addr & OFFSET_MASK
class CodeobjInstance:
def __init__(self, line, classification_func):
def __init__(self, line):
tokens = line.split(' ')
self.load_base = int(tokens[0], 16)
self.load_end = self.load_base + int(tokens[1], 16)
self.att_marker = int(tokens[2])
self.memsize = int(tokens[1], 16)
self.att_id = int(tokens[2])
self.fpath = tokens[3]
self.handle = attplugin.createService(self.fpath.encode('utf-8'), self.load_base)
self.classifier = classification_func
if self.handle == 0:
encoded = self.fpath.encode('utf-8')
self.error = attplugin.addDecoder(encoded, self.att_id, self.load_base, self.memsize)
if self.error != 0:
print('Warning: Could not open', line)
raise
def release(self):
attplugin.deleteService(self.handle)
def inrange(self, addr):
return addr >= self.load_base and addr < self.load_end+0x1000
def GetInstruction(self, addr):
inst = attplugin.getInstruction(self.handle, addr)
if inst is None:
return None
inst = inst.decode()
while len(inst) and (inst[0] == '\t' or inst[0] == ' '):
inst = inst[1:]
while len(inst) and (inst[-1] == '\t' or inst[-1] == ' '):
inst = inst[:-1]
cpp = attplugin.getCppref(self.handle, addr)
if cpp:
cpp = cpp.decode()
size = attplugin.getInstSize(self.handle, addr)
if size and inst:
return (self.classifier(inst.encode('utf-8'), len(inst)), inst, cpp, size)
return None
attplugin.removeDecoder(self.att_id, self.load_base)
class CodeobjService:
def __init__(self, att_kernel_txt, cfunc) -> None:
def __init__(self, att_kernel_txt, cfunc):
cfunc.restype = ctypes.c_int
cfunc.argtypes = [ctypes.c_char_p, ctypes.c_size_t]
self.classifier = cfunc
self.last_instance = None
self.services = []
self.services = {}
for line in att_kernel_txt:
try:
if 'memory://' == line[0:len('memory://')]:
continue
self.services.append(CodeobjInstance(line, cfunc))
service = CodeobjInstance(line)
self.services[service.att_id] = service
except:
pass
def ToRawPC(self, addr):
if IsRawPC(addr):
return addr
return self.services[getID(addr)].load_base + getOffset(addr)
def release(self):
for _, _, instance in self.services:
for _, instance in self.services.items():
instance.release()
def GetInstruction(self, addr):
if self.last_instance and self.last_instance.inrange(addr):
return self.last_instance.GetInstruction(addr)
if addr >> HEADER_OFFSET != 0:
return self.GetInstructionFromID(getID(addr), getOffset(addr))
else:
return self.GetInstructionFromAddr(addr)
for instance in self.services:
if instance.inrange(addr):
self.last_instance = instance
return instance.GetInstruction(addr)
def GetInstructionFromAddr(self, addr):
info_inst = attplugin.getInstructionFromAddr(addr)
if info_inst.size == 0 or info_inst.inst is None:
return None
inst = info_inst.inst.decode()
cpp = info_inst.cpp
if cpp:
cpp = cpp.decode()
return None
while len(inst) and (inst[0] == '\t' or inst[0] == ' '):
inst = inst[1:]
while len(inst) and (inst[-1] == '\t' or inst[-1] == ' '):
inst = inst[:-1]
return (self.classifier(info_inst.inst, len(inst)), inst, cpp, info_inst.size)
def GetInstructionFromID(self, id, offset):
    """Look up one instruction by decoder id and offset.

    Returns ``None`` when the plugin reports a zero size or no instruction
    text. Otherwise returns ``(classification, text, cpp_reference, size)``,
    where ``text`` has surrounding spaces and tabs removed and
    ``cpp_reference`` is ``''`` when the plugin supplies none.
    """
    record = attplugin.getInstructionFromID(id, offset)
    if record.size == 0 or record.inst is None:
        return None

    # Drop leading/trailing blanks and tabs only; other whitespace is kept.
    text = record.inst.decode().strip('\t ')
    source_ref = record.cpp.decode() if record.cpp else ''

    # NOTE(review): the classifier receives the raw (unstripped) bytes together
    # with the length of the stripped text -- confirm this is intended.
    kind = self.classifier(record.inst, len(text))
    return (kind, text, source_ref, record.size)
def getSymbolName(self, addr):
name = attplugin.getSymbolName(addr)
name = attplugin.getSymbolName(self.ToRawPC(addr))
if name:
return name.decode()
return "Addr #"+hex(addr)
return "Addr #"+hex(self.ToRawPC(addr))
+18 -4
Просмотреть файл
@@ -186,6 +186,18 @@ class PCTranslator:
def jump(self, as_line):
return self.jump_map[as_line[-3]]
def addsymbol(self, addr):
    """Append a ``'; <symbol>'`` comment row to ``raw_code`` for ``addr``.

    Does nothing when ``addr`` is already present in ``addrmap``. The symbol
    name comes from ``codeservice.getSymbolName``; when that yields ``None``
    a fallback label built from the hexadecimal address is used. The new row
    copies fields 2, 4 and 5 from the current last row of ``raw_code``.
    """
    if addr in self.addrmap:
        return

    symbol = self.codeservice.getSymbolName(addr)
    if symbol is None:
        # hex() already includes the "0x" prefix, so do not add another one.
        symbol = "Unknown symbol at " + hex(addr)

    last_line = self.raw_code[-1]
    newline = ['; ' + symbol, 100, last_line[2], 0, last_line[4], last_line[5], -1, 0, 0]
    self.raw_code.append(newline)
def getcode(self, addr):
try:
return self.addrmap[addr]
@@ -330,6 +342,7 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
return None
watchlist = PCTranslator(insts, code, raw_code, reverse_map, codeservice)
watchlist.addsymbol(firstinst.cycles)
line = firstinst.cycles
lineincrement = watchlist.getincrement(line)
pcskip.append(0)
@@ -517,8 +530,8 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
insts[i] = insts[i + 1]
insts[i + 1] = temp
next = line
elif "s_waitcnt " in as_line[0] or "_load_" in as_line[0]:
if skipped_immed > 0 and "s_waitcnt " in as_line[0]:
elif "s_waitcnt" in as_line[0] or "_load_" in as_line[0]:
if skipped_immed > 0 and "s_waitcnt" in as_line[0]:
matched = True
skipped_immed -= 1
elif 'scratch_' not in as_line[0]:
@@ -530,11 +543,12 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
result.append(inst)
i += 1
num_failed_stitches = 0
elif not bGFX9 and inst.type == IMMED and line != next:
elif inst.type == IMMED and line != next and (not bGFX9 or 's_barrier' in as_line[0]):
skipped_immed += 1
inst.asmline = reverse_map[line]
result.append(inst)
next = line
if 's_barrier' in as_line[0]:
next = line + lineincrement
i += 1
else:
num_failed_stitches += 1
+20 -27
Просмотреть файл
@@ -682,13 +682,7 @@
const token_id = "token" + scroll_to
const token = d3.select('#'+token_id).node()
// scrolling not consistent without the timer
setTimeout(() => {
token.scrollIntoView({behavior: "smooth", inline: "start"})
}, 800)
const inView = isInViewport(src_line)
if (!inView) {
src_line.scrollIntoView({behavior: "smooth", block: "start"})
}
src_line.scrollIntoView({behavior: "smooth", block: "start"})
}
})
.text((d,i) => {
@@ -952,28 +946,27 @@
.attr("fill", (d) => { return INST_TYPE[d[1]][1] })
.style("cursor", "pointer")
.datum((d) => { return {data:d} })
.on('mouseenter', (d) => {
if (d) {
try {
const token = d3.select(d.toElement)
last_color = token.attr('fill')
token.attr('fill', '#e0115f')
let token_data = token.datum().data
d3.select(".highlight") .classed("highlight", false)
d3.select('.line_' + token_data[4]) .classed("highlight", true)
src_line = d3.select('.line_' + token_data[4]).node()
const inView = isInViewport(src_line)
if (!inView) {
src_line.scrollIntoView({behavior: "smooth", block: "start"})
}
} catch {}
}
.on('mouseenter', (e, d) => {
try {
d3.select('.line_' + d.data[4]).classed("highlight", true)
d3.select(".highlight") .classed("highlight", true)
} catch {}
})
.on("mouseleave", (d) => {
if (d) {
d3.select(d.fromElement) .attr("fill", last_color)
.on("mouseleave", (e, d) => {
try {
d3.select('.line_' + d.data[4]).classed("highlight", false)
d3.select(".highlight") .classed("highlight", false)
}
} catch {}
})
.on('click', (e, d) => {
try {
d3.select('.line_' + d.data[4]).classed("highlight", true)
src_line = d3.select('.line_' + d.data[4]).node()
const inView = isInViewport(src_line)
if (!inView) {
src_line.scrollIntoView({behavior: "smooth", block: "start"})
}
} catch {}
})
.append("svg:title")
.text((d) => {
+4 -5
Просмотреть файл
@@ -3,11 +3,10 @@ global: rocprofiler_plugin_initialize;
rocprofiler_plugin_finalize;
rocprofiler_plugin_write_buffer_records;
rocprofiler_plugin_write_record;
createService;
deleteService;
getInstruction;
getCppref;
getInstSize;
addDecoder;
removeDecoder;
getInstructionFromAddr;
getInstructionFromID;
getSymbolName;
local: *;
};
+1 -1
Просмотреть файл
@@ -503,7 +503,7 @@ hsa_status_t CodeObjectCallback(hsa_executable_t executable,
if (data.codeobj.unload)
codeobj_capture_instance::Unload(data.codeobj.load_base);
else
codeobj_capture_instance::Load(data.codeobj.load_base,
codeobj_capture_instance::Load(data.codeobj.load_base, data.codeobj.load_size,
uri_str, data.codeobj.memory_base, data.codeobj.memory_size);
hsa_executable_iterate_agent_symbols(executable, data.codeobj.agent,
+6 -9
Просмотреть файл
@@ -18,8 +18,6 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
//#define HSA_ATT_MARKER_ENABLE
#include "packets_generator.h"
#include "src/api/rocprofiler_singleton.h"
@@ -618,10 +616,13 @@ hsa_ven_amd_aqlprofile_profile_t* GenerateATTPackets(
// Generate ATT tracer marker packets. Also generate and return
// the descriptor object which has the PM4 buffer for inserting data
hsa_ven_amd_aqlprofile_descriptor_t
GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_t data)
hsa_ven_amd_aqlprofile_descriptor_t GenerateATTMarkerPackets(
hsa_agent_t gpu_agent,
packet_t& marker_packet,
uint32_t data,
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
)
{
#ifdef HSA_ATT_MARKER_ENABLE
// Preparing the profile structure to get the packets
auto pool = rocprofiler::HSASupport_Singleton::GetInstance()
.GetHSAAgentInfo(gpu_agent.handle)
@@ -635,7 +636,6 @@ GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_
{}, desc
};
hsa_ven_amd_aqlprofile_att_marker_channel_t channel = HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2;
hsa_status_t status = hsa_ven_amd_aqlprofile_att_marker(&profile, &marker_packet, data, channel);
if (status != HSA_STATUS_SUCCESS)
{
@@ -645,9 +645,6 @@ GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_
}
return desc;
#else
return {nullptr,0};
#endif
}
void AddVendorSpecificPacket(const packet_t* packet,
+6 -1
Просмотреть файл
@@ -61,7 +61,12 @@ hsa_ven_amd_aqlprofile_profile_t* GenerateATTPackets(
packet_t* stop_packet, size_t att_buffer_size);
hsa_ven_amd_aqlprofile_descriptor_t
GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_t data);
GenerateATTMarkerPackets(
hsa_agent_t gpu_agent,
packet_t& marker_packet,
uint32_t data,
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
);
uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_pool_t* cpu_pool);
+6 -4
Просмотреть файл
@@ -48,7 +48,7 @@ std::mutex codeobj_record::mutex;
std::unordered_map<uint64_t, CodeobjPtr> codeobj_record::codeobjs{};
std::unordered_map<uint64_t, codeobj_record::RecordInstance> codeobj_record::record_id_map{};
std::unordered_set<codeobj_record*> codeobj_record::listeners;
std::atomic<uint32_t> codeobj_capture_instance::loadcount{0};
std::atomic<uint32_t> codeobj_capture_instance::eventcount{0};
// Codeobj Record
codeobj_record::codeobj_record(rocprofiler_codeobj_capture_mode_t mode) : capture_mode(mode){};
@@ -71,16 +71,17 @@ void codeobj_record::stop_capture() {
// Codeobj Capture
void codeobj_capture_instance::Load(
uint64_t addr,
uint64_t load_size,
const std::string& URI,
uint64_t mem_addr,
uint64_t size
uint64_t mem_size
) {
uint32_t id = loadcount.fetch_add(1, std::memory_order_relaxed)+1;
uint32_t id = eventcount.fetch_add(1, std::memory_order_relaxed)+1;
auto time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
std::lock_guard<std::mutex> lock(codeobj_record::mutex);
auto inst = std::make_shared<codeobj_capture_instance>(addr, URI, mem_addr, size, time, id);
auto inst = std::make_shared<codeobj_capture_instance>(addr, load_size, URI, mem_addr, mem_size, time, id);
codeobj_record::codeobjs[addr] = inst;
for (auto* listen : codeobj_record::listeners) listen->addcapture(inst);
}
@@ -90,6 +91,7 @@ void codeobj_capture_instance::Unload(uint64_t addr) {
if (codeobj_record::codeobjs.find(addr) == codeobj_record::codeobjs.end()) return;
eventcount.fetch_add(1, std::memory_order_relaxed)+1;
auto time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
codeobj_record::codeobjs.at(addr)->end_time = time;
codeobj_record::codeobjs.erase(addr);
+21 -7
Просмотреть файл
@@ -34,30 +34,44 @@
*/
class codeobj_capture_instance {
public:
codeobj_capture_instance(uint64_t _addr, const std::string& _uri, uint64_t mem_addr,
uint64_t mem_size, uint64_t start_time, uint32_t id)
: addr(_addr), start_time(start_time), URI(_uri),
codeobj_capture_instance(
uint64_t _addr,
uint64_t _load_size,
const std::string& _uri,
uint64_t mem_addr,
uint64_t mem_size,
uint64_t start_time,
uint32_t id
)
: addr(_addr), load_size(_load_size), start_time(start_time), URI(_uri),
mem_addr(mem_addr), mem_size(mem_size), load_id(id) {};
void setmode(rocprofiler_codeobj_capture_mode_t mode);
rocprofiler_intercepted_codeobj_t get() const {
const char* buf_ptr = buffer.size() ? buffer.data() : nullptr;
return {URI.c_str(), addr, mem_size, buf_ptr, buffer.size(), start_time, end_time, load_id};
return {URI.c_str(), addr, load_size, buf_ptr, buffer.size(), start_time, end_time, load_id};
};
const uint64_t addr;
const uint64_t load_size;
const uint64_t start_time;
const uint32_t load_id;
static void Load(uint64_t addr, const std::string& URI, uint64_t mem_addr, uint64_t mem_size);
static void Load(
uint64_t addr,
uint64_t load_size,
const std::string& URI,
uint64_t mem_addr,
uint64_t mem_size
);
static void Unload(uint64_t addr);
static uint32_t GetLoadCount() { return loadcount.load(std::memory_order_relaxed); }
static uint32_t GetEventCount() { return eventcount.load(std::memory_order_relaxed); }
private:
//! 32 bits ID because this is the natural channel width for ATT Markers.
//! There is no world in which 4 billions markers can be sent anyway.
static std::atomic<uint32_t> loadcount;
static std::atomic<uint32_t> eventcount;
void reset(rocprofiler_codeobj_capture_mode_t mode);
std::pair<size_t, size_t> parse_uri();
+15 -1
Просмотреть файл
@@ -91,10 +91,21 @@ public:
);
void InsertMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
uint32_t data,
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
);
void InsertUnloadMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
uint32_t data
);
void InsertLoadMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
rocprofiler_intercepted_codeobj_t codeobj
);
void SetParameters(const std::vector<rocprofiler_att_parameter_t>& params) {
att_parameters_data = params;
@@ -125,7 +136,7 @@ protected:
static std::mutex att_enable_disable_mutex;
private:
uint32_t codeobj_load_cnt = 0;
uint32_t codeobj_event_cnt = 0;
static void AddAttRecord(
rocprofiler_record_att_tracer_t* record,
@@ -176,6 +187,9 @@ private:
std::mutex sessions_pending_signals_lock_;
std::map<uint32_t, std::vector<att_pending_signal_t>> sessions_pending_signals_;
rocprofiler_record_id_t capture_id;
std::unordered_set<uint32_t> active_capture_event_ids;
};
} // namespace att
+100 -20
Просмотреть файл
@@ -27,19 +27,71 @@
#define __NR_gettid 186
// Marker channel used for each word of a code object marker:
// the header word, the size word, and the low/high 32 bits of the base address.
#define ATT_MARKER_HEADER_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0
#define ATT_MARKER_SIZE_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1
#define ATT_MARKER_LO_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2
#define ATT_MARKER_HI_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
// Kind of code object event; stored in the `type` field of att_header_marker_t.
enum rocprofiler_att_marker_type_t {
ROCPROFILER_ATT_MARKER_LOAD = 0,
ROCPROFILER_ATT_MARKER_UNLOAD = 1
};
// 32-bit header marker word. `raw` is the value handed to the marker packet;
// the anonymous struct overlays it with a 2-bit event type and a 30-bit id.
// Note: how the bit-fields are laid out inside `raw` is compiler-defined.
union att_header_marker_t
{
uint32_t raw;
struct {
uint32_t type : 2;
uint32_t id : 30;
};
};
namespace rocprofiler {
namespace att {
void AttTracer::InsertMarker(
void AttTracer::InsertUnloadMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
uint32_t data
) {
att_header_marker_t header{.raw = 0};
header.type = ROCPROFILER_ATT_MARKER_UNLOAD;
header.id = data;
hsa_ven_amd_aqlprofile_att_marker_channel_t channel = ATT_MARKER_HEADER_CHANNEL;
this->InsertMarker(transformed_packets, agent, header.raw, channel);
}
void AttTracer::InsertLoadMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
rocprofiler_intercepted_codeobj_t codeobj
) {
this->InsertMarker(transformed_packets, agent, codeobj.mem_size, ATT_MARKER_SIZE_CHANNEL);
uint64_t addr = codeobj.base_address;
this->InsertMarker(transformed_packets, agent, addr & ((1ul << 32)-1), ATT_MARKER_LO_CHANNEL);
this->InsertMarker(transformed_packets, agent, addr >> 32, ATT_MARKER_HI_CHANNEL);
att_header_marker_t header{.raw = 0};
header.type = ROCPROFILER_ATT_MARKER_LOAD;
header.id = codeobj.att_marker_id;
this->InsertMarker(transformed_packets, agent, header.raw, ATT_MARKER_HEADER_CHANNEL);
}
void AttTracer::InsertMarker(
std::vector<packet_t>& transformed_packets,
hsa_agent_t agent,
uint32_t data,
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
) {
packet_t marker_packet{};
auto desc = Packet::GenerateATTMarkerPackets(agent, marker_packet, data);
auto desc = Packet::GenerateATTMarkerPackets(agent, marker_packet, data, channel);
if (desc.ptr && desc.size)
Packet::AddVendorSpecificPacket(&marker_packet, &transformed_packets, hsa_signal_t{.handle = 0});
else
rocprofiler::warning("Could not add ATT Marker");
}
@@ -63,25 +115,26 @@ bool AttTracer::ATTContiguousWriteInterceptor(
// att start
// Getting Queue Data and Information
auto agent_handle = queue_info.GetGPUAgent().handle;
rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance()
.GetHSAAgentInfo(queue_info.GetGPUAgent().handle);
.GetHSAAgentInfo(agent_handle);
auto dispatchPackets = Packet::ExtractDispatchPackets(packets, pkt_count);
if (dispatchPackets.size() == 0) return false;
size_t writer_id = WRITER_ID.fetch_add(dispatchPackets.size(), std::memory_order_relaxed);
uint32_t new_load_cnt = codeobj_capture_instance::GetLoadCount();
uint32_t new_load_cnt = codeobj_capture_instance::GetEventCount();
auto bInsertStart = RequiresStartPacket(writer_id, dispatchPackets.size());
{
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
// If att_start already exists, don't start again
auto agent_pending_packets = pending_stop_packets.find(queue_info.GetGPUAgent().handle);
auto agent_pending_packets = pending_stop_packets.find(agent_handle);
if (agent_pending_packets != pending_stop_packets.end())
bInsertStart = {};
// If nothing will be added or removed, return
if (!bInsertStart && codeobj_load_cnt == new_load_cnt)
if (!bInsertStart && codeobj_event_cnt == new_load_cnt)
{
if (
agent_pending_packets == pending_stop_packets.end() ||
@@ -107,7 +160,7 @@ bool AttTracer::ATTContiguousWriteInterceptor(
}
uint64_t IsGFX9 = HSASupport_Singleton::GetInstance()
.GetHSAAgentInfo(queue_info.GetGPUAgent().handle)
.GetHSAAgentInfo(agent_handle)
.GetDeviceInfo()
.getName()
.find("gfx9") != std::string::npos;
@@ -134,33 +187,58 @@ bool AttTracer::ATTContiguousWriteInterceptor(
0
);
codeobj_record::make_capture(rocprofiler_record_id_t{record_id}, capturem, IsGFX9);
codeobj_record::start_capture(rocprofiler_record_id_t{record_id});
this->capture_id = rocprofiler_record_id_t{record_id};
codeobj_record::make_capture(this->capture_id, capturem, IsGFX9);
codeobj_record::start_capture(this->capture_id);
stop_packet.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
pending_stop_packets[queue_info.GetGPUAgent().handle]
pending_stop_packets[agent_handle]
= {record_id, writer_id, bInsertStart->second, session_id_, stop_packet};
}
if (codeobj_load_cnt != new_load_cnt)
bool bHasPending = false;
{
codeobj_load_cnt = new_load_cnt;
InsertMarker(transformed_packets, queue_info.GetGPUAgent(), new_load_cnt);
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
bHasPending = pending_stop_packets.find(agent_handle) != pending_stop_packets.end();
}
if (bHasPending && (bInsertStart || codeobj_event_cnt != new_load_cnt))
{
codeobj_event_cnt = new_load_cnt;
auto symbols = codeobj_record::get_capture(this->capture_id);
std::unordered_set<uint32_t> current_ids;
for (size_t s=0; s<symbols.count; s++)
current_ids.insert(symbols.symbols[s].att_marker_id);
for (uint32_t prev_id : active_capture_event_ids)
if (current_ids.find(prev_id) == current_ids.end())
InsertUnloadMarker(transformed_packets, queue_info.GetGPUAgent(), prev_id);
for (size_t s=0; s<symbols.count; s++)
{
auto& symbol = symbols.symbols[s];
if (active_capture_event_ids.find(symbol.att_marker_id) == active_capture_event_ids.end())
InsertLoadMarker(transformed_packets, queue_info.GetGPUAgent(), symbol);
}
active_capture_event_ids = std::move(current_ids);
}
// Searching across all the packets given during this write
for (size_t i = 0; i < pkt_count; ++i)
transformed_packets.emplace_back(packets_arr[i]);
if (bHasPending)
{
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
auto agent_pending_packets = pending_stop_packets.find(queue_info.GetGPUAgent().handle);
auto agent_pending_packets = pending_stop_packets.at(agent_handle);
if (agent_pending_packets != pending_stop_packets.end() &&
agent_pending_packets->second.last_kernel_exec <= writer_id + dispatchPackets.size()
) {
const ATTRecordSignal& rsignal = agent_pending_packets->second;
if (agent_pending_packets.last_kernel_exec <= writer_id + dispatchPackets.size())
{
const ATTRecordSignal& rsignal = agent_pending_packets;
// Adding a barrier packet with the original packet's completion signal.
hsa_signal_t interrupt_signal;
CreateSignal(0, &interrupt_signal);
@@ -180,8 +258,10 @@ bool AttTracer::ATTContiguousWriteInterceptor(
interrupt_signal
});
codeobj_record::stop_capture(rocprofiler_record_id_t{rsignal.record_id});
pending_stop_packets.erase(queue_info.GetGPUAgent().handle);
//codeobj_record::stop_capture(rocprofiler_record_id_t{rsignal.record_id});
codeobj_record::stop_capture(this->capture_id);
active_capture_event_ids.clear();
pending_stop_packets.erase(agent_handle);
}
}
-2
Просмотреть файл
@@ -316,10 +316,8 @@ hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
api->hsa_ven_amd_aqlprofile_iterate_data =
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym(
handle, "hsa_ven_amd_aqlprofile_iterate_data");
#ifdef HSA_ATT_MARKER_ENABLE
api->hsa_ven_amd_aqlprofile_att_marker = (decltype(::hsa_ven_amd_aqlprofile_att_marker)*)
dlsym(handle, "hsa_ven_amd_aqlprofile_att_marker");
#endif
return HSA_STATUS_SUCCESS;
}