SWDEV-432445: ATT continuous mode update part2. Added codeobj tracking.
Change-Id: I1b58af70d221bbeb9b4cab960d26357a504045dd
[ROCm/rocprofiler commit: edf93d48ab]
Этот коммит содержится в:
коммит произвёл
Giovanni Baraldi
родитель
34fd4840d1
Коммит
f275cdd602
@@ -56,6 +56,7 @@ configure_file(trace_view.py att/trace_view.py COPYONLY)
|
||||
configure_file(stitch.py att/stitch.py COPYONLY)
|
||||
configure_file(drawing.py att/drawing.py COPYONLY)
|
||||
configure_file(att_to_csv.py att/att_to_csv.py COPYONLY)
|
||||
configure_file(service.py att/service.py COPYONLY)
|
||||
configure_file(ui/index.html att/ui/index.html COPYONLY)
|
||||
configure_file(ui/logo.svg att/ui/logo.svg COPYONLY)
|
||||
configure_file(ui/styles.css att/ui/styles.css COPYONLY)
|
||||
|
||||
@@ -69,11 +69,8 @@ catch (...) \
|
||||
return returndata; \
|
||||
}
|
||||
|
||||
code_object_decoder_t::code_object_decoder_t(const char* codeobj_data, uint64_t codeobj_size) {
|
||||
buffer = std::vector<char>{};
|
||||
buffer.resize(codeobj_size);
|
||||
std::memcpy(buffer.data(), codeobj_data, codeobj_size);
|
||||
|
||||
CodeObjDecoderComponent::CodeObjDecoderComponent(const char* codeobj_data, uint64_t codeobj_size)
|
||||
{
|
||||
m_fd = -1;
|
||||
#if defined(_GNU_SOURCE) && defined(MFD_ALLOW_SEALING) && defined(MFD_CLOEXEC)
|
||||
m_fd = ::memfd_create(m_uri.c_str(), MFD_ALLOW_SEALING | MFD_CLOEXEC);
|
||||
@@ -86,7 +83,7 @@ code_object_decoder_t::code_object_decoder_t(const char* codeobj_data, uint64_t
|
||||
return;
|
||||
}
|
||||
|
||||
if (size_t size = ::write(m_fd, buffer.data(), buffer.size()); size != buffer.size()) {
|
||||
if (size_t size = ::write(m_fd, codeobj_data, codeobj_size); size != codeobj_size) {
|
||||
printf("could not write to the temporary file\n");
|
||||
return;
|
||||
}
|
||||
@@ -130,26 +127,20 @@ code_object_decoder_t::code_object_decoder_t(const char* codeobj_data, uint64_t
|
||||
// load_symbol_map();
|
||||
}
|
||||
|
||||
try {
|
||||
disassembly = std::make_unique<DisassemblyInstance>(*this); // Can throw
|
||||
} catch(std::exception& e) {
|
||||
return;
|
||||
}
|
||||
disassembly = std::make_unique<DisassemblyInstance>(codeobj_data, codeobj_size, std::nullopt); // Can throw
|
||||
try {
|
||||
m_symbol_map = disassembly->GetKernelMap(); // Can throw
|
||||
} catch(std::exception& e) {
|
||||
return;
|
||||
}
|
||||
} catch(...) {}
|
||||
|
||||
//disassemble_kernels();
|
||||
}
|
||||
|
||||
|
||||
code_object_decoder_t::~code_object_decoder_t() {
|
||||
/**
 * @brief Closes the file descriptor opened by the constructor, if any.
 *
 * The constructor initialises m_fd to -1 before trying to create the
 * temporary/in-memory file, so -1 means "nothing to close".
 */
CodeObjDecoderComponent::~CodeObjDecoderComponent() {
  // Compare against the -1 sentinel: a plain truthiness test would try to
  // close(-1) and would skip descriptor 0, which is a valid descriptor.
  if (m_fd != -1) ::close(m_fd);
}
|
||||
|
||||
std::optional<SymbolInfo> code_object_decoder_t::find_symbol(uint64_t vaddr) {
|
||||
std::optional<SymbolInfo> CodeObjDecoderComponent::find_symbol(uint64_t vaddr) {
|
||||
/* Load the symbol table. */
|
||||
auto it = m_symbol_map.upper_bound(vaddr);
|
||||
if (it == m_symbol_map.begin())
|
||||
@@ -172,7 +163,7 @@ std::optional<SymbolInfo> code_object_decoder_t::find_symbol(uint64_t vaddr) {
|
||||
}
|
||||
|
||||
std::pair<instruction_instance_t, size_t>
|
||||
code_object_decoder_t::disassemble_instruction(uint64_t faddr, uint64_t vaddr)
|
||||
CodeObjDecoderComponent::disassemble_instruction(uint64_t faddr, uint64_t vaddr)
|
||||
{
|
||||
if (!disassembly)
|
||||
throw std::exception();
|
||||
@@ -191,7 +182,7 @@ code_object_decoder_t::disassemble_instruction(uint64_t faddr, uint64_t vaddr)
|
||||
return {disassembly->last_instruction, size};
|
||||
}
|
||||
|
||||
void code_object_decoder_t::disassemble_kernel(uint64_t faddr, uint64_t vaddr)
|
||||
void CodeObjDecoderComponent::disassemble_kernel(uint64_t faddr, uint64_t vaddr)
|
||||
{
|
||||
if (!disassembly) return;
|
||||
auto symbol = find_symbol(vaddr);
|
||||
@@ -216,17 +207,18 @@ void code_object_decoder_t::disassemble_kernel(uint64_t faddr, uint64_t vaddr)
|
||||
}
|
||||
}
|
||||
|
||||
void code_object_decoder_t::disassemble_kernels() {
|
||||
void CodeObjDecoderComponent::disassemble_kernels() {
|
||||
for (auto& [vaddr, v] : m_symbol_map) disassemble_kernel(v.faddr, vaddr);
|
||||
}
|
||||
|
||||
void code_object_decoder_t::disassemble_single_kernel(uint64_t kaddr) {
|
||||
void CodeObjDecoderComponent::disassemble_single_kernel(uint64_t kaddr) {
|
||||
for (auto& [vaddr, v] : m_symbol_map)
|
||||
if (kaddr >= vaddr && kaddr < vaddr + v.mem_size)
|
||||
disassemble_kernel(v.faddr, vaddr);
|
||||
}
|
||||
|
||||
CodeobjService::CodeobjService(const char* filepath, uint64_t load_base): load_base(load_base)
|
||||
CodeobjDecoder::CodeobjDecoder(const char* filepath, uint64_t loadbase, uint64_t mem_size):
|
||||
loadbase(loadbase), load_end(loadbase + mem_size)
|
||||
{
|
||||
if (!filepath) throw "Empty filepath.";
|
||||
|
||||
@@ -245,22 +237,23 @@ CodeobjService::CodeobjService(const char* filepath, uint64_t load_base): load_b
|
||||
file.seekg(0, file.beg);
|
||||
file.read(buffer.data(), buffer.size());
|
||||
|
||||
decoder = std::make_unique<code_object_decoder_t>(buffer.data(), buffer.size());
|
||||
decoder = std::make_unique<CodeObjDecoderComponent>(buffer.data(), buffer.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
std::unique_ptr<CodeObjectBinary> binary = std::make_unique<CodeObjectBinary>(filepath);
|
||||
decoder = std::make_unique<code_object_decoder_t>(binary->buffer.data(), binary->buffer.size());
|
||||
auto& buffer = binary->buffer;
|
||||
decoder = std::make_unique<CodeObjDecoderComponent>(buffer.data(), buffer.size());
|
||||
}
|
||||
|
||||
auto elf_segments = decoder->disassembly->getSegments();
|
||||
}
|
||||
|
||||
bool CodeobjService::decode_single(uint64_t vaddr, uint64_t faddr)
|
||||
bool CodeobjDecoder::add_to_map(uint64_t faddr, uint64_t vaddr, uint64_t voffset)
|
||||
{
|
||||
if (!decoder->disassembly) return false;
|
||||
|
||||
try
|
||||
{
|
||||
decoded_map[vaddr] = decoder->disassemble_instruction(faddr, vaddr-load_base);
|
||||
decoded_map[vaddr] = decoder->disassemble_instruction(faddr, voffset);
|
||||
}
|
||||
catch(std::exception& e)
|
||||
{
|
||||
@@ -269,28 +262,27 @@ bool CodeobjService::decode_single(uint64_t vaddr, uint64_t faddr)
|
||||
return true;
|
||||
}
|
||||
|
||||
std::pair<instruction_instance_t, size_t>& CodeobjService::getDecoded(uint64_t addr)
|
||||
bool CodeobjDecoder::decode_single_at_offset(uint64_t vaddr, uint64_t voffset)
|
||||
{
|
||||
auto faddr = decoder->disassembly->va2fo(voffset);
|
||||
if (!faddr)
|
||||
return false;
|
||||
|
||||
return add_to_map(*faddr, vaddr, voffset);
|
||||
}
|
||||
|
||||
bool CodeobjDecoder::decode_single(uint64_t vaddr)
|
||||
{
|
||||
if (!decoder || vaddr < loadbase) return false;
|
||||
return decode_single_at_offset(vaddr, vaddr-loadbase);
|
||||
}
|
||||
|
||||
std::pair<instruction_instance_t, size_t>& CodeobjDecoder::getDecoded(uint64_t addr)
|
||||
{
|
||||
if (decoded_map.find(addr) != decoded_map.end())
|
||||
return decoded_map[addr];
|
||||
|
||||
std::optional<uint64_t> faddr{};
|
||||
|
||||
if (!bNotElfFILE)
|
||||
{
|
||||
faddr = DisassemblyInstance::va2fo(decoder->buffer.data(), addr-load_base);
|
||||
if (!faddr)
|
||||
bNotElfFILE = true;
|
||||
}
|
||||
|
||||
if (bNotElfFILE && decoder->buffer.size() > 0x100) {
|
||||
uint64_t f_offset = *reinterpret_cast<uint32_t*>(decoder->buffer.data()+0xb8);
|
||||
uint64_t v_offset = *reinterpret_cast<uint32_t*>(decoder->buffer.data()+0xc8);
|
||||
|
||||
faddr = addr+f_offset-load_base-v_offset;
|
||||
}
|
||||
|
||||
if (!faddr || !decode_single(addr, *faddr))
|
||||
if (!decode_single(addr))
|
||||
{
|
||||
std::cerr << "Invalid addr: " << std::hex << addr << std::dec << std::endl;
|
||||
throw std::exception();
|
||||
@@ -299,62 +291,49 @@ std::pair<instruction_instance_t, size_t>& CodeobjService::getDecoded(uint64_t a
|
||||
return decoded_map[addr];
|
||||
}
|
||||
|
||||
std::unordered_map<uint64_t, std::unique_ptr<CodeobjService>> services{};
|
||||
std::atomic<uint64_t> shandles{1};
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
|
||||
CodeobjTableTranslation table;
|
||||
|
||||
extern "C"
|
||||
{
|
||||
PUBLIC_API uint64_t createService(const char* filename, uint64_t load_base)
|
||||
PUBLIC_API int addDecoder(const char* filename, uint32_t id, uint64_t loadbase, uint64_t memsize)
|
||||
{
|
||||
C_API_BEGIN
|
||||
|
||||
uint64_t handle = shandles.fetch_add(1);
|
||||
services[handle] = std::make_unique<CodeobjService>(filename, load_base);
|
||||
return handle;
|
||||
table.addDecoder(filename, id, loadbase, memsize);
|
||||
return 0;
|
||||
|
||||
C_API_END(0)
|
||||
C_API_END(1)
|
||||
}
|
||||
PUBLIC_API int deleteService(uint64_t handle)
|
||||
PUBLIC_API int removeDecoder(uint32_t id, uint64_t loadbase)
|
||||
{
|
||||
return services.erase(handle);
|
||||
return table.removeDecoder(id, loadbase) != false;
|
||||
}
|
||||
PUBLIC_API const char* getInstruction(uint64_t handle, uint64_t addr)
|
||||
PUBLIC_API instruction_info_t getInstructionFromAddr(uint64_t vaddr)
|
||||
{
|
||||
static instruction_info_t default_info{nullptr, nullptr, 0};
|
||||
C_API_BEGIN
|
||||
|
||||
return services.at(handle)->getInstruction(addr);
|
||||
return table.get(vaddr);
|
||||
|
||||
C_API_END(nullptr)
|
||||
C_API_END(default_info)
|
||||
}
|
||||
PUBLIC_API const char* getCppref(uint64_t handle, uint64_t addr)
|
||||
{
|
||||
C_API_BEGIN
|
||||
|
||||
return services.at(handle)->getCppref(addr);
|
||||
|
||||
C_API_END(nullptr)
|
||||
}
|
||||
PUBLIC_API size_t getInstSize(uint64_t handle, uint64_t addr)
|
||||
PUBLIC_API instruction_info_t getInstructionFromID(uint32_t id, uint64_t offset)
|
||||
{
|
||||
static instruction_info_t default_info{nullptr, nullptr, 0};
|
||||
C_API_BEGIN
|
||||
|
||||
return services.at(handle)->getSize(addr);
|
||||
return table.get(id, offset);
|
||||
|
||||
C_API_END(0)
|
||||
C_API_END(default_info)
|
||||
}
|
||||
PUBLIC_API const char* getSymbolName(uint64_t addr)
|
||||
{
|
||||
C_API_BEGIN
|
||||
|
||||
for (auto& [handle, service] : services)
|
||||
{
|
||||
if (!service->inrange(addr)) continue;
|
||||
return service->getSymbolName(addr);
|
||||
}
|
||||
return nullptr;
|
||||
return table.getSymbolName(addr);
|
||||
|
||||
C_API_END(nullptr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,13 +30,15 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include "disassembly.hpp"
|
||||
#include "segment.hpp"
|
||||
|
||||
class code_object_decoder_t {
|
||||
class CodeObjDecoderComponent
|
||||
{
|
||||
public:
|
||||
std::optional<SymbolInfo> find_symbol(uint64_t address);
|
||||
|
||||
code_object_decoder_t(const char* codeobj_data, uint64_t codeobj_size);
|
||||
~code_object_decoder_t();
|
||||
CodeObjDecoderComponent(const char* codeobj_data, uint64_t codeobj_size);
|
||||
~CodeObjDecoderComponent();
|
||||
|
||||
std::pair<instruction_instance_t, size_t>
|
||||
disassemble_instruction(uint64_t faddr, uint64_t vaddr);
|
||||
@@ -50,44 +52,133 @@ public:
|
||||
std::map<uint64_t, SymbolInfo> m_symbol_map{};
|
||||
|
||||
std::string m_uri;
|
||||
std::vector<char> buffer;
|
||||
std::vector<instruction_instance_t> instructions{};
|
||||
std::unique_ptr<DisassemblyInstance> disassembly{};
|
||||
};
|
||||
|
||||
class CodeobjService
|
||||
typedef struct {
|
||||
const char* inst;
|
||||
const char* cpp;
|
||||
size_t size;
|
||||
} instruction_info_t;
|
||||
|
||||
class CodeobjDecoder
|
||||
{
|
||||
public:
|
||||
CodeobjService(const char* filepath, uint64_t load_base);
|
||||
bool decode_single(uint64_t vaddr, uint64_t faddr);
|
||||
CodeobjDecoder(const char* filepath, uint64_t loadbase, uint64_t memsize);
|
||||
bool decode_single(uint64_t vaddr);
|
||||
bool decode_single_at_offset(uint64_t vaddr, uint64_t voffset);
|
||||
bool add_to_map(uint64_t faddr, uint64_t vaddr, uint64_t voffset);
|
||||
|
||||
std::pair<instruction_instance_t, size_t>& getDecoded(uint64_t addr);
|
||||
const char* getInstruction(uint64_t addr) { return getDecoded(addr).first.instruction; }
|
||||
const char* getCppref(uint64_t addr) { return getDecoded(addr).first.cpp_reference; }
|
||||
size_t getSize(uint64_t addr) { return getDecoded(addr).second; }
|
||||
|
||||
uint64_t size() const {
|
||||
if (!decoder) return 0;
|
||||
return decoder->buffer.size();
|
||||
instruction_info_t get(uint64_t addr) {
|
||||
auto& inst = getDecoded(addr);
|
||||
return {inst.first.instruction, inst.first.cpp_reference, inst.second};
|
||||
}
|
||||
uint64_t begin() const { return load_base; };
|
||||
uint64_t end() const { return begin() + size(); }
|
||||
|
||||
uint64_t begin() const { return loadbase; };
|
||||
uint64_t end() const { return load_end; }
|
||||
uint64_t size() const { return load_end-loadbase; }
|
||||
bool inrange(uint64_t addr) const { return addr >= begin() && addr < end(); }
|
||||
|
||||
const char* getSymbolName(uint64_t addr) const {
|
||||
if (!decoder) return nullptr;
|
||||
|
||||
auto it = decoder->m_symbol_map.find(addr-load_base);
|
||||
auto it = decoder->m_symbol_map.find(addr-loadbase);
|
||||
if (it != decoder->m_symbol_map.end())
|
||||
return it->second.name.data();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<std::pair<uint64_t, uint64_t>> elf_segments{};
|
||||
|
||||
private:
|
||||
const uint64_t load_base;
|
||||
const uint64_t loadbase;
|
||||
uint64_t load_end = 0;
|
||||
|
||||
std::unordered_map<uint64_t, std::pair<instruction_instance_t, size_t>> decoded_map;
|
||||
std::unique_ptr<code_object_decoder_t> decoder{nullptr};
|
||||
|
||||
bool bNotElfFILE = false;
|
||||
std::unique_ptr<CodeObjDecoderComponent> decoder{nullptr};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Maps ID and offsets into instructions
|
||||
*/
|
||||
class CodeobjList
|
||||
{
|
||||
public:
|
||||
CodeobjList() = default;
|
||||
|
||||
virtual void addDecoder(const char* filepath, uint32_t id, uint64_t loadbase, uint64_t memsize)
|
||||
{
|
||||
decoders[id] = std::make_shared<CodeobjDecoder>(filepath, loadbase, memsize);
|
||||
}
|
||||
|
||||
virtual bool removeDecoder(uint32_t id)
|
||||
{
|
||||
return decoders.erase(id) != 0;
|
||||
}
|
||||
|
||||
instruction_info_t get(uint32_t id, uint64_t offset)
|
||||
{
|
||||
auto& decoder = decoders.at(id);
|
||||
auto& inst = decoder->getDecoded(decoder->begin() + offset);
|
||||
return {inst.first.instruction, inst.first.cpp_reference, inst.second};
|
||||
}
|
||||
|
||||
const char* getSymbolName(uint32_t id, uint64_t offset)
|
||||
{
|
||||
auto& decoder = decoders.at(id);
|
||||
uint64_t vaddr = decoder->begin() + offset;
|
||||
if (decoder->inrange(vaddr))
|
||||
return decoder->getSymbolName(vaddr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::unordered_map<uint32_t, std::shared_ptr<CodeobjDecoder>> decoders{};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Translates virtual addresses to elf file offsets
|
||||
*/
|
||||
class CodeobjTableTranslation : protected CodeobjList
|
||||
{
|
||||
using Super = CodeobjList;
|
||||
public:
|
||||
CodeobjTableTranslation() = default;
|
||||
|
||||
void addDecoder(const char* filepath, uint32_t id, uint64_t loadbase, uint64_t memsize) override
|
||||
{
|
||||
this->Super::addDecoder(filepath, id, loadbase, memsize);
|
||||
auto ptr = decoders.at(id);
|
||||
table.insert({ptr->begin(), static_cast<uint32_t>(ptr->size()), id, 0});
|
||||
}
|
||||
|
||||
virtual bool removeDecoder(uint32_t id, uint64_t loadbase)
|
||||
{
|
||||
return table.remove(loadbase) && this->Super::removeDecoder(id);
|
||||
}
|
||||
|
||||
instruction_info_t get(uint64_t vaddr)
|
||||
{
|
||||
auto& addr_range = table.find_codeobj_in_range(vaddr);
|
||||
return get(addr_range.id, vaddr - addr_range.vbegin);
|
||||
}
|
||||
instruction_info_t get(uint32_t id, uint64_t offset) { return this->Super::get(id, offset); }
|
||||
|
||||
const char* getSymbolName(uint64_t vaddr)
|
||||
{
|
||||
for (auto& [_, decoder] : decoders)
|
||||
{
|
||||
if (!decoder->inrange(vaddr)) continue;
|
||||
return decoder->getSymbolName(vaddr);
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
CodeobjTableTranslator table;
|
||||
};
|
||||
|
||||
@@ -122,58 +122,65 @@ CodeObjectBinary::CodeObjectBinary(const std::string& uri) : m_uri(uri) {
|
||||
});
|
||||
|
||||
buffer = std::vector<char>{};
|
||||
try {
|
||||
size_t offset{0}, size{0};
|
||||
size_t offset{0}, size{0};
|
||||
|
||||
if (auto offset_it = params.find("offset"); offset_it != params.end()) {
|
||||
offset = std::stoul(offset_it->second, nullptr, 0);
|
||||
}
|
||||
|
||||
if (auto size_it = params.find("size"); size_it != params.end()) {
|
||||
if (!(size = std::stoul(size_it->second, nullptr, 0))) return;
|
||||
}
|
||||
|
||||
if (protocol != "file") throw protocol + " protocol not supported!";
|
||||
|
||||
std::ifstream file(decoded_path, std::ios::in | std::ios::binary);
|
||||
if (!file || !file.is_open()) throw "could not open " + decoded_path;
|
||||
|
||||
if (!size) {
|
||||
file.ignore(std::numeric_limits<std::streamsize>::max());
|
||||
size_t bytes = file.gcount();
|
||||
file.clear();
|
||||
|
||||
if (bytes < offset)
|
||||
throw "invalid uri " + decoded_path + " (file size < offset)";
|
||||
|
||||
size = bytes - offset;
|
||||
}
|
||||
|
||||
file.seekg(offset, std::ios_base::beg);
|
||||
buffer.resize(size);
|
||||
file.read(&buffer[0], size);
|
||||
} catch (...) {
|
||||
if (auto offset_it = params.find("offset"); offset_it != params.end()) {
|
||||
offset = std::stoul(offset_it->second, nullptr, 0);
|
||||
}
|
||||
|
||||
if (auto size_it = params.find("size"); size_it != params.end()) {
|
||||
if (!(size = std::stoul(size_it->second, nullptr, 0))) return;
|
||||
}
|
||||
|
||||
if (protocol != "file") throw protocol + " protocol not supported!";
|
||||
|
||||
std::ifstream file(decoded_path, std::ios::in | std::ios::binary);
|
||||
if (!file || !file.is_open()) throw "could not open " + decoded_path;
|
||||
|
||||
if (!size) {
|
||||
file.ignore(std::numeric_limits<std::streamsize>::max());
|
||||
size_t bytes = file.gcount();
|
||||
file.clear();
|
||||
|
||||
if (bytes < offset)
|
||||
throw "invalid uri " + decoded_path + " (file size < offset)";
|
||||
|
||||
size = bytes - offset;
|
||||
}
|
||||
|
||||
file.seekg(offset, std::ios_base::beg);
|
||||
buffer.resize(size);
|
||||
file.read(&buffer[0], size);
|
||||
}
|
||||
|
||||
DisassemblyInstance::DisassemblyInstance(code_object_decoder_t& decoder)
|
||||
: buffer(reinterpret_cast<void*>(decoder.buffer.data())),
|
||||
size(decoder.buffer.size())
|
||||
DisassemblyInstance::DisassemblyInstance(
|
||||
const char* codeobj_data,
|
||||
uint64_t codeobj_size,
|
||||
std::optional<std::string> input_isa
|
||||
)
|
||||
{
|
||||
THROW_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data));
|
||||
THROW_COMGR(amd_comgr_set_data(data, size, decoder.buffer.data()));
|
||||
buffer = std::vector<char>(codeobj_size, 0);
|
||||
std::memcpy(buffer.data(), codeobj_data, codeobj_size);
|
||||
|
||||
/*std::cout << "checking isa" << std::endl;
|
||||
char isa_name[128];
|
||||
size_t isa_size = sizeof(isa_name);
|
||||
CHECK_COMGR(amd_comgr_get_data_isa_name(data, &isa_size, isa_name));
|
||||
std::cout << isa_name << std::endl; */
|
||||
const char* isa_name = "amdgcn-amd-amdhsa--gfx1100";
|
||||
THROW_COMGR(amd_comgr_create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &data));
|
||||
THROW_COMGR(amd_comgr_set_data(data, buffer.size(), buffer.data()));
|
||||
if (!input_isa)
|
||||
{
|
||||
input_isa = "amdgcn-amd-amdhsa--gfx1100";
|
||||
}
|
||||
if (!input_isa)
|
||||
{
|
||||
input_isa = std::string();
|
||||
input_isa->resize(128);
|
||||
size_t isa_size = sizeof(input_isa->size());
|
||||
THROW_COMGR(amd_comgr_get_data_isa_name(data, &isa_size, input_isa->data()));
|
||||
}
|
||||
|
||||
THROW_COMGR(amd_comgr_create_disassembly_info(
|
||||
isa_name,
|
||||
input_isa->data(),
|
||||
&DisassemblyInstance::memory_callback, &DisassemblyInstance::inst_callback,
|
||||
[](uint64_t address, void* user_data) {}, &info));
|
||||
|
||||
}
|
||||
|
||||
static bool IsKernelType(amd_comgr_symbol_type_t type)
|
||||
@@ -208,7 +215,7 @@ amd_comgr_status_t DisassemblyInstance::symbol_callback(amd_comgr_symbol_t symbo
|
||||
RETURN_COMGR(amd_comgr_symbol_get_info(symbol, AMD_COMGR_SYMBOL_INFO_NAME, name.data()));
|
||||
|
||||
DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
|
||||
std::optional<uint64_t> faddr = va2fo(instance.buffer, vaddr);
|
||||
std::optional<uint64_t> faddr = instance.va2fo(vaddr);
|
||||
|
||||
if (faddr)
|
||||
instance.symbol_map[vaddr] = {name, *faddr, mem_size};
|
||||
@@ -230,7 +237,7 @@ DisassemblyInstance::~DisassemblyInstance() {
|
||||
uint64_t DisassemblyInstance::ReadInstruction(uint64_t faddr, uint64_t vaddr, const char* cpp_line)
|
||||
{
|
||||
uint64_t size_read;
|
||||
uint64_t addr_in_buffer = reinterpret_cast<uint64_t>(buffer) + faddr;
|
||||
uint64_t addr_in_buffer = reinterpret_cast<uint64_t>(buffer.data()) + faddr;
|
||||
|
||||
THROW_COMGR(amd_comgr_disassemble_instruction(info, addr_in_buffer, (void*)this, &size_read));
|
||||
|
||||
@@ -242,7 +249,8 @@ uint64_t DisassemblyInstance::ReadInstruction(uint64_t faddr, uint64_t vaddr, co
|
||||
uint64_t DisassemblyInstance::memory_callback(uint64_t from, char* to, uint64_t size,
|
||||
void* user_data) {
|
||||
DisassemblyInstance& instance = *static_cast<DisassemblyInstance*>(user_data);
|
||||
int64_t copysize = reinterpret_cast<int64_t>(instance.buffer) + instance.size - (int64_t)from;
|
||||
int64_t copysize = reinterpret_cast<int64_t>(instance.buffer.data())
|
||||
+ instance.buffer.size() - static_cast<int64_t>(from);
|
||||
copysize = std::min<int64_t>(size, copysize);
|
||||
std::memcpy(to, (char*)from, copysize);
|
||||
return copysize;
|
||||
@@ -261,11 +269,11 @@ void DisassemblyInstance::inst_callback(const char* instruction, void* user_data
|
||||
// mem - input argument, start of the elf
|
||||
// va - input argument, virtual address
|
||||
// return file offset, if found
|
||||
std::optional<uint64_t> DisassemblyInstance::va2fo(void *mem, uint64_t va)
|
||||
std::optional<uint64_t> DisassemblyInstance::va2fo(uint64_t va)
|
||||
{
|
||||
CHECK_VA2FO(mem, "mem is nullptr");
|
||||
/*CHECK_VA2FO(buffer.size(), "buffer is not large enough");
|
||||
|
||||
uint8_t *e_ident = (uint8_t*)mem;
|
||||
uint8_t *e_ident = (uint8_t*)buffer.data();
|
||||
CHECK_VA2FO(e_ident, "e_ident is nullptr");
|
||||
|
||||
CHECK_VA2FO(
|
||||
@@ -277,20 +285,25 @@ std::optional<uint64_t> DisassemblyInstance::va2fo(void *mem, uint64_t va)
|
||||
CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
|
||||
CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
|
||||
CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
|
||||
CHECK_VA2FO(e_ident[EI_OSABI] == 64 /*ELFOSABI_AMDGPU_HSA*/, "unexpected ei_osabi");
|
||||
CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
|
||||
|
||||
CHECK_VA2FO(
|
||||
e_ident[EI_ABIVERSION] == 2 /*ELFABIVERSION_AMDGPU_HSA_V4*/ ||
|
||||
e_ident[EI_ABIVERSION] == 3 /*ELFABIVERSION_AMDGPU_HSA_V5*/ , "unexpected ei_abiversion");
|
||||
e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
|
||||
e_ident[EI_ABIVERSION] == 3, "unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
|
||||
|
||||
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)mem;
|
||||
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
|
||||
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Ehdr), "buffer is not large enough");
|
||||
CHECK_VA2FO(ehdr, "ehdr is nullptr");
|
||||
CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
|
||||
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine");
|
||||
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine"); */
|
||||
|
||||
CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
|
||||
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
|
||||
CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
|
||||
|
||||
Elf64_Phdr *phdr = (Elf64_Phdr*)((uint8_t*)mem + ehdr->e_phoff);
|
||||
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr), "buffer is not large enough");
|
||||
|
||||
Elf64_Phdr *phdr = (Elf64_Phdr*)((uint8_t*)buffer.data() + ehdr->e_phoff);
|
||||
CHECK_VA2FO(phdr, "phdr is nullptr");
|
||||
|
||||
for (uint16_t i = 0; i < ehdr->e_phnum; ++i)
|
||||
@@ -304,3 +317,58 @@ std::optional<uint64_t> DisassemblyInstance::va2fo(void *mem, uint64_t va)
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
#undef CHECK_VA2FO
|
||||
#define CHECK_VA2FO(x, msg) if (!(x)) { \
|
||||
std::cerr << __FILE__ << ' ' << __LINE__ << ' ' << msg << std::endl; \
|
||||
return {}; \
|
||||
}
|
||||
|
||||
std::vector<std::pair<uint64_t, uint64_t>> DisassemblyInstance::getSegments()
|
||||
{
|
||||
/*CHECK_VA2FO(buffer.size(), "buffer is not large enough");
|
||||
|
||||
uint8_t *e_ident = (uint8_t*)buffer.data();
|
||||
CHECK_VA2FO(e_ident, "e_ident is nullptr");
|
||||
|
||||
CHECK_VA2FO(
|
||||
e_ident[EI_MAG0] == ELFMAG0 ||
|
||||
e_ident[EI_MAG1] == ELFMAG1 ||
|
||||
e_ident[EI_MAG2] == ELFMAG2 ||
|
||||
e_ident[EI_MAG3] == ELFMAG3, "unexpected ei_mag");
|
||||
|
||||
CHECK_VA2FO(e_ident[EI_CLASS] == ELFCLASS64, "unexpected ei_class");
|
||||
CHECK_VA2FO(e_ident[EI_DATA] == ELFDATA2LSB, "unexpected ei_data");
|
||||
CHECK_VA2FO(e_ident[EI_VERSION] == EV_CURRENT, "unexpected ei_version");
|
||||
CHECK_VA2FO(e_ident[EI_OSABI] == 64, "unexpected ei_osabi"); // ELFOSABI_AMDGPU_HSA
|
||||
|
||||
CHECK_VA2FO(
|
||||
e_ident[EI_ABIVERSION] == 2 || // ELFABIVERSION_AMDGPU_HSA_V4
|
||||
e_ident[EI_ABIVERSION] == 3, "unexpected ei_abiversion"); // ELFABIVERSION_AMDGPU_HSA_V5
|
||||
|
||||
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
|
||||
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Ehdr), "buffer is not large enough");
|
||||
CHECK_VA2FO(ehdr, "ehdr is nullptr");
|
||||
CHECK_VA2FO(ehdr->e_type == ET_DYN, "unexpected e_type");
|
||||
CHECK_VA2FO(ehdr->e_machine == ELF::EM_AMDGPU, "unexpected e_machine"); */
|
||||
|
||||
CHECK_VA2FO(buffer.size() > sizeof(Elf64_Ehdr), "buffer is not large enough");
|
||||
Elf64_Ehdr *ehdr = (Elf64_Ehdr*)buffer.data();
|
||||
CHECK_VA2FO(ehdr->e_phoff != 0, "unexpected e_phoff");
|
||||
|
||||
CHECK_VA2FO(buffer.size() > ehdr->e_phoff + sizeof(Elf64_Phdr), "buffer is not large enough");
|
||||
|
||||
Elf64_Phdr *phdr = (Elf64_Phdr*)((uint8_t*)buffer.data() + ehdr->e_phoff);
|
||||
CHECK_VA2FO(phdr, "phdr is nullptr");
|
||||
|
||||
std::vector<std::pair<uint64_t, uint64_t>> segments;
|
||||
for (Elf64_Half i = 0; i < ehdr->e_phnum; ++i)
|
||||
{
|
||||
if (phdr[i].p_type != PT_LOAD)
|
||||
continue;
|
||||
|
||||
segments.push_back({phdr[i].p_vaddr - phdr[i].p_offset, phdr[i].p_memsz});
|
||||
}
|
||||
|
||||
return segments;
|
||||
}
|
||||
|
||||
@@ -20,9 +20,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <amd_comgr/amd_comgr.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <amd_comgr/amd_comgr.h>
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
|
||||
@@ -48,7 +48,11 @@ struct SymbolInfo
|
||||
|
||||
class DisassemblyInstance {
|
||||
public:
|
||||
DisassemblyInstance(class code_object_decoder_t& decoder);
|
||||
DisassemblyInstance(
|
||||
const char* codeobj_data,
|
||||
uint64_t codeobj_size,
|
||||
std::optional<std::string> input_isa
|
||||
);
|
||||
~DisassemblyInstance();
|
||||
|
||||
uint64_t ReadInstruction(uint64_t faddr, uint64_t vaddr, const char* cpp_line);
|
||||
@@ -57,10 +61,11 @@ class DisassemblyInstance {
|
||||
static uint64_t memory_callback(uint64_t from, char* to, uint64_t size, void* user_data);
|
||||
static void inst_callback(const char* instruction, void* user_data);
|
||||
static amd_comgr_status_t symbol_callback(amd_comgr_symbol_t symbol, void* user_data);
|
||||
static std::optional<uint64_t> va2fo(void *mem, uint64_t va);
|
||||
|
||||
void* buffer;
|
||||
int64_t size;
|
||||
std::optional<uint64_t> va2fo(uint64_t va);
|
||||
std::vector<std::pair<uint64_t, uint64_t>> getSegments();
|
||||
|
||||
std::vector<char> buffer;
|
||||
instruction_instance_t last_instruction;
|
||||
amd_comgr_disassembly_info_t info;
|
||||
amd_comgr_data_t data;
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
#include <unordered_set>
|
||||
#include <algorithm>
|
||||
|
||||
/**
 * @brief std::vector whose elements are kept sorted by their begin() key.
 *
 * Type must provide `begin() const` returning an integer key. Ordering is only
 * maintained when elements are added through insert(const Type&); the
 * inherited std::vector mutators do not preserve it.
 */
template<typename Type>
class ordered_vector : public std::vector<Type>
{
  using Super = std::vector<Type>;
public:
  /** @brief Inserts `elem` at the position that keeps the vector sorted by begin(). */
  void insert(const Type& elem)
  {
    size_t loc = lower_bound(elem.begin());
    // lower_bound() points at the last element not greater than the key (or 0);
    // step past it when it is strictly smaller so that `elem` lands after it.
    if (!this->empty() && get(loc).begin() < elem.begin())
      ++loc;
    this->Super::insert(this->begin() + loc, elem);
  }

  /**
   * @brief Removes the element found for elem.begin() if it compares equal to `elem`.
   * @return true if an element was erased. Requires Type to provide operator!=.
   */
  bool remove(const Type& elem)
  {
    if (this->empty()) return false;
    const size_t loc = lower_bound(elem.begin());
    if (get(loc) != elem) return false;

    this->Super::erase(this->begin() + loc);
    return true;
  }

  /**
   * @brief Removes the element whose begin() equals `elem_begin`.
   * @return true if such an element existed and was erased.
   */
  bool remove(uint64_t elem_begin)
  {
    if (this->empty()) return false;
    const size_t loc = lower_bound(elem_begin);
    if (get(loc).begin() != elem_begin) return false;

    this->Super::erase(this->begin() + loc);
    return true;
  }

  /**
   * @brief Index of the last element whose begin() is <= `addr`.
   *
   * Despite the name this is not std::lower_bound: it returns 0 both when the
   * vector is empty and when every element starts after `addr`, so callers
   * must check the element at the returned index themselves.
   */
  size_t lower_bound(size_t addr) const
  {
    if (this->empty()) return 0;
    return binary_search(addr, 0, this->size() - 1);
  }

  /**
   * @brief Searches the inclusive index range [s, e] of the sorted vector.
   * @return The last index in (s, e] whose begin() is <= `addr`, or `s` when
   *         there is none (including when s >= e).
   */
  size_t binary_search(size_t addr, size_t s, size_t e) const
  {
    if (s >= e)
      return s;

    // Element `s` is the fallback answer, so only (s, e] needs to be searched.
    const auto first = this->cbegin() + s + 1;
    const auto last = this->cbegin() + e + 1;
    const auto past = std::upper_bound(
      first, last, addr,
      [](size_t key, const Type& item) { return key < item.begin(); });
    return static_cast<size_t>(past - this->cbegin()) - 1;
  }

  /** @brief Read-only access to element `i` (unchecked, like operator[]). */
  const Type& get(size_t i) const { return this->operator[](i); }
};
|
||||
|
||||
/**
 * @brief Half-open virtual address range [vbegin, vbegin + size) tagged with an id.
 *
 * Ordering (operator<) considers only vbegin; equality compares every field.
 */
struct address_range_t
{
  uint64_t vbegin;  // first address covered by the range
  uint32_t size;    // number of bytes covered
  uint32_t id;      // identifier carried along with the range
  uint32_t offset;

  bool operator<(const address_range_t& other) const { return vbegin < other.vbegin; }
  // Equality operators are needed by ordered_vector::remove(const Type&).
  bool operator==(const address_range_t& other) const
  {
    return vbegin == other.vbegin && size == other.size && id == other.id &&
           offset == other.offset;
  }
  bool operator!=(const address_range_t& other) const { return !(*this == other); }

  /** @brief True when `addr` lies inside [vbegin, vbegin + size). */
  bool inrange(uint64_t addr) const { return addr >= vbegin && addr < vbegin + size; }
  /** @brief Sort key used by ordered_vector. */
  uint64_t begin() const { return vbegin; }
};
|
||||
|
||||
|
||||
/**
|
||||
* @brief Finds a candidate codeobj for the given vaddr
|
||||
*/
|
||||
/**
 * @brief Maps a virtual address to the stored address range that contains it.
 *
 * Ranges live in the ordered_vector base. The index of the most recent
 * successful lookup is remembered so that repeated queries into the same range
 * skip the search; the remembered slot is re-validated on every use.
 */
class CodeobjTableTranslator : protected ordered_vector<address_range_t>
{
  using Super = ordered_vector<address_range_t>;

public:
  CodeobjTableTranslator() { reset(); }

  /**
   * @brief Returns the range containing @p addr.
   * @throws std::string when no stored range contains @p addr.
   */
  const address_range_t& find_codeobj_in_range(uint64_t addr)
  {
    const bool cache_hit = cached_segment < size() && get(cached_segment).inrange(addr);
    if (!cache_hit)
    {
      const size_t candidate = lower_bound(addr);
      if (candidate >= size() || !get(candidate).inrange(addr))
        throw std::string("segment addr out of range");

      cached_segment = candidate;
    }
    return get(cached_segment);
  }

  /// Start address of the range containing @p addr; throws like find_codeobj_in_range().
  uint64_t find_codeobj_addr_in_range(uint64_t addr)
  {
    const address_range_t& owner = find_codeobj_in_range(addr);
    return owner.vbegin;
  }

  /// Unchecked access to the range stored at @p index.
  const address_range_t& get(size_t index) const { return data()[index]; }

  /// Adds a single range through the base class insert.
  void insert(const address_range_t& elem) { this->Super::insert(elem); }

  /// Appends every range in @p arange, then re-sorts the whole table by start address.
  void insert_list(std::vector<address_range_t> arange)
  {
    for (const address_range_t& range : arange) push_back(range);

    // address_range_t::operator< compares start addresses, the same ordering as before.
    std::sort(this->begin(), this->end());
  }

  /// Forgets the remembered lookup slot (all-ones never passes the size() check).
  void reset() { cached_segment = ~size_t{0}; }

  /// Drops the remembered slot and every stored range.
  void clear()
  {
    reset();
    this->Super::clear();
  }

  /// Drops the remembered slot, then removes the range starting at @p addr, if any.
  bool remove(uint64_t addr)
  {
    reset();
    return this->Super::remove(addr);
  }

private:
  size_t cached_segment = ~size_t{0};
};
|
||||
@@ -1,95 +1,138 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
raise Exception("Must be using Python 3")
|
||||
|
||||
import ctypes
|
||||
from ctypes import *
|
||||
import os
|
||||
|
||||
pluginpath = '/home/giovanni/Desktop/rocprofiler/build/lib/rocprofiler/libatt_plugin.so'
|
||||
HEADER_OFFSET = 62
|
||||
HEADER_MASK = 0x3
|
||||
ID_OFFSET = 30
|
||||
ID_MASK = (1<<32)-1
|
||||
OFFSET_MASK = (1<<30)-1
|
||||
|
||||
attplugin = ctypes.CDLL(pluginpath)
|
||||
pluginpath = '../../../lib/rocprofiler/libatt_plugin.so'
|
||||
filedir = os.path.dirname(os.path.realpath(__file__))
|
||||
attplugin = CDLL(os.path.join(filedir, pluginpath))
|
||||
|
||||
attplugin.createService.restype = ctypes.c_uint64
|
||||
attplugin.createService.argtypes = [ctypes.c_char_p, ctypes.c_uint64]
|
||||
attplugin.deleteService.restype = ctypes.c_int
|
||||
attplugin.deleteService.argtypes = [ctypes.c_uint64]
|
||||
attplugin.getInstruction.restype = ctypes.c_char_p
|
||||
attplugin.getInstruction.argtypes = [ctypes.c_uint64, ctypes.c_uint64]
|
||||
attplugin.getCppref.restype = ctypes.c_char_p
|
||||
attplugin.getCppref.argtypes = [ctypes.c_uint64, ctypes.c_uint64]
|
||||
attplugin.getInstSize.restype = ctypes.c_size_t
|
||||
attplugin.getInstSize.argtypes = [ctypes.c_uint64, ctypes.c_uint64]
|
||||
attplugin.getSymbolName.restype = ctypes.c_char_p
|
||||
attplugin.getSymbolName.argtypes = [ctypes.c_uint64]
|
||||
attplugin.getSymbolName.restype = c_char_p
|
||||
attplugin.getSymbolName.argtypes = [c_uint64]
|
||||
|
||||
class instruction_info_t(ctypes.Structure):
|
||||
_fields_ = [('inst', c_char_p),
|
||||
('cpp', c_char_p),
|
||||
('size', c_size_t)]
|
||||
|
||||
attplugin.getInstructionFromAddr.restype = instruction_info_t
|
||||
attplugin.getInstructionFromAddr.argtypes = [c_uint64]
|
||||
|
||||
attplugin.getInstructionFromID.restype = instruction_info_t
|
||||
attplugin.getInstructionFromID.argtypes = [c_uint32, c_uint64]
|
||||
|
||||
attplugin.addDecoder.restype = c_int
|
||||
attplugin.addDecoder.argtypes = [c_char_p, c_uint32, c_uint64, c_uint64]
|
||||
|
||||
attplugin.removeDecoder.restype = c_int
|
||||
attplugin.removeDecoder.argtypes = [c_uint32, c_uint64]
|
||||
|
||||
def IsRawPC(addr):
    """Return True when ``addr`` has no bits set at or above HEADER_OFFSET."""
    header_bits = addr >> HEADER_OFFSET
    return header_bits == 0
|
||||
|
||||
def getID(addr):
    """Extract the id field of ``addr``: the bits starting at ID_OFFSET, masked by ID_MASK."""
    shifted = addr >> ID_OFFSET
    return shifted & ID_MASK
|
||||
|
||||
def getOffset(addr):
    """Extract the offset field of ``addr``: the low bits selected by OFFSET_MASK."""
    offset = addr & OFFSET_MASK
    return offset
|
||||
|
||||
class CodeobjInstance:
|
||||
def __init__(self, line, classification_func):
|
||||
def __init__(self, line):
|
||||
tokens = line.split(' ')
|
||||
self.load_base = int(tokens[0], 16)
|
||||
self.load_end = self.load_base + int(tokens[1], 16)
|
||||
self.att_marker = int(tokens[2])
|
||||
self.memsize = int(tokens[1], 16)
|
||||
self.att_id = int(tokens[2])
|
||||
self.fpath = tokens[3]
|
||||
|
||||
self.handle = attplugin.createService(self.fpath.encode('utf-8'), self.load_base)
|
||||
self.classifier = classification_func
|
||||
|
||||
if self.handle == 0:
|
||||
encoded = self.fpath.encode('utf-8')
|
||||
self.error = attplugin.addDecoder(encoded, self.att_id, self.load_base, self.memsize)
|
||||
if self.error != 0:
|
||||
print('Warning: Could not open', line)
|
||||
raise
|
||||
|
||||
def release(self):
|
||||
attplugin.deleteService(self.handle)
|
||||
|
||||
def inrange(self, addr):
|
||||
return addr >= self.load_base and addr < self.load_end+0x1000
|
||||
|
||||
def GetInstruction(self, addr):
|
||||
inst = attplugin.getInstruction(self.handle, addr)
|
||||
if inst is None:
|
||||
return None
|
||||
inst = inst.decode()
|
||||
while len(inst) and (inst[0] == '\t' or inst[0] == ' '):
|
||||
inst = inst[1:]
|
||||
while len(inst) and (inst[-1] == '\t' or inst[-1] == ' '):
|
||||
inst = inst[:-1]
|
||||
cpp = attplugin.getCppref(self.handle, addr)
|
||||
if cpp:
|
||||
cpp = cpp.decode()
|
||||
size = attplugin.getInstSize(self.handle, addr)
|
||||
if size and inst:
|
||||
return (self.classifier(inst.encode('utf-8'), len(inst)), inst, cpp, size)
|
||||
return None
|
||||
attplugin.removeDecoder(self.att_id, self.load_base)
|
||||
|
||||
|
||||
class CodeobjService:
|
||||
def __init__(self, att_kernel_txt, cfunc) -> None:
|
||||
def __init__(self, att_kernel_txt, cfunc):
|
||||
cfunc.restype = ctypes.c_int
|
||||
cfunc.argtypes = [ctypes.c_char_p, ctypes.c_size_t]
|
||||
|
||||
self.classifier = cfunc
|
||||
self.last_instance = None
|
||||
self.services = []
|
||||
self.services = {}
|
||||
for line in att_kernel_txt:
|
||||
try:
|
||||
if 'memory://' == line[0:len('memory://')]:
|
||||
continue
|
||||
self.services.append(CodeobjInstance(line, cfunc))
|
||||
service = CodeobjInstance(line)
|
||||
self.services[service.att_id] = service
|
||||
except:
|
||||
pass
|
||||
|
||||
def ToRawPC(self, addr):
    """Convert ``addr`` to a raw program counter.

    Addresses that IsRawPC() already accepts are returned unchanged. Otherwise
    the id field selects an entry of ``self.services`` and the offset field is
    added to that entry's ``load_base``.
    """
    if not IsRawPC(addr):
        owner = self.services[getID(addr)]
        return owner.load_base + getOffset(addr)
    return addr
|
||||
|
||||
def release(self):
|
||||
for _, _, instance in self.services:
|
||||
for _, instance in self.services.items():
|
||||
instance.release()
|
||||
|
||||
def GetInstruction(self, addr):
|
||||
if self.last_instance and self.last_instance.inrange(addr):
|
||||
return self.last_instance.GetInstruction(addr)
|
||||
if addr >> HEADER_OFFSET != 0:
|
||||
return self.GetInstructionFromID(getID(addr), getOffset(addr))
|
||||
else:
|
||||
return self.GetInstructionFromAddr(addr)
|
||||
|
||||
for instance in self.services:
|
||||
if instance.inrange(addr):
|
||||
self.last_instance = instance
|
||||
return instance.GetInstruction(addr)
|
||||
def GetInstructionFromAddr(self, addr):
|
||||
info_inst = attplugin.getInstructionFromAddr(addr)
|
||||
if info_inst.size == 0 or info_inst.inst is None:
|
||||
return None
|
||||
inst = info_inst.inst.decode()
|
||||
cpp = info_inst.cpp
|
||||
if cpp:
|
||||
cpp = cpp.decode()
|
||||
|
||||
return None
|
||||
while len(inst) and (inst[0] == '\t' or inst[0] == ' '):
|
||||
inst = inst[1:]
|
||||
while len(inst) and (inst[-1] == '\t' or inst[-1] == ' '):
|
||||
inst = inst[:-1]
|
||||
|
||||
return (self.classifier(info_inst.inst, len(inst)), inst, cpp, info_inst.size)
|
||||
|
||||
def GetInstructionFromID(self, id, offset):
    """Look up one instruction by code object id and offset.

    Args:
        id: Code object id forwarded to the plugin.
        offset: Offset forwarded to the plugin together with ``id``.

    Returns:
        None when the plugin reports a zero size or no instruction text.
        Otherwise a tuple ``(classification, instruction, cpp, size)`` where
        ``instruction`` has surrounding spaces and tabs removed and ``cpp`` is
        ``''`` when the plugin supplied no source reference.
    """
    info_inst = attplugin.getInstructionFromID(id, offset)
    if info_inst.size == 0 or info_inst.inst is None:
        return None

    # Trim exactly the characters the former per-character loops removed.
    inst = info_inst.inst.decode().strip(' \t')
    cpp = info_inst.cpp.decode() if info_inst.cpp else ''

    # Classify the trimmed text and pass its own byte length. Handing over the
    # untrimmed buffer with the trimmed length made the length describe a
    # different string than the one being passed.
    encoded = inst.encode('utf-8')
    return (self.classifier(encoded, len(encoded)), inst, cpp, info_inst.size)
|
||||
|
||||
def getSymbolName(self, addr):
|
||||
name = attplugin.getSymbolName(addr)
|
||||
name = attplugin.getSymbolName(self.ToRawPC(addr))
|
||||
if name:
|
||||
return name.decode()
|
||||
return "Addr #"+hex(addr)
|
||||
return "Addr #"+hex(self.ToRawPC(addr))
|
||||
|
||||
@@ -186,6 +186,18 @@ class PCTranslator:
|
||||
def jump(self, as_line):
|
||||
return self.jump_map[as_line[-3]]
|
||||
|
||||
def addsymbol(self, addr):
    """Append a comment line naming the symbol at ``addr`` to ``self.raw_code``.

    Nothing is added when ``addr`` is already a key of ``self.addrmap``. The
    new line copies fields 2, 4 and 5 from the current last line of
    ``self.raw_code``, so ``self.raw_code`` must not be empty.
    """
    if addr in self.addrmap:
        return

    symbol = self.codeservice.getSymbolName(addr)
    if symbol is None:
        # hex() already emits the "0x" prefix, so it must not be prepended again.
        symbol = "Unknown symbol at " + hex(addr)

    last_line = self.raw_code[-1]
    newline = ['; ' + symbol, 100, last_line[2], 0, last_line[4], last_line[5], -1, 0, 0]
    self.raw_code.append(newline)
|
||||
|
||||
def getcode(self, addr):
|
||||
try:
|
||||
return self.addrmap[addr]
|
||||
@@ -330,6 +342,7 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
|
||||
return None
|
||||
|
||||
watchlist = PCTranslator(insts, code, raw_code, reverse_map, codeservice)
|
||||
watchlist.addsymbol(firstinst.cycles)
|
||||
line = firstinst.cycles
|
||||
lineincrement = watchlist.getincrement(line)
|
||||
pcskip.append(0)
|
||||
@@ -517,8 +530,8 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
|
||||
insts[i] = insts[i + 1]
|
||||
insts[i + 1] = temp
|
||||
next = line
|
||||
elif "s_waitcnt " in as_line[0] or "_load_" in as_line[0]:
|
||||
if skipped_immed > 0 and "s_waitcnt " in as_line[0]:
|
||||
elif "s_waitcnt" in as_line[0] or "_load_" in as_line[0]:
|
||||
if skipped_immed > 0 and "s_waitcnt" in as_line[0]:
|
||||
matched = True
|
||||
skipped_immed -= 1
|
||||
elif 'scratch_' not in as_line[0]:
|
||||
@@ -530,11 +543,12 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto, codeservice):
|
||||
result.append(inst)
|
||||
i += 1
|
||||
num_failed_stitches = 0
|
||||
elif not bGFX9 and inst.type == IMMED and line != next:
|
||||
elif inst.type == IMMED and line != next and (not bGFX9 or 's_barrier' in as_line[0]):
|
||||
skipped_immed += 1
|
||||
inst.asmline = reverse_map[line]
|
||||
result.append(inst)
|
||||
next = line
|
||||
if 's_barrier' in as_line[0]:
|
||||
next = line + lineincrement
|
||||
i += 1
|
||||
else:
|
||||
num_failed_stitches += 1
|
||||
|
||||
@@ -682,13 +682,7 @@
|
||||
const token_id = "token" + scroll_to
|
||||
const token = d3.select('#'+token_id).node()
|
||||
// scrolling not consistent without the timer
|
||||
setTimeout(() => {
|
||||
token.scrollIntoView({behavior: "smooth", inline: "start"})
|
||||
}, 800)
|
||||
const inView = isInViewport(src_line)
|
||||
if (!inView) {
|
||||
src_line.scrollIntoView({behavior: "smooth", block: "start"})
|
||||
}
|
||||
src_line.scrollIntoView({behavior: "smooth", block: "start"})
|
||||
}
|
||||
})
|
||||
.text((d,i) => {
|
||||
@@ -952,28 +946,27 @@
|
||||
.attr("fill", (d) => { return INST_TYPE[d[1]][1] })
|
||||
.style("cursor", "pointer")
|
||||
.datum((d) => { return {data:d} })
|
||||
.on('mouseenter', (d) => {
|
||||
if (d) {
|
||||
try {
|
||||
const token = d3.select(d.toElement)
|
||||
last_color = token.attr('fill')
|
||||
token.attr('fill', '#e0115f')
|
||||
let token_data = token.datum().data
|
||||
d3.select(".highlight") .classed("highlight", false)
|
||||
d3.select('.line_' + token_data[4]) .classed("highlight", true)
|
||||
src_line = d3.select('.line_' + token_data[4]).node()
|
||||
const inView = isInViewport(src_line)
|
||||
if (!inView) {
|
||||
src_line.scrollIntoView({behavior: "smooth", block: "start"})
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
.on('mouseenter', (e, d) => {
|
||||
try {
|
||||
d3.select('.line_' + d.data[4]).classed("highlight", true)
|
||||
d3.select(".highlight") .classed("highlight", true)
|
||||
} catch {}
|
||||
})
|
||||
.on("mouseleave", (d) => {
|
||||
if (d) {
|
||||
d3.select(d.fromElement) .attr("fill", last_color)
|
||||
.on("mouseleave", (e, d) => {
|
||||
try {
|
||||
d3.select('.line_' + d.data[4]).classed("highlight", false)
|
||||
d3.select(".highlight") .classed("highlight", false)
|
||||
}
|
||||
} catch {}
|
||||
})
|
||||
.on('click', (e, d) => {
|
||||
try {
|
||||
d3.select('.line_' + d.data[4]).classed("highlight", true)
|
||||
src_line = d3.select('.line_' + d.data[4]).node()
|
||||
const inView = isInViewport(src_line)
|
||||
if (!inView) {
|
||||
src_line.scrollIntoView({behavior: "smooth", block: "start"})
|
||||
}
|
||||
} catch {}
|
||||
})
|
||||
.append("svg:title")
|
||||
.text((d) => {
|
||||
|
||||
@@ -3,11 +3,10 @@ global: rocprofiler_plugin_initialize;
|
||||
rocprofiler_plugin_finalize;
|
||||
rocprofiler_plugin_write_buffer_records;
|
||||
rocprofiler_plugin_write_record;
|
||||
createService;
|
||||
deleteService;
|
||||
getInstruction;
|
||||
getCppref;
|
||||
getInstSize;
|
||||
addDecoder;
|
||||
removeDecoder;
|
||||
getInstructionFromAddr;
|
||||
getInstructionFromID;
|
||||
getSymbolName;
|
||||
local: *;
|
||||
};
|
||||
@@ -503,7 +503,7 @@ hsa_status_t CodeObjectCallback(hsa_executable_t executable,
|
||||
if (data.codeobj.unload)
|
||||
codeobj_capture_instance::Unload(data.codeobj.load_base);
|
||||
else
|
||||
codeobj_capture_instance::Load(data.codeobj.load_base,
|
||||
codeobj_capture_instance::Load(data.codeobj.load_base, data.codeobj.load_size,
|
||||
uri_str, data.codeobj.memory_base, data.codeobj.memory_size);
|
||||
|
||||
hsa_executable_iterate_agent_symbols(executable, data.codeobj.agent,
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
//#define HSA_ATT_MARKER_ENABLE
|
||||
|
||||
#include "packets_generator.h"
|
||||
#include "src/api/rocprofiler_singleton.h"
|
||||
|
||||
@@ -618,10 +616,13 @@ hsa_ven_amd_aqlprofile_profile_t* GenerateATTPackets(
|
||||
|
||||
// Generate ATT tracer marker packets. Also generate and return
|
||||
// the descriptor object which has the PM4 buffer for inserting data
|
||||
hsa_ven_amd_aqlprofile_descriptor_t
|
||||
GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_t data)
|
||||
hsa_ven_amd_aqlprofile_descriptor_t GenerateATTMarkerPackets(
|
||||
hsa_agent_t gpu_agent,
|
||||
packet_t& marker_packet,
|
||||
uint32_t data,
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
||||
)
|
||||
{
|
||||
#ifdef HSA_ATT_MARKER_ENABLE
|
||||
// Preparing the profile structure to get the packets
|
||||
auto pool = rocprofiler::HSASupport_Singleton::GetInstance()
|
||||
.GetHSAAgentInfo(gpu_agent.handle)
|
||||
@@ -635,7 +636,6 @@ GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_
|
||||
{}, desc
|
||||
};
|
||||
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel = HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2;
|
||||
hsa_status_t status = hsa_ven_amd_aqlprofile_att_marker(&profile, &marker_packet, data, channel);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
{
|
||||
@@ -645,9 +645,6 @@ GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_
|
||||
}
|
||||
|
||||
return desc;
|
||||
#else
|
||||
return {nullptr,0};
|
||||
#endif
|
||||
}
|
||||
|
||||
void AddVendorSpecificPacket(const packet_t* packet,
|
||||
|
||||
@@ -61,7 +61,12 @@ hsa_ven_amd_aqlprofile_profile_t* GenerateATTPackets(
|
||||
packet_t* stop_packet, size_t att_buffer_size);
|
||||
|
||||
hsa_ven_amd_aqlprofile_descriptor_t
|
||||
GenerateATTMarkerPackets(hsa_agent_t gpu_agent, packet_t& marker_packet, uint32_t data);
|
||||
GenerateATTMarkerPackets(
|
||||
hsa_agent_t gpu_agent,
|
||||
packet_t& marker_packet,
|
||||
uint32_t data,
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
||||
);
|
||||
|
||||
uint8_t* AllocateSysMemory(hsa_agent_t gpu_agent, size_t size, hsa_amd_memory_pool_t* cpu_pool);
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@ std::mutex codeobj_record::mutex;
|
||||
std::unordered_map<uint64_t, CodeobjPtr> codeobj_record::codeobjs{};
|
||||
std::unordered_map<uint64_t, codeobj_record::RecordInstance> codeobj_record::record_id_map{};
|
||||
std::unordered_set<codeobj_record*> codeobj_record::listeners;
|
||||
std::atomic<uint32_t> codeobj_capture_instance::loadcount{0};
|
||||
std::atomic<uint32_t> codeobj_capture_instance::eventcount{0};
|
||||
|
||||
// Codeobj Record
|
||||
codeobj_record::codeobj_record(rocprofiler_codeobj_capture_mode_t mode) : capture_mode(mode){};
|
||||
@@ -71,16 +71,17 @@ void codeobj_record::stop_capture() {
|
||||
// Codeobj Capture
|
||||
void codeobj_capture_instance::Load(
|
||||
uint64_t addr,
|
||||
uint64_t load_size,
|
||||
const std::string& URI,
|
||||
uint64_t mem_addr,
|
||||
uint64_t size
|
||||
uint64_t mem_size
|
||||
) {
|
||||
uint32_t id = loadcount.fetch_add(1, std::memory_order_relaxed)+1;
|
||||
uint32_t id = eventcount.fetch_add(1, std::memory_order_relaxed)+1;
|
||||
auto time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
|
||||
|
||||
std::lock_guard<std::mutex> lock(codeobj_record::mutex);
|
||||
|
||||
auto inst = std::make_shared<codeobj_capture_instance>(addr, URI, mem_addr, size, time, id);
|
||||
auto inst = std::make_shared<codeobj_capture_instance>(addr, load_size, URI, mem_addr, mem_size, time, id);
|
||||
codeobj_record::codeobjs[addr] = inst;
|
||||
for (auto* listen : codeobj_record::listeners) listen->addcapture(inst);
|
||||
}
|
||||
@@ -90,6 +91,7 @@ void codeobj_capture_instance::Unload(uint64_t addr) {
|
||||
|
||||
if (codeobj_record::codeobjs.find(addr) == codeobj_record::codeobjs.end()) return;
|
||||
|
||||
eventcount.fetch_add(1, std::memory_order_relaxed)+1;
|
||||
auto time = rocprofiler::ROCProfiler_Singleton::GetInstance().timestamp_ns().value;
|
||||
codeobj_record::codeobjs.at(addr)->end_time = time;
|
||||
codeobj_record::codeobjs.erase(addr);
|
||||
|
||||
@@ -34,30 +34,44 @@
|
||||
*/
|
||||
class codeobj_capture_instance {
|
||||
public:
|
||||
codeobj_capture_instance(uint64_t _addr, const std::string& _uri, uint64_t mem_addr,
|
||||
uint64_t mem_size, uint64_t start_time, uint32_t id)
|
||||
: addr(_addr), start_time(start_time), URI(_uri),
|
||||
codeobj_capture_instance(
|
||||
uint64_t _addr,
|
||||
uint64_t _load_size,
|
||||
const std::string& _uri,
|
||||
uint64_t mem_addr,
|
||||
uint64_t mem_size,
|
||||
uint64_t start_time,
|
||||
uint32_t id
|
||||
)
|
||||
: addr(_addr), load_size(_load_size), start_time(start_time), URI(_uri),
|
||||
mem_addr(mem_addr), mem_size(mem_size), load_id(id) {};
|
||||
|
||||
void setmode(rocprofiler_codeobj_capture_mode_t mode);
|
||||
|
||||
rocprofiler_intercepted_codeobj_t get() const {
|
||||
const char* buf_ptr = buffer.size() ? buffer.data() : nullptr;
|
||||
return {URI.c_str(), addr, mem_size, buf_ptr, buffer.size(), start_time, end_time, load_id};
|
||||
return {URI.c_str(), addr, load_size, buf_ptr, buffer.size(), start_time, end_time, load_id};
|
||||
};
|
||||
|
||||
const uint64_t addr;
|
||||
const uint64_t load_size;
|
||||
const uint64_t start_time;
|
||||
const uint32_t load_id;
|
||||
|
||||
static void Load(uint64_t addr, const std::string& URI, uint64_t mem_addr, uint64_t mem_size);
|
||||
static void Load(
|
||||
uint64_t addr,
|
||||
uint64_t load_size,
|
||||
const std::string& URI,
|
||||
uint64_t mem_addr,
|
||||
uint64_t mem_size
|
||||
);
|
||||
static void Unload(uint64_t addr);
|
||||
static uint32_t GetLoadCount() { return loadcount.load(std::memory_order_relaxed); }
|
||||
static uint32_t GetEventCount() { return eventcount.load(std::memory_order_relaxed); }
|
||||
|
||||
private:
|
||||
//! 32 bits ID because this is the natural channel width for ATT Markers.
|
||||
//! There is no world in which 4 billions markers can be sent anyway.
|
||||
static std::atomic<uint32_t> loadcount;
|
||||
static std::atomic<uint32_t> eventcount;
|
||||
void reset(rocprofiler_codeobj_capture_mode_t mode);
|
||||
|
||||
std::pair<size_t, size_t> parse_uri();
|
||||
|
||||
@@ -91,10 +91,21 @@ public:
|
||||
);
|
||||
|
||||
void InsertMarker(
|
||||
std::vector<packet_t>& transformed_packets,
|
||||
hsa_agent_t agent,
|
||||
uint32_t data,
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
||||
);
|
||||
void InsertUnloadMarker(
|
||||
std::vector<packet_t>& transformed_packets,
|
||||
hsa_agent_t agent,
|
||||
uint32_t data
|
||||
);
|
||||
void InsertLoadMarker(
|
||||
std::vector<packet_t>& transformed_packets,
|
||||
hsa_agent_t agent,
|
||||
rocprofiler_intercepted_codeobj_t codeobj
|
||||
);
|
||||
|
||||
void SetParameters(const std::vector<rocprofiler_att_parameter_t>& params) {
|
||||
att_parameters_data = params;
|
||||
@@ -125,7 +136,7 @@ protected:
|
||||
static std::mutex att_enable_disable_mutex;
|
||||
|
||||
private:
|
||||
uint32_t codeobj_load_cnt = 0;
|
||||
uint32_t codeobj_event_cnt = 0;
|
||||
|
||||
static void AddAttRecord(
|
||||
rocprofiler_record_att_tracer_t* record,
|
||||
@@ -176,6 +187,9 @@ private:
|
||||
|
||||
std::mutex sessions_pending_signals_lock_;
|
||||
std::map<uint32_t, std::vector<att_pending_signal_t>> sessions_pending_signals_;
|
||||
|
||||
rocprofiler_record_id_t capture_id;
|
||||
std::unordered_set<uint32_t> active_capture_event_ids;
|
||||
};
|
||||
|
||||
} // namespace att
|
||||
|
||||
@@ -27,19 +27,71 @@
|
||||
|
||||
#define __NR_gettid 186
|
||||
|
||||
#define ATT_MARKER_HEADER_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_0
|
||||
#define ATT_MARKER_SIZE_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_1
|
||||
#define ATT_MARKER_LO_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_2
|
||||
#define ATT_MARKER_HI_CHANNEL HSA_VEN_AMD_AQLPROFILE_ATT_CHANNEL_3
|
||||
|
||||
/// Event kind stored in the `type` field of an ATT header marker.
enum rocprofiler_att_marker_type_t
{
  ROCPROFILER_ATT_MARKER_LOAD = 0,   ///< Written by AttTracer::InsertLoadMarker.
  ROCPROFILER_ATT_MARKER_UNLOAD = 1  ///< Written by AttTracer::InsertUnloadMarker.
};
|
||||
|
||||
/**
 * @brief 32-bit ATT header marker payload, viewable as one raw word or as fields.
 */
union att_header_marker_t
{
  uint32_t raw;  ///< Whole payload as passed to InsertMarker.
  struct
  {
    uint32_t type : 2;  ///< A rocprofiler_att_marker_type_t value.
    uint32_t id : 30;   ///< Marker id; only 30 bits are stored.
  };
};
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
namespace att {
|
||||
|
||||
void AttTracer::InsertMarker(
|
||||
void AttTracer::InsertUnloadMarker(
|
||||
std::vector<packet_t>& transformed_packets,
|
||||
hsa_agent_t agent,
|
||||
uint32_t data
|
||||
) {
|
||||
att_header_marker_t header{.raw = 0};
|
||||
header.type = ROCPROFILER_ATT_MARKER_UNLOAD;
|
||||
header.id = data;
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel = ATT_MARKER_HEADER_CHANNEL;
|
||||
|
||||
this->InsertMarker(transformed_packets, agent, header.raw, channel);
|
||||
}
|
||||
|
||||
void AttTracer::InsertLoadMarker(
|
||||
std::vector<packet_t>& transformed_packets,
|
||||
hsa_agent_t agent,
|
||||
rocprofiler_intercepted_codeobj_t codeobj
|
||||
) {
|
||||
this->InsertMarker(transformed_packets, agent, codeobj.mem_size, ATT_MARKER_SIZE_CHANNEL);
|
||||
|
||||
uint64_t addr = codeobj.base_address;
|
||||
this->InsertMarker(transformed_packets, agent, addr & ((1ul << 32)-1), ATT_MARKER_LO_CHANNEL);
|
||||
this->InsertMarker(transformed_packets, agent, addr >> 32, ATT_MARKER_HI_CHANNEL);
|
||||
|
||||
att_header_marker_t header{.raw = 0};
|
||||
header.type = ROCPROFILER_ATT_MARKER_LOAD;
|
||||
header.id = codeobj.att_marker_id;
|
||||
this->InsertMarker(transformed_packets, agent, header.raw, ATT_MARKER_HEADER_CHANNEL);
|
||||
}
|
||||
|
||||
void AttTracer::InsertMarker(
|
||||
std::vector<packet_t>& transformed_packets,
|
||||
hsa_agent_t agent,
|
||||
uint32_t data,
|
||||
hsa_ven_amd_aqlprofile_att_marker_channel_t channel
|
||||
) {
|
||||
packet_t marker_packet{};
|
||||
auto desc = Packet::GenerateATTMarkerPackets(agent, marker_packet, data);
|
||||
auto desc = Packet::GenerateATTMarkerPackets(agent, marker_packet, data, channel);
|
||||
if (desc.ptr && desc.size)
|
||||
Packet::AddVendorSpecificPacket(&marker_packet, &transformed_packets, hsa_signal_t{.handle = 0});
|
||||
else
|
||||
rocprofiler::warning("Could not add ATT Marker");
|
||||
}
|
||||
|
||||
|
||||
@@ -63,25 +115,26 @@ bool AttTracer::ATTContiguousWriteInterceptor(
|
||||
|
||||
// att start
|
||||
// Getting Queue Data and Information
|
||||
auto agent_handle = queue_info.GetGPUAgent().handle;
|
||||
rocprofiler::HSAAgentInfo& agentInfo = rocprofiler::HSASupport_Singleton::GetInstance()
|
||||
.GetHSAAgentInfo(queue_info.GetGPUAgent().handle);
|
||||
.GetHSAAgentInfo(agent_handle);
|
||||
|
||||
auto dispatchPackets = Packet::ExtractDispatchPackets(packets, pkt_count);
|
||||
if (dispatchPackets.size() == 0) return false;
|
||||
|
||||
size_t writer_id = WRITER_ID.fetch_add(dispatchPackets.size(), std::memory_order_relaxed);
|
||||
uint32_t new_load_cnt = codeobj_capture_instance::GetLoadCount();
|
||||
uint32_t new_load_cnt = codeobj_capture_instance::GetEventCount();
|
||||
|
||||
auto bInsertStart = RequiresStartPacket(writer_id, dispatchPackets.size());
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
|
||||
// If att_start already exists, don't start again
|
||||
auto agent_pending_packets = pending_stop_packets.find(queue_info.GetGPUAgent().handle);
|
||||
auto agent_pending_packets = pending_stop_packets.find(agent_handle);
|
||||
if (agent_pending_packets != pending_stop_packets.end())
|
||||
bInsertStart = {};
|
||||
|
||||
// If nothing will be added or removed, return
|
||||
if (!bInsertStart && codeobj_load_cnt == new_load_cnt)
|
||||
if (!bInsertStart && codeobj_event_cnt == new_load_cnt)
|
||||
{
|
||||
if (
|
||||
agent_pending_packets == pending_stop_packets.end() ||
|
||||
@@ -107,7 +160,7 @@ bool AttTracer::ATTContiguousWriteInterceptor(
|
||||
}
|
||||
|
||||
uint64_t IsGFX9 = HSASupport_Singleton::GetInstance()
|
||||
.GetHSAAgentInfo(queue_info.GetGPUAgent().handle)
|
||||
.GetHSAAgentInfo(agent_handle)
|
||||
.GetDeviceInfo()
|
||||
.getName()
|
||||
.find("gfx9") != std::string::npos;
|
||||
@@ -134,33 +187,58 @@ bool AttTracer::ATTContiguousWriteInterceptor(
|
||||
0
|
||||
);
|
||||
|
||||
codeobj_record::make_capture(rocprofiler_record_id_t{record_id}, capturem, IsGFX9);
|
||||
codeobj_record::start_capture(rocprofiler_record_id_t{record_id});
|
||||
this->capture_id = rocprofiler_record_id_t{record_id};
|
||||
codeobj_record::make_capture(this->capture_id, capturem, IsGFX9);
|
||||
codeobj_record::start_capture(this->capture_id);
|
||||
|
||||
stop_packet.header = HSA_PACKET_TYPE_VENDOR_SPECIFIC << HSA_PACKET_HEADER_TYPE;
|
||||
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
|
||||
pending_stop_packets[queue_info.GetGPUAgent().handle]
|
||||
pending_stop_packets[agent_handle]
|
||||
= {record_id, writer_id, bInsertStart->second, session_id_, stop_packet};
|
||||
}
|
||||
|
||||
if (codeobj_load_cnt != new_load_cnt)
|
||||
bool bHasPending = false;
|
||||
{
|
||||
codeobj_load_cnt = new_load_cnt;
|
||||
InsertMarker(transformed_packets, queue_info.GetGPUAgent(), new_load_cnt);
|
||||
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
|
||||
bHasPending = pending_stop_packets.find(agent_handle) != pending_stop_packets.end();
|
||||
}
|
||||
|
||||
if (bHasPending && (bInsertStart || codeobj_event_cnt != new_load_cnt))
|
||||
{
|
||||
codeobj_event_cnt = new_load_cnt;
|
||||
|
||||
auto symbols = codeobj_record::get_capture(this->capture_id);
|
||||
std::unordered_set<uint32_t> current_ids;
|
||||
|
||||
for (size_t s=0; s<symbols.count; s++)
|
||||
current_ids.insert(symbols.symbols[s].att_marker_id);
|
||||
|
||||
for (uint32_t prev_id : active_capture_event_ids)
|
||||
if (current_ids.find(prev_id) == current_ids.end())
|
||||
InsertUnloadMarker(transformed_packets, queue_info.GetGPUAgent(), prev_id);
|
||||
|
||||
for (size_t s=0; s<symbols.count; s++)
|
||||
{
|
||||
auto& symbol = symbols.symbols[s];
|
||||
if (active_capture_event_ids.find(symbol.att_marker_id) == active_capture_event_ids.end())
|
||||
InsertLoadMarker(transformed_packets, queue_info.GetGPUAgent(), symbol);
|
||||
}
|
||||
|
||||
active_capture_event_ids = std::move(current_ids);
|
||||
}
|
||||
|
||||
// Searching across all the packets given during this write
|
||||
for (size_t i = 0; i < pkt_count; ++i)
|
||||
transformed_packets.emplace_back(packets_arr[i]);
|
||||
|
||||
if (bHasPending)
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(att_enable_disable_mutex);
|
||||
auto agent_pending_packets = pending_stop_packets.find(queue_info.GetGPUAgent().handle);
|
||||
auto agent_pending_packets = pending_stop_packets.at(agent_handle);
|
||||
|
||||
if (agent_pending_packets != pending_stop_packets.end() &&
|
||||
agent_pending_packets->second.last_kernel_exec <= writer_id + dispatchPackets.size()
|
||||
) {
|
||||
const ATTRecordSignal& rsignal = agent_pending_packets->second;
|
||||
if (agent_pending_packets.last_kernel_exec <= writer_id + dispatchPackets.size())
|
||||
{
|
||||
const ATTRecordSignal& rsignal = agent_pending_packets;
|
||||
// Adding a barrier packet with the original packet's completion signal.
|
||||
hsa_signal_t interrupt_signal;
|
||||
CreateSignal(0, &interrupt_signal);
|
||||
@@ -180,8 +258,10 @@ bool AttTracer::ATTContiguousWriteInterceptor(
|
||||
interrupt_signal
|
||||
});
|
||||
|
||||
codeobj_record::stop_capture(rocprofiler_record_id_t{rsignal.record_id});
|
||||
pending_stop_packets.erase(queue_info.GetGPUAgent().handle);
|
||||
//codeobj_record::stop_capture(rocprofiler_record_id_t{rsignal.record_id});
|
||||
codeobj_record::stop_capture(this->capture_id);
|
||||
active_capture_event_ids.clear();
|
||||
pending_stop_packets.erase(agent_handle);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -316,10 +316,8 @@ hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) {
|
||||
api->hsa_ven_amd_aqlprofile_iterate_data =
|
||||
(decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym(
|
||||
handle, "hsa_ven_amd_aqlprofile_iterate_data");
|
||||
#ifdef HSA_ATT_MARKER_ENABLE
|
||||
api->hsa_ven_amd_aqlprofile_att_marker = (decltype(::hsa_ven_amd_aqlprofile_att_marker)*)
|
||||
dlsym(handle, "hsa_ven_amd_aqlprofile_att_marker");
|
||||
#endif
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user