Files
Giovanni Lenzi Baraldi 9849073836 SWDEV-540648: Adding realtime clock to v3 tool. Update decoder header. (#666)
* SWDEV-540648: Adding realtime clock to v3 tool. Update header for decoder.

* Adding tests

* Review comments

* Review comment
2025-09-10 12:39:27 +02:00

198 lines
7.1 KiB
C++

// MIT License
//
// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// undefine NDEBUG so asserts stay enabled
#ifdef NDEBUG
# undef NDEBUG
#endif
#include "profile_interface.hpp"
#include "perfcounter.hpp"
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder.h>
#include <cxxabi.h>
#include <cstring>
#include <fstream>
namespace rocprofiler
{
namespace att_wrapper
{
/**
 * @brief Decoder callback: consumes one batch of decoded thread-trace records.
 *
 * This function is registered with rocprofiler_trace_decode by the ToolData
 * constructor. The payload behind @p trace_events is type-erased and is cast
 * according to @p trace_id.
 *
 * @param trace_id     Kind of record carried by this invocation.
 * @param trace_events Payload pointer; for GFXIP records the pointer value itself
 *                     is stored as an integer instead of being dereferenced.
 * @param trace_size   Number of elements reachable through @p trace_events.
 * @param userdata     Pointer to the ToolData instance; checked for null.
 */
void
get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
               void*                                          trace_events,
               size_t                                         trace_size,
               void*                                          userdata)
{
    C_API_BEGIN

    CHECK_NOTNULL(userdata);
    ToolData& tool = *static_cast<ToolData*>(userdata);

    // Every record kind except WAVE is fully handled inside the switch and returns.
    switch(trace_id)
    {
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_INFO:
        {
            auto* messages = static_cast<rocprofiler_thread_trace_decoder_info_t*>(trace_events);
            for(size_t idx = 0; idx < trace_size; ++idx)
            {
                ROCP_WARNING << rocprofiler_thread_trace_decoder_info_string(tool.decoder,
                                                                             messages[idx]);
            }
            return;
        }
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP:
        {
            tool.config.filemgr->gfxip = reinterpret_cast<size_t>(trace_events);
            return;
        }
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY:
        {
            const auto* samples = static_cast<const occupancy_t*>(trace_events);
            for(size_t idx = 0; idx < trace_size; ++idx)
            {
                tool.config.occupancy.push_back(samples[idx]);
            }
            return;
        }
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT:
        {
            PerfcounterFile(tool.config, static_cast<perfevent_t*>(trace_events), trace_size);
            return;
        }
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY:
        {
            // Only recorded when realtime collection is configured and a value is present.
            if(tool.config.realtime && trace_size != 0)
            {
                tool.config.realtime->frequency = *static_cast<uint64_t*>(trace_events);
            }
            return;
        }
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME:
        {
            if(tool.config.realtime && trace_size != 0)
            {
                tool.config.realtime->add(
                    tool.config.shader_engine, static_cast<realtime_t*>(trace_events), trace_size);
            }
            return;
        }
        case ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE: break;
        default: return;
    }

    // WAVE records: accumulate per-instruction statistics and emit one WaveFile per wave.
    bool        fetch_failed = false;
    const auto* waves        = static_cast<const wave_t*>(trace_events);

    for(size_t wave_idx = 0; wave_idx < trace_size; ++wave_idx)
    {
        const auto& wave = waves[wave_idx];

        // Latest end time seen so far in this wave; starts at the wave's begin time.
        int64_t prev_inst_time = wave.begin_time;

        for(size_t inst_idx = 0; inst_idx < wave.instructions_size; ++inst_idx)
        {
            const auto& inst = wave.instructions_array[inst_idx];

            // Instructions whose pc is all zeroes are skipped entirely.
            const bool has_pc = (inst.pc.code_object_id != 0 || inst.pc.address != 0);
            if(!has_pc) continue;

            try
            {
                auto& line = tool.get(inst.pc);
                line.hitcount += 1;
                line.latency += inst.duration;
                line.stall += inst.stall;
                // Idle is the gap since the previous instruction ended, clamped at zero.
                line.idle += std::max<int64_t>(inst.time - prev_inst_time, 0);
            } catch(...)
            {
                // Remember the failure and keep going; a single warning is printed below.
                fetch_failed = true;
            }

            prev_inst_time = std::max(prev_inst_time, inst.time + inst.duration);
        }

        WaveFile(tool.config, wave);
    }

    if(fetch_failed) ROCP_WARNING << "Could not fetch some instructions!";

    C_API_END
}
// Constructs the tool state and immediately decodes the given thread-trace buffer.
//
// _data    : raw trace bytes handed to rocprofiler_trace_decode (pointer + size).
// _config  : wave configuration; its `code` member initialises `cfile`.
// _decoder : decoder handle used for the decode call (and later by get_trace_data).
//
// `this` is passed as the userdata argument, so get_trace_data is invoked with a
// pointer to this object while the constructor is still running.
ToolData::ToolData(std::vector<char>& _data,
WaveConfig& _config,
rocprofiler_thread_trace_decoder_id_t _decoder)
: cfile(_config.code)
, config(_config)
, decoder(_decoder)
{
auto status =
rocprofiler_trace_decode(decoder, get_trace_data, _data.data(), _data.size(), this);
// A failed decode is only logged; construction still completes.
ROCP_ERROR_IF(status != ROCPROFILER_STATUS_SUCCESS) << ": " << status;
}
// Nothing to release explicitly; the compiler-generated destructor is used.
ToolData::~ToolData() = default;
/**
 * @brief Demangles a C++ symbol name via abi::__cxa_demangle.
 *
 * @param line Mangled symbol name. It does not need to be NUL-terminated; it may
 *             be a view into a larger buffer.
 * @return The demangled name, or an empty string when demangling fails.
 */
std::string
demangle(std::string_view line)
{
    // __cxa_demangle reads a NUL-terminated C string, which string_view::data() does not
    // guarantee. Copy into an owning string so exactly the viewed characters are demangled.
    const std::string mangled{line};

    int   status{0};
    char* c_name = abi::__cxa_demangle(mangled.c_str(), nullptr, nullptr, &status);
    if(c_name == nullptr) return "";

    // The buffer returned by __cxa_demangle is malloc-allocated and owned by the caller.
    std::string str = c_name;
    free(c_name);
    return str;
}
/**
 * @brief Returns the CodeLine associated with a program counter, creating it on demand.
 *
 * Resolution order:
 *  1. An existing entry in cfile->isa_map is returned directly.
 *  2. For a non-zero code object id, the symbol range containing the pc is looked up
 *     and every instruction in that range is inserted into isa_map; the kernel name
 *     is registered in cfile->kernel_names when the symbol can be found.
 *  3. If step 2 fails, or does not produce an entry for @p _pc, a single entry is
 *     created for @p _pc alone.
 *
 * @param _pc Program counter (address + code object id) to resolve.
 * @return Reference to the CodeLine stored in isa_map for @p _pc.
 *
 * Exceptions raised by the fallback lookup in step 3 are not caught here.
 */
CodeLine&
ToolData::get(pcinfo_t _pc)
{
    auto& isa_map = cfile->isa_map;
    if(auto cached = isa_map.find(_pc); cached != isa_map.end()) return *cached->second;

    // Attempt to disassemble full kernel
    if(_pc.code_object_id != 0u)
    {
        try
        {
            rocprofiler::sdk::codeobj::segment::CodeobjTableTranslator symbol_table;
            for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.code_object_id))
                symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.code_object_id});

            auto addr_range = symbol_table.find_codeobj_in_range(_pc.address);

            // A missing kernel name is not fatal: the instructions are still disassembled.
            try
            {
                auto symbol = cfile->table->getSymbolMap(_pc.code_object_id).at(addr_range.addr);
                auto pair   = KernelName{symbol.name, demangle(symbol.name)};
                cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.code_object_id}, pair);
            } catch(...)
            {
                ROCP_INFO << "Missing kernelSymbol at " << _pc.code_object_id << ':'
                          << addr_range.addr;
            }

            // Walk the range one instruction at a time; each decoded size advances addr.
            for(auto addr = addr_range.addr; addr < addr_range.addr + addr_range.size;)
            {
                pcinfo_t info{.address = addr, .code_object_id = addr_range.id};
                auto& cline = *(isa_map.emplace(info, std::make_unique<CodeLine>()).first->second);
                cline.line_number         = isa_map.size() + cfile->kernel_names.size() - 1;
                cfile->line_numbers[info] = cline.line_number;
                cline.code_line           = cfile->table->get(addr_range.id, addr);
                addr += cline.code_line->size;
                // A zero-sized instruction would never advance addr; abort the walk instead.
                if(cline.code_line->size == 0u) throw std::invalid_argument("Line has 0 bytes!");
            }

            if(auto found = isa_map.find(_pc); found != isa_map.end()) return *found->second;
        } catch(const std::exception& e)
        {
            // Best effort only: report why the walk failed, then use the fallback below.
            ROCP_INFO << "Could not disassemble kernel containing " << _pc.code_object_id << ':'
                      << _pc.address << " (" << e.what() << "); falling back to single line";
        }
    }

    // Fallback: create an entry for just this pc.
    auto& cline = *(isa_map.emplace(_pc, std::make_unique<CodeLine>()).first->second);
    cline.line_number        = isa_map.size();
    cfile->line_numbers[_pc] = cline.line_number;
    cline.code_line          = cfile->table->get(_pc.code_object_id, _pc.address);
    return cline;
}
} // namespace att_wrapper
} // namespace rocprofiler