SWDEV-540648: Adding realtime clock to v3 tool. Update decoder header. (#666)

* SWDEV-540648: Adding realtime clock to v3 tool. Update header for decoder.

* Adding tests

* Review comments

* Review comment
Этот коммит содержится в:
Giovanni Lenzi Baraldi
2025-09-10 12:39:27 +02:00
коммит произвёл GitHub
родитель 4e9acd492d
Коммит 9849073836
18 изменённых файлов: 193 добавлений и 82 удалений
+4 -11
Просмотреть файл
@@ -26,6 +26,7 @@
#endif
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
#include <rocprofiler-sdk/cxx/operators.hpp>
#include <rocprofiler-sdk/buffer.h>
#include <rocprofiler-sdk/callback_tracing.h>
@@ -78,15 +79,6 @@ namespace Results
{
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
struct address_sort_t
{
bool operator()(const pcinfo_t& a, const pcinfo_t& b) const
{
if(a.marker_id == b.marker_id) return a.addr < b.addr;
return a.marker_id < b.marker_id;
}
};
struct Latency
{
uint64_t latency{0};
@@ -94,7 +86,7 @@ struct Latency
};
// Maps address to latency
using LatencyTable = std::map<rocprofiler_thread_trace_decoder_pc_t, Latency, address_sort_t>;
using LatencyTable = std::map<rocprofiler_thread_trace_decoder_pc_t, Latency>;
// Used to disassemble instructions at (id, vaddr) pair
using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate;
@@ -135,7 +127,7 @@ gen_output_stream()
{
auto& addr = sorted.at(i).first;
auto& latency = sorted.at(i).second;
auto inst = table->get(addr.marker_id, addr.addr);
auto inst = table->get(addr.code_object_id, addr.address);
auto comment = inst->comment;
size_t pos = comment.rfind('/');
@@ -309,6 +301,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */)
// This is set by ctests: TODO: move to client.cpp
// If nullptr, searches rocprofiler-sdk install location
const char* lib_path = std::getenv("ROCPROFILER_TRACE_DECODER_LIB_PATH");
if(lib_path == nullptr) lib_path = "/opt/rocm/lib";
DECODER_CALL(rocprofiler_thread_trace_decoder_create(&Decoder::decoder, lib_path));
+15
Просмотреть файл
@@ -150,6 +150,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_ins
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_dimension_info_t&)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_version_triplet_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t)
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_pc_t)
// definitions of operator==
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t)
@@ -226,6 +227,12 @@ operator==(rocprofiler_version_triplet_t lhs, rocprofiler_version_triplet_t rhs)
return std::tie(lhs.major, lhs.minor, lhs.patch) == std::tie(rhs.major, rhs.minor, rhs.patch);
}
/// Equality for decoder program counters: two values compare equal only when
/// both the code object id and the address are equal.
inline bool
operator==(rocprofiler_thread_trace_decoder_pc_t lhs, rocprofiler_thread_trace_decoder_pc_t rhs)
{
    const bool same_code_object = (lhs.code_object_id == rhs.code_object_id);
    const bool same_address     = (lhs.address == rhs.address);
    return same_code_object && same_address;
}
// definitions of operator!=
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t)
@@ -245,6 +252,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_region_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_amd_memory_pool_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_version_triplet_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_id_t)
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_pc_t)
// definitions of operator<
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t)
@@ -321,6 +329,12 @@ operator<(rocprofiler_version_triplet_t lhs, rocprofiler_version_triplet_t rhs)
return std::tie(lhs.major, lhs.minor, lhs.patch) < std::tie(rhs.major, rhs.minor, rhs.patch);
}
/// Strict ordering for decoder program counters: ordered by code object id
/// first; the address breaks ties between entries of the same code object.
inline bool
operator<(rocprofiler_thread_trace_decoder_pc_t lhs, rocprofiler_thread_trace_decoder_pc_t rhs)
{
    if(lhs.code_object_id != rhs.code_object_id) return lhs.code_object_id < rhs.code_object_id;
    return lhs.address < rhs.address;
}
// definitions of operator>, operator<=, operator>=
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t)
@@ -340,6 +354,7 @@ ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_region_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_amd_memory_pool_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_version_triplet_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t)
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_pc_t)
// cleanup defines
#undef ROCPROFILER_CXX_DECLARE_OPERATORS
@@ -49,8 +49,8 @@ typedef enum rocprofiler_thread_trace_decoder_info_t
*/
typedef struct rocprofiler_thread_trace_decoder_pc_t
{
size_t addr; ///< Memory address (marker_id == 0), or ELF vaddr (marker_id != 0).
size_t marker_id; ///< Code object load ID. Zero if no code object was found.
uint64_t address; ///< Address (code_object_id == 0), or ELF vaddr (code_object_id != 0)
uint64_t code_object_id; ///< Zero if no code object was found.
} rocprofiler_thread_trace_decoder_pc_t;
/**
@@ -163,8 +163,8 @@ typedef struct rocprofiler_thread_trace_decoder_wave_t
int64_t begin_time; ///< Wave begin time. Should match occupancy event wave start.
int64_t end_time; ///< Wave end time. Should match occupancy event wave end.
size_t timeline_size; ///< timeline_array size
size_t instructions_size; ///< instructions_array size
uint64_t timeline_size; ///< timeline_array size
uint64_t instructions_size; ///< instructions_array size
rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; ///< wave state change events
rocprofiler_thread_trace_decoder_inst_t* instructions_array; ///< Instructions executed
} rocprofiler_thread_trace_decoder_wave_t;
@@ -217,7 +217,7 @@ typedef struct rocprofiler_thread_trace_decoder_shaderdata_t
*/
typedef enum rocprofiler_thread_trace_decoder_record_type_t
{
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type size_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type uint64_t
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t*
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t*
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t*
+2 -1
Просмотреть файл
@@ -51,6 +51,7 @@ ATTFileMgr::ATTFileMgr(Fspath _dir,
table = std::make_shared<AddressTable>();
codefile = std::make_shared<CodeFile>(dir, table);
filenames = std::make_shared<FilenameMgr>(dir);
realtime = std::make_shared<RealtimeTS>(dir);
for(size_t i = 0; i < ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST; i++)
wstates.at(i) = std::make_shared<WstatesFile>(i, dir);
@@ -97,7 +98,7 @@ ATTFileMgr::addDecoder(const char* filepath, uint64_t id, uint64_t load_addr, ui
void
ATTFileMgr::parseShader(int se_id, std::vector<char>& data)
{
WaveConfig config(se_id, filenames, codefile, wstates);
WaveConfig config(se_id, filenames, codefile, realtime, wstates);
ToolData tooldata(data, config, decoder);
if(!config.occupancy.empty()) occupancy.emplace(se_id, std::move(config.occupancy));
+1
Просмотреть файл
@@ -92,6 +92,7 @@ public:
std::shared_ptr<class CodeFile> codefile{nullptr};
std::shared_ptr<class FilenameMgr> filenames{nullptr};
std::shared_ptr<class RealtimeTS> realtime{nullptr};
std::shared_ptr<AddressTable> table{nullptr};
std::map<size_t, std::vector<occupancy_t>> occupancy{};
std::vector<uint64_t> codeobjs_to_delete{};
+10 -11
Просмотреть файл
@@ -72,8 +72,7 @@ CodeFile::~CodeFile()
vec.end(),
[](const std::pair<pcinfo_t, std::unique_ptr<CodeLine>>& a,
const std::pair<pcinfo_t, std::unique_ptr<CodeLine>>& b) {
if(a.first.marker_id == b.first.marker_id) return a.first.addr < b.first.addr;
return a.first.marker_id < b.first.marker_id;
return a.first < b.first;
});
std::stringstream ofs;
@@ -92,8 +91,8 @@ CodeFile::~CodeFile()
if(kernel_names.find(pc) != kernel_names.end())
{
csv_encoder::write_row(ofs,
pc.marker_id,
pc.addr,
pc.code_object_id,
pc.address,
"; " + kernel_names.at(pc).name,
0,
0,
@@ -102,8 +101,8 @@ CodeFile::~CodeFile()
kernel_names.at(pc).demangled);
}
csv_encoder::write_row(ofs,
pc.marker_id,
pc.addr,
pc.code_object_id,
pc.address,
line->code_line->inst,
line->hitcount,
line->latency,
@@ -138,16 +137,16 @@ CodeFile::~CodeFile()
{
std::stringstream code;
code << "[\"; " << kernel_names.at(line.first).name << "\",0," << (isa.line_number - 1)
<< ",\"" << kernel_names.at(line.first).demangled << "\"," << line.first.marker_id
<< "," << line.first.addr << ",0,0,0,0]";
<< ",\"" << kernel_names.at(line.first).demangled << "\","
<< line.first.code_object_id << "," << line.first.address << ",0,0,0,0]";
jcode.push_back(nlohmann::json::parse(code.str()));
}
std::stringstream code;
code << "[\"" << isa.code_line->inst << "\",0," << isa.line_number << ",\""
<< isa.code_line->comment << "\"," << line.first.marker_id << "," << line.first.addr
<< "," << isa.hitcount << "," << isa.latency << "," << isa.stall << "," << isa.idle
<< "]";
<< isa.code_line->comment << "\"," << line.first.code_object_id << ","
<< line.first.address << "," << isa.hitcount << "," << isa.latency << "," << isa.stall
<< "," << isa.idle << "]";
jcode.push_back(nlohmann::json::parse(code.str()));
+2 -2
Просмотреть файл
@@ -81,10 +81,10 @@ OccupancyFile(const Fspath& dir,
std::stringstream ss;
try
{
ss << table->getSymbolMap(pc.marker_id).at(pc.addr).name;
ss << table->getSymbolMap(pc.code_object_id).at(pc.address).name;
} catch(std::exception& e)
{
ss << pc.marker_id << " / 0x" << std::hex << pc.addr << std::dec;
ss << pc.code_object_id << " / 0x" << std::hex << pc.address << std::dec;
}
jocc["dispatches"][std::to_string(id)] = ss.str();
}
+37
Просмотреть файл
@@ -58,5 +58,42 @@ PerfcounterFile(WaveConfig& config, const perfevent_t* events, size_t event_coun
std::string filename = "se" + std::to_string(config.shader_engine) + "_perfcounter.json";
OutputFile(config.filemgr->dir / filename) << json;
}
/**
 * @brief Appends a batch of realtime clock samples to the storage kept for one shader.
 *
 * @param shader       Key under which the samples are aggregated.
 * @param events       Pointer to the first of @p event_count contiguous samples.
 * @param event_count  Number of samples readable at @p events.
 *
 * A null @p events or a zero @p event_count is a no-op and does not create an
 * entry for @p shader.
 */
void
RealtimeTS::add(int shader, const realtime_t* events, size_t event_count)
{
    // Guard the null case as well: `events + event_count` on a null pointer with a
    // non-zero count is undefined behaviour and would hand insert() an invalid range.
    if(events == nullptr || event_count == 0) return;

    auto& storage = aggregated[shader];
    storage.insert(storage.end(), events, events + event_count);
}
/**
 * Serializes everything collected through add() as JSON and streams it to an
 * OutputFile constructed from `path`.
 *
 * Layout of the emitted document:
 *  - "metadata": { "descriptor": "[gfx_clock, realtime_clock]", "frequency": <frequency> }
 *  - "SE<shader>": array of [shader_clock, realtime_clock] pairs, one key per shader
 *    that has at least one sample.
 *
 * Nothing is written when no samples were ever added.
 *
 * NOTE(review): destructors are implicitly noexcept, so an exception escaping from the
 * JSON construction or from OutputFile would terminate the process — confirm that is
 * acceptable for this tool.
 */
RealtimeTS::~RealtimeTS()
{
    if(aggregated.empty()) return;

    nlohmann::json root;
    root["metadata"]["descriptor"] = "[gfx_clock, realtime_clock]";
    root["metadata"]["frequency"]  = frequency;

    for(auto& entry : aggregated)
    {
        auto& samples = entry.second;
        // Skip shaders without samples so no empty "SE<n>" key is emitted.
        if(samples.empty()) continue;

        nlohmann::json rows;
        for(auto& sample : samples)
        {
            // Each row follows the order announced in metadata.descriptor.
            nlohmann::json row;
            row.push_back(sample.shader_clock);
            row.push_back(sample.realtime_clock);
            rows.push_back(row);
        }

        root["SE" + std::to_string(entry.first)] = rows;
    }

    OutputFile(this->path) << root;
}
} // namespace att_wrapper
} // namespace rocprofiler
+22
Просмотреть файл
@@ -22,6 +22,9 @@
#pragma once
#include <memory>
#include <unordered_map>
#include <vector>
#include "att_lib_wrapper.hpp"
namespace rocprofiler
@@ -30,5 +33,24 @@ namespace att_wrapper
{
void
PerfcounterFile(class WaveConfig& config, const perfevent_t* events, size_t event_count);
/**
 * @brief Accumulates realtime clock samples per shader for later output.
 *
 * Samples are appended with add(). The output location is fixed at construction
 * to "realtime.json" inside the directory passed to the constructor.
 */
class RealtimeTS
{
public:
    using realtime_vec_t = std::vector<realtime_t>;
    using shader_map_t   = std::unordered_map<int, realtime_vec_t>;

    /// @param dir Directory in which "realtime.json" is placed.
    /// Explicit so a path is never silently converted into a RealtimeTS.
    explicit RealtimeTS(const Fspath& dir)
    : path(dir / "realtime.json")
    {}

    /// Defined out of line.
    ~RealtimeTS();

    // Non-copyable: the class has a user-provided destructor, and a copy would
    // run it a second time over the same `path` and the same samples.
    RealtimeTS(const RealtimeTS&) = delete;
    RealtimeTS& operator=(const RealtimeTS&) = delete;

    /// Appends @p event_count samples starting at @p events to the storage of @p shader.
    void add(int shader, const realtime_t* events, size_t event_count);

    /// Realtime clock frequency; stays 0 until a caller assigns it.
    uint64_t frequency{0};

private:
    const Fspath path;          ///< Full path of the output file.
    shader_map_t aggregated{};  ///< Stores all RT values so far.
};
} // namespace att_wrapper
} // namespace rocprofiler
+22 -10
Просмотреть файл
@@ -68,6 +68,17 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
{
PerfcounterFile(tool.config, static_cast<perfevent_t*>(trace_events), trace_size);
}
else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY)
{
if(tool.config.realtime && trace_size != 0)
tool.config.realtime->frequency = *static_cast<uint64_t*>(trace_events);
}
else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME)
{
if(tool.config.realtime && trace_size != 0)
tool.config.realtime->add(
tool.config.shader_engine, static_cast<realtime_t*>(trace_events), trace_size);
}
if(trace_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return;
@@ -80,7 +91,7 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
for(size_t j = 0; j < wave.instructions_size; j++)
{
const auto& inst = wave.instructions_array[j];
if(inst.pc.marker_id == 0 && inst.pc.addr == 0) continue;
if(inst.pc.code_object_id == 0 && inst.pc.address == 0) continue;
try
{
@@ -137,26 +148,27 @@ ToolData::get(pcinfo_t _pc)
if(isa_map.find(_pc) != isa_map.end()) return *isa_map.at(_pc);
// Attempt to disassemble full kernel
if(_pc.marker_id != 0u) try
if(_pc.code_object_id != 0u) try
{
rocprofiler::sdk::codeobj::segment::CodeobjTableTranslator symbol_table;
for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.marker_id))
symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.marker_id});
for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.code_object_id))
symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.code_object_id});
auto addr_range = symbol_table.find_codeobj_in_range(_pc.addr);
auto addr_range = symbol_table.find_codeobj_in_range(_pc.address);
try
{
auto symbol = cfile->table->getSymbolMap(_pc.marker_id).at(addr_range.addr);
auto symbol = cfile->table->getSymbolMap(_pc.code_object_id).at(addr_range.addr);
auto pair = KernelName{symbol.name, demangle(symbol.name)};
cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.marker_id}, pair);
cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.code_object_id}, pair);
} catch(...)
{
ROCP_INFO << "Missing kernelSymbol at " << _pc.marker_id << ':' << addr_range.addr;
ROCP_INFO << "Missing kernelSymbol at " << _pc.code_object_id << ':'
<< addr_range.addr;
}
for(auto addr = addr_range.addr; addr < addr_range.addr + addr_range.size;)
{
pcinfo_t info{.addr = addr, .marker_id = addr_range.id};
pcinfo_t info{.address = addr, .code_object_id = addr_range.id};
auto& cline = *(isa_map.emplace(info, std::make_unique<CodeLine>()).first->second);
cline.line_number = isa_map.size() + cfile->kernel_names.size() - 1;
@@ -176,7 +188,7 @@ ToolData::get(pcinfo_t _pc)
cline.line_number = isa_map.size();
cfile->line_numbers[_pc] = cline.line_number;
cline.code_line = cfile->table->get(_pc.marker_id, _pc.addr);
cline.code_line = cfile->table->get(_pc.code_object_id, _pc.address);
return cline;
}
+11 -23
Просмотреть файл
@@ -29,45 +29,33 @@
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
#include <rocprofiler-sdk/cxx/operators.hpp>
#include "lib/common/logging.hpp"
#include <memory>
#include <string>
#include <string_view>
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t;
using wave_t = rocprofiler_thread_trace_decoder_wave_t;
using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t;
using wave_instruction_t = rocprofiler_thread_trace_decoder_inst_t;
template <>
struct std::hash<pcinfo_t>
struct std::hash<rocprofiler_thread_trace_decoder_pc_t>
{
public:
size_t operator()(const pcinfo_t& a) const
size_t operator()(const rocprofiler_thread_trace_decoder_pc_t& a) const noexcept
{
return (a.marker_id << 32) ^ (a.marker_id >> 32) ^ a.addr;
return (a.code_object_id << 32) ^ (a.code_object_id >> 32) ^ a.address;
}
};
inline bool
operator==(const pcinfo_t& a, const pcinfo_t& b)
{
return a.addr == b.addr && a.marker_id == b.marker_id;
};
inline bool
operator<(const pcinfo_t& a, const pcinfo_t& b)
{
if(a.marker_id == b.marker_id) return a.addr < b.addr;
return a.marker_id < b.marker_id;
};
namespace rocprofiler
{
namespace att_wrapper
{
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t;
using wave_t = rocprofiler_thread_trace_decoder_wave_t;
using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t;
using wave_instruction_t = rocprofiler_thread_trace_decoder_inst_t;
using realtime_t = rocprofiler_thread_trace_decoder_realtime_t;
class GlobalDefs
{
public:
+1 -1
Просмотреть файл
@@ -127,7 +127,7 @@ WaitcntList::gfx10_construct(const wave_t& wave, isa_map_t& isa_map)
for(size_t i = 0; i < wave.instructions_size; i++)
{
auto& event = wave.instructions_array[i];
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
if(event.pc.code_object_id == 0 && event.pc.address == 0) continue;
auto it = isa_map.find(event.pc);
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
+1 -1
Просмотреть файл
@@ -162,7 +162,7 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
for(size_t i = 0; i < wave.instructions_size; i++)
{
auto& event = wave.instructions_array[i];
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
if(event.pc.code_object_id == 0 && event.pc.address == 0) continue;
auto it = isa_map.find(event.pc);
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
+1 -1
Просмотреть файл
@@ -90,7 +90,7 @@ WaitcntList::gfx9_construct(const wave_t& wave, isa_map_t& isa_map)
for(size_t i = 0; i < wave.instructions_size; i++)
{
auto& event = wave.instructions_array[i];
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
if(event.pc.code_object_id == 0 && event.pc.address == 0) continue;
auto it = isa_map.find(event.pc);
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
+10 -10
Просмотреть файл
@@ -44,8 +44,8 @@ TEST(att_decoder_waitcnt_test, gfx9)
auto append_isa = [&](size_t line_number, const char* line) {
pcinfo_t pc{};
pc.addr = line_number + LINE_OFFSET;
pc.marker_id = 0;
pc.address = line_number + LINE_OFFSET;
pc.code_object_id = 0;
auto code = std::make_unique<CodeLine>();
code->code_line = std::make_shared<CodeLine::Instruction>();
@@ -86,7 +86,7 @@ TEST(att_decoder_waitcnt_test, gfx9)
for(size_t i = 0; i < isa_map.size(); i++)
{
wave_instruction_t inst{};
inst.pc.addr = i + LINE_OFFSET;
inst.pc.address = i + LINE_OFFSET;
insts.push_back(inst);
}
}
@@ -129,8 +129,8 @@ TEST(att_decoder_waitcnt_test, gfx10)
auto append_isa = [&](size_t line_number, const char* line) {
pcinfo_t pc{};
pc.addr = line_number + LINE_OFFSET;
pc.marker_id = 0;
pc.address = line_number + LINE_OFFSET;
pc.code_object_id = 0;
auto code = std::make_unique<CodeLine>();
code->code_line = std::make_shared<CodeLine::Instruction>();
@@ -176,7 +176,7 @@ TEST(att_decoder_waitcnt_test, gfx10)
for(size_t i = 0; i < isa_map.size(); i++)
{
wave_instruction_t inst{};
inst.pc.addr = i + LINE_OFFSET;
inst.pc.address = i + LINE_OFFSET;
insts.push_back(inst);
}
@@ -222,8 +222,8 @@ TEST(att_decoder_waitcnt_test, gfx12)
auto append_isa = [&](size_t line_number, const char* line) {
pcinfo_t pc{};
pc.addr = line_number + LINE_OFFSET;
pc.marker_id = 0;
pc.address = line_number + LINE_OFFSET;
pc.code_object_id = 0;
auto code = std::make_unique<CodeLine>();
code->code_line = std::make_shared<CodeLine::Instruction>();
@@ -296,7 +296,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
for(size_t i = 0; i < isa_map.size(); i++)
{
wave_instruction_t inst{};
inst.pc.addr = i + LINE_OFFSET;
inst.pc.address = i + LINE_OFFSET;
insts.push_back(inst);
}
@@ -347,7 +347,7 @@ TEST(att_decoder_waitcnt_test, fail_conditions)
for(size_t i = 0; i < 10; i++)
{
wave_instruction_t inst{};
inst.pc.addr = i + LINE_OFFSET;
inst.pc.address = i + LINE_OFFSET;
insts.push_back(inst);
}
+7 -4
Просмотреть файл
@@ -22,13 +22,13 @@
#pragma once
#include "att_lib_wrapper.hpp"
#include "code.hpp"
#include "filenames.hpp"
#include "perfcounter.hpp"
#include "waitcnt/analysis.hpp"
#include "wstates.hpp"
#include "att_lib_wrapper.hpp"
#include <map>
#include <unordered_map>
#include <vector>
@@ -50,19 +50,22 @@ public:
WaveConfig(int se_id,
std::shared_ptr<FilenameMgr>& _mgr,
std::shared_ptr<CodeFile>& _code,
std::shared_ptr<RealtimeTS>& _ts,
WavestateArray& _wstates)
: shader_engine(se_id)
, wstates(_wstates)
, code(_code)
, filemgr(_mgr)
, realtime(_ts)
{}
const int shader_engine;
WavestateArray wstates;
std::array<SIMD, SIMD_NUM> id_count{};
std::shared_ptr<CodeFile> code;
std::shared_ptr<FilenameMgr> filemgr;
std::shared_ptr<CodeFile> code{};
std::shared_ptr<FilenameMgr> filemgr{};
std::shared_ptr<RealtimeTS> realtime{};
std::map<pcinfo_t, KernelName> kernel_names{};
std::vector<occupancy_t> occupancy{};
+2 -2
Просмотреть файл
@@ -182,7 +182,7 @@ isa_callback(char* isa_instruction,
try
{
auto instruction = decoder->table.wlock(
[&](AddressTable& table) { return table.get(pc.marker_id, pc.addr); });
[&](AddressTable& table) { return table.get(pc.code_object_id, pc.address); });
if(!instruction) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT;
@@ -199,7 +199,7 @@ isa_callback(char* isa_instruction,
} catch(std::exception& e)
{
ROCP_CI_LOG(INFO) << pc.marker_id << ":" << pc.addr << ' ' << e.what();
ROCP_CI_LOG(INFO) << pc.code_object_id << ":" << pc.address << ' ' << e.what();
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR;
}
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS;
+40
Просмотреть файл
@@ -26,6 +26,8 @@ import sys
import pytest
import re
import os
import glob
import json
def test_json_data(json_data):
@@ -65,6 +67,44 @@ def test_code_object_memory(code_object_file_path, json_data, output_path):
assert found == True
def test_realtime_clock(output_path):
def verify_sorted(timestamps):
# Sort by shader_clock (index 0)
timestamps_sorted = sorted(timestamps, key=lambda ts: ts[0])
# Ensure realtime clock is non descreasing
assert all(
curr[1] >= prev[1]
for prev, curr in zip(timestamps_sorted, timestamps_sorted[1:])
)
def verify_gfxclock(timestamps, rt_frequency):
delta_shader_clock = timestamps[-1][0] - timestamps[0][0]
delta_realtime_ts = timestamps[-1][1] - timestamps[0][1]
gfxclock = rt_frequency * delta_shader_clock / delta_realtime_ts
# gfxclock must be positive
assert gfxclock > 0
# gfxclock must be <10GHz
assert gfxclock < 1e10
pattern = os.path.join(output_path, "ui_output_*", "realtime.json")
for rt_file in glob.glob(pattern):
with open(rt_file, "r", encoding="utf-8") as f:
json_file = json.load(f)
frequency = json_file["metadata"]["frequency"]
# frequency = 0 means aqlprofile is not instrumented
if frequency > 0:
for key, value in json_file.items():
# Exclude metadata and single-clock timestamps
if "metadata" not in key and len(value) >= 2:
verify_sorted(value)
verify_gfxclock(value, frequency)
# Script entry point: run this file's tests under pytest, stopping at the first
# failure (-x), forward any extra command-line arguments, and use pytest's
# result as the process exit status.
if __name__ == "__main__":
    sys.exit(pytest.main(["-x", __file__] + sys.argv[1:]))