SWDEV-540648: Adding realtime clock to v3 tool. Update decoder header. (#666)
* SWDEV-540648: Adding realtime clock to v3 tool. Update header for decoder. * Adding tests * Review comments * Review comment
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
4e9acd492d
Коммит
9849073836
@@ -26,6 +26,7 @@
|
||||
#endif
|
||||
|
||||
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
@@ -78,15 +79,6 @@ namespace Results
|
||||
{
|
||||
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
|
||||
|
||||
struct address_sort_t
|
||||
{
|
||||
bool operator()(const pcinfo_t& a, const pcinfo_t& b) const
|
||||
{
|
||||
if(a.marker_id == b.marker_id) return a.addr < b.addr;
|
||||
return a.marker_id < b.marker_id;
|
||||
}
|
||||
};
|
||||
|
||||
struct Latency
|
||||
{
|
||||
uint64_t latency{0};
|
||||
@@ -94,7 +86,7 @@ struct Latency
|
||||
};
|
||||
|
||||
// Maps address to latency
|
||||
using LatencyTable = std::map<rocprofiler_thread_trace_decoder_pc_t, Latency, address_sort_t>;
|
||||
using LatencyTable = std::map<rocprofiler_thread_trace_decoder_pc_t, Latency>;
|
||||
// Used to disassemble instructions at (id, vaddr) pair
|
||||
using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate;
|
||||
|
||||
@@ -135,7 +127,7 @@ gen_output_stream()
|
||||
{
|
||||
auto& addr = sorted.at(i).first;
|
||||
auto& latency = sorted.at(i).second;
|
||||
auto inst = table->get(addr.marker_id, addr.addr);
|
||||
auto inst = table->get(addr.code_object_id, addr.address);
|
||||
|
||||
auto comment = inst->comment;
|
||||
size_t pos = comment.rfind('/');
|
||||
@@ -309,6 +301,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */)
|
||||
// This is set by ctests: TODO: move to client.cpp
|
||||
// If unset, falls back to the default ROCm library directory (/opt/rocm/lib)
|
||||
const char* lib_path = std::getenv("ROCPROFILER_TRACE_DECODER_LIB_PATH");
|
||||
if(lib_path == nullptr) lib_path = "/opt/rocm/lib";
|
||||
|
||||
DECODER_CALL(rocprofiler_thread_trace_decoder_create(&Decoder::decoder, lib_path));
|
||||
|
||||
|
||||
@@ -150,6 +150,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_ins
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_dimension_info_t&)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_version_triplet_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t)
|
||||
ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_pc_t)
|
||||
|
||||
// definitions of operator==
|
||||
ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
@@ -226,6 +227,12 @@ operator==(rocprofiler_version_triplet_t lhs, rocprofiler_version_triplet_t rhs)
|
||||
return std::tie(lhs.major, lhs.minor, lhs.patch) == std::tie(rhs.major, rhs.minor, rhs.patch);
|
||||
}
|
||||
|
||||
inline bool
|
||||
operator==(rocprofiler_thread_trace_decoder_pc_t lhs, rocprofiler_thread_trace_decoder_pc_t rhs)
|
||||
{
|
||||
return std::tie(lhs.code_object_id, lhs.address) == std::tie(rhs.code_object_id, rhs.address);
|
||||
}
|
||||
|
||||
// definitions of operator!=
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t)
|
||||
@@ -245,6 +252,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_region_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_version_triplet_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_pc_t)
|
||||
|
||||
// definitions of operator<
|
||||
ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t)
|
||||
@@ -321,6 +329,12 @@ operator<(rocprofiler_version_triplet_t lhs, rocprofiler_version_triplet_t rhs)
|
||||
return std::tie(lhs.major, lhs.minor, lhs.patch) < std::tie(rhs.major, rhs.minor, rhs.patch);
|
||||
}
|
||||
|
||||
inline bool
|
||||
operator<(rocprofiler_thread_trace_decoder_pc_t lhs, rocprofiler_thread_trace_decoder_pc_t rhs)
|
||||
{
|
||||
return std::tie(lhs.code_object_id, lhs.address) < std::tie(rhs.code_object_id, rhs.address);
|
||||
}
|
||||
|
||||
// definitions of operator>, operator<=, operator>=
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t)
|
||||
@@ -340,6 +354,7 @@ ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_region_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_amd_memory_pool_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_version_triplet_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t)
|
||||
ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_pc_t)
|
||||
|
||||
// cleanup defines
|
||||
#undef ROCPROFILER_CXX_DECLARE_OPERATORS
|
||||
|
||||
+5
-5
@@ -49,8 +49,8 @@ typedef enum rocprofiler_thread_trace_decoder_info_t
|
||||
*/
|
||||
typedef struct rocprofiler_thread_trace_decoder_pc_t
|
||||
{
|
||||
size_t addr; ///< Memory address (marker_id == 0), or ELF vaddr (marker_id != 0).
|
||||
size_t marker_id; ///< Code object load ID. Zero if no code object was found.
|
||||
uint64_t address; ///< Address (code_object_id == 0), or ELF vaddr (code_object_id != 0)
|
||||
uint64_t code_object_id; ///< Zero if no code object was found.
|
||||
} rocprofiler_thread_trace_decoder_pc_t;
|
||||
|
||||
/**
|
||||
@@ -163,8 +163,8 @@ typedef struct rocprofiler_thread_trace_decoder_wave_t
|
||||
int64_t begin_time; ///< Wave begin time. Should match occupancy event wave start.
|
||||
int64_t end_time; ///< Wave end time. Should match occupancy event wave end.
|
||||
|
||||
size_t timeline_size; ///< timeline_array size
|
||||
size_t instructions_size; ///< instructions_array size
|
||||
uint64_t timeline_size; ///< timeline_array size
|
||||
uint64_t instructions_size; ///< instructions_array size
|
||||
rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; ///< wave state change events
|
||||
rocprofiler_thread_trace_decoder_inst_t* instructions_array; ///< Instructions executed
|
||||
} rocprofiler_thread_trace_decoder_wave_t;
|
||||
@@ -217,7 +217,7 @@ typedef struct rocprofiler_thread_trace_decoder_shaderdata_t
|
||||
*/
|
||||
typedef enum rocprofiler_thread_trace_decoder_record_type_t
|
||||
{
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type size_t
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type uint64_t
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t*
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t*
|
||||
ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t*
|
||||
|
||||
@@ -51,6 +51,7 @@ ATTFileMgr::ATTFileMgr(Fspath _dir,
|
||||
table = std::make_shared<AddressTable>();
|
||||
codefile = std::make_shared<CodeFile>(dir, table);
|
||||
filenames = std::make_shared<FilenameMgr>(dir);
|
||||
realtime = std::make_shared<RealtimeTS>(dir);
|
||||
|
||||
for(size_t i = 0; i < ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST; i++)
|
||||
wstates.at(i) = std::make_shared<WstatesFile>(i, dir);
|
||||
@@ -97,7 +98,7 @@ ATTFileMgr::addDecoder(const char* filepath, uint64_t id, uint64_t load_addr, ui
|
||||
void
|
||||
ATTFileMgr::parseShader(int se_id, std::vector<char>& data)
|
||||
{
|
||||
WaveConfig config(se_id, filenames, codefile, wstates);
|
||||
WaveConfig config(se_id, filenames, codefile, realtime, wstates);
|
||||
ToolData tooldata(data, config, decoder);
|
||||
|
||||
if(!config.occupancy.empty()) occupancy.emplace(se_id, std::move(config.occupancy));
|
||||
|
||||
@@ -92,6 +92,7 @@ public:
|
||||
|
||||
std::shared_ptr<class CodeFile> codefile{nullptr};
|
||||
std::shared_ptr<class FilenameMgr> filenames{nullptr};
|
||||
std::shared_ptr<class RealtimeTS> realtime{nullptr};
|
||||
std::shared_ptr<AddressTable> table{nullptr};
|
||||
std::map<size_t, std::vector<occupancy_t>> occupancy{};
|
||||
std::vector<uint64_t> codeobjs_to_delete{};
|
||||
|
||||
@@ -72,8 +72,7 @@ CodeFile::~CodeFile()
|
||||
vec.end(),
|
||||
[](const std::pair<pcinfo_t, std::unique_ptr<CodeLine>>& a,
|
||||
const std::pair<pcinfo_t, std::unique_ptr<CodeLine>>& b) {
|
||||
if(a.first.marker_id == b.first.marker_id) return a.first.addr < b.first.addr;
|
||||
return a.first.marker_id < b.first.marker_id;
|
||||
return a.first < b.first;
|
||||
});
|
||||
|
||||
std::stringstream ofs;
|
||||
@@ -92,8 +91,8 @@ CodeFile::~CodeFile()
|
||||
if(kernel_names.find(pc) != kernel_names.end())
|
||||
{
|
||||
csv_encoder::write_row(ofs,
|
||||
pc.marker_id,
|
||||
pc.addr,
|
||||
pc.code_object_id,
|
||||
pc.address,
|
||||
"; " + kernel_names.at(pc).name,
|
||||
0,
|
||||
0,
|
||||
@@ -102,8 +101,8 @@ CodeFile::~CodeFile()
|
||||
kernel_names.at(pc).demangled);
|
||||
}
|
||||
csv_encoder::write_row(ofs,
|
||||
pc.marker_id,
|
||||
pc.addr,
|
||||
pc.code_object_id,
|
||||
pc.address,
|
||||
line->code_line->inst,
|
||||
line->hitcount,
|
||||
line->latency,
|
||||
@@ -138,16 +137,16 @@ CodeFile::~CodeFile()
|
||||
{
|
||||
std::stringstream code;
|
||||
code << "[\"; " << kernel_names.at(line.first).name << "\",0," << (isa.line_number - 1)
|
||||
<< ",\"" << kernel_names.at(line.first).demangled << "\"," << line.first.marker_id
|
||||
<< "," << line.first.addr << ",0,0,0,0]";
|
||||
<< ",\"" << kernel_names.at(line.first).demangled << "\","
|
||||
<< line.first.code_object_id << "," << line.first.address << ",0,0,0,0]";
|
||||
jcode.push_back(nlohmann::json::parse(code.str()));
|
||||
}
|
||||
|
||||
std::stringstream code;
|
||||
code << "[\"" << isa.code_line->inst << "\",0," << isa.line_number << ",\""
|
||||
<< isa.code_line->comment << "\"," << line.first.marker_id << "," << line.first.addr
|
||||
<< "," << isa.hitcount << "," << isa.latency << "," << isa.stall << "," << isa.idle
|
||||
<< "]";
|
||||
<< isa.code_line->comment << "\"," << line.first.code_object_id << ","
|
||||
<< line.first.address << "," << isa.hitcount << "," << isa.latency << "," << isa.stall
|
||||
<< "," << isa.idle << "]";
|
||||
|
||||
jcode.push_back(nlohmann::json::parse(code.str()));
|
||||
|
||||
|
||||
@@ -81,10 +81,10 @@ OccupancyFile(const Fspath& dir,
|
||||
std::stringstream ss;
|
||||
try
|
||||
{
|
||||
ss << table->getSymbolMap(pc.marker_id).at(pc.addr).name;
|
||||
ss << table->getSymbolMap(pc.code_object_id).at(pc.address).name;
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
ss << pc.marker_id << " / 0x" << std::hex << pc.addr << std::dec;
|
||||
ss << pc.code_object_id << " / 0x" << std::hex << pc.address << std::dec;
|
||||
}
|
||||
jocc["dispatches"][std::to_string(id)] = ss.str();
|
||||
}
|
||||
|
||||
@@ -58,5 +58,42 @@ PerfcounterFile(WaveConfig& config, const perfevent_t* events, size_t event_coun
|
||||
std::string filename = "se" + std::to_string(config.shader_engine) + "_perfcounter.json";
|
||||
OutputFile(config.filemgr->dir / filename) << json;
|
||||
}
|
||||
|
||||
void
|
||||
RealtimeTS::add(int shader, const realtime_t* events, size_t event_count)
|
||||
{
|
||||
if(event_count == 0) return;
|
||||
|
||||
auto& storage = aggregated[shader];
|
||||
storage.insert(storage.end(), events, events + event_count);
|
||||
}
|
||||
|
||||
/// Serializes everything collected through add() as JSON and writes it to
/// `path`. Nothing is written when no records were collected.
RealtimeTS::~RealtimeTS()
{
    if(aggregated.empty()) return;

    nlohmann::json root;
    root["metadata"]["descriptor"] = "[gfx_clock, realtime_clock]";
    root["metadata"]["frequency"]  = frequency;

    for(auto& [shader, samples] : aggregated)
    {
        if(samples.empty()) continue;

        // One two-element array per record, in the order named by the descriptor.
        nlohmann::json pairs;
        for(auto& sample : samples)
            pairs.push_back(nlohmann::json::array({sample.shader_clock, sample.realtime_clock}));

        // Each shader engine gets its own top-level key: "SE<index>".
        root["SE" + std::to_string(shader)] = pairs;
    }

    OutputFile(this->path) << root;
}
|
||||
|
||||
} // namespace att_wrapper
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -22,6 +22,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "att_lib_wrapper.hpp"
|
||||
|
||||
namespace rocprofiler
|
||||
@@ -30,5 +33,24 @@ namespace att_wrapper
|
||||
{
|
||||
void
|
||||
PerfcounterFile(class WaveConfig& config, const perfevent_t* events, size_t event_count);
|
||||
|
||||
class RealtimeTS
|
||||
{
|
||||
public:
|
||||
using realtime_vec_t = std::vector<realtime_t>;
|
||||
using shader_map_t = std::unordered_map<int, realtime_vec_t>;
|
||||
|
||||
RealtimeTS(const Fspath& dir)
|
||||
: path(dir / "realtime.json"){};
|
||||
~RealtimeTS();
|
||||
|
||||
void add(int shader, const realtime_t* events, size_t event_count);
|
||||
uint64_t frequency{0};
|
||||
|
||||
private:
|
||||
const Fspath path;
|
||||
shader_map_t aggregated{}; ///< Stores all RT values so far.
|
||||
};
|
||||
|
||||
} // namespace att_wrapper
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -68,6 +68,17 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
|
||||
{
|
||||
PerfcounterFile(tool.config, static_cast<perfevent_t*>(trace_events), trace_size);
|
||||
}
|
||||
else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY)
|
||||
{
|
||||
if(tool.config.realtime && trace_size != 0)
|
||||
tool.config.realtime->frequency = *static_cast<uint64_t*>(trace_events);
|
||||
}
|
||||
else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME)
|
||||
{
|
||||
if(tool.config.realtime && trace_size != 0)
|
||||
tool.config.realtime->add(
|
||||
tool.config.shader_engine, static_cast<realtime_t*>(trace_events), trace_size);
|
||||
}
|
||||
|
||||
if(trace_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return;
|
||||
|
||||
@@ -80,7 +91,7 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id,
|
||||
for(size_t j = 0; j < wave.instructions_size; j++)
|
||||
{
|
||||
const auto& inst = wave.instructions_array[j];
|
||||
if(inst.pc.marker_id == 0 && inst.pc.addr == 0) continue;
|
||||
if(inst.pc.code_object_id == 0 && inst.pc.address == 0) continue;
|
||||
|
||||
try
|
||||
{
|
||||
@@ -137,26 +148,27 @@ ToolData::get(pcinfo_t _pc)
|
||||
if(isa_map.find(_pc) != isa_map.end()) return *isa_map.at(_pc);
|
||||
|
||||
// Attempt to disassemble full kernel
|
||||
if(_pc.marker_id != 0u) try
|
||||
if(_pc.code_object_id != 0u) try
|
||||
{
|
||||
rocprofiler::sdk::codeobj::segment::CodeobjTableTranslator symbol_table;
|
||||
for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.marker_id))
|
||||
symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.marker_id});
|
||||
for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.code_object_id))
|
||||
symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.code_object_id});
|
||||
|
||||
auto addr_range = symbol_table.find_codeobj_in_range(_pc.addr);
|
||||
auto addr_range = symbol_table.find_codeobj_in_range(_pc.address);
|
||||
try
|
||||
{
|
||||
auto symbol = cfile->table->getSymbolMap(_pc.marker_id).at(addr_range.addr);
|
||||
auto symbol = cfile->table->getSymbolMap(_pc.code_object_id).at(addr_range.addr);
|
||||
auto pair = KernelName{symbol.name, demangle(symbol.name)};
|
||||
cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.marker_id}, pair);
|
||||
cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.code_object_id}, pair);
|
||||
} catch(...)
|
||||
{
|
||||
ROCP_INFO << "Missing kernelSymbol at " << _pc.marker_id << ':' << addr_range.addr;
|
||||
ROCP_INFO << "Missing kernelSymbol at " << _pc.code_object_id << ':'
|
||||
<< addr_range.addr;
|
||||
}
|
||||
|
||||
for(auto addr = addr_range.addr; addr < addr_range.addr + addr_range.size;)
|
||||
{
|
||||
pcinfo_t info{.addr = addr, .marker_id = addr_range.id};
|
||||
pcinfo_t info{.address = addr, .code_object_id = addr_range.id};
|
||||
auto& cline = *(isa_map.emplace(info, std::make_unique<CodeLine>()).first->second);
|
||||
|
||||
cline.line_number = isa_map.size() + cfile->kernel_names.size() - 1;
|
||||
@@ -176,7 +188,7 @@ ToolData::get(pcinfo_t _pc)
|
||||
cline.line_number = isa_map.size();
|
||||
cfile->line_numbers[_pc] = cline.line_number;
|
||||
|
||||
cline.code_line = cfile->table->get(_pc.marker_id, _pc.addr);
|
||||
cline.code_line = cfile->table->get(_pc.code_object_id, _pc.address);
|
||||
|
||||
return cline;
|
||||
}
|
||||
|
||||
@@ -29,45 +29,33 @@
|
||||
|
||||
#include <rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h>
|
||||
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include "lib/common/logging.hpp"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
|
||||
using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t;
|
||||
using wave_t = rocprofiler_thread_trace_decoder_wave_t;
|
||||
using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t;
|
||||
using wave_instruction_t = rocprofiler_thread_trace_decoder_inst_t;
|
||||
|
||||
template <>
|
||||
struct std::hash<pcinfo_t>
|
||||
struct std::hash<rocprofiler_thread_trace_decoder_pc_t>
|
||||
{
|
||||
public:
|
||||
size_t operator()(const pcinfo_t& a) const
|
||||
size_t operator()(const rocprofiler_thread_trace_decoder_pc_t& a) const noexcept
|
||||
{
|
||||
return (a.marker_id << 32) ^ (a.marker_id >> 32) ^ a.addr;
|
||||
return (a.code_object_id << 32) ^ (a.code_object_id >> 32) ^ a.address;
|
||||
}
|
||||
};
|
||||
|
||||
inline bool
|
||||
operator==(const pcinfo_t& a, const pcinfo_t& b)
|
||||
{
|
||||
return a.addr == b.addr && a.marker_id == b.marker_id;
|
||||
};
|
||||
|
||||
inline bool
|
||||
operator<(const pcinfo_t& a, const pcinfo_t& b)
|
||||
{
|
||||
if(a.marker_id == b.marker_id) return a.addr < b.addr;
|
||||
return a.marker_id < b.marker_id;
|
||||
};
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace att_wrapper
|
||||
{
|
||||
using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t;
|
||||
using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t;
|
||||
using wave_t = rocprofiler_thread_trace_decoder_wave_t;
|
||||
using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t;
|
||||
using wave_instruction_t = rocprofiler_thread_trace_decoder_inst_t;
|
||||
using realtime_t = rocprofiler_thread_trace_decoder_realtime_t;
|
||||
|
||||
class GlobalDefs
|
||||
{
|
||||
public:
|
||||
|
||||
@@ -127,7 +127,7 @@ WaitcntList::gfx10_construct(const wave_t& wave, isa_map_t& isa_map)
|
||||
for(size_t i = 0; i < wave.instructions_size; i++)
|
||||
{
|
||||
auto& event = wave.instructions_array[i];
|
||||
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
|
||||
if(event.pc.code_object_id == 0 && event.pc.address == 0) continue;
|
||||
|
||||
auto it = isa_map.find(event.pc);
|
||||
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
|
||||
|
||||
@@ -162,7 +162,7 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
|
||||
for(size_t i = 0; i < wave.instructions_size; i++)
|
||||
{
|
||||
auto& event = wave.instructions_array[i];
|
||||
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
|
||||
if(event.pc.code_object_id == 0 && event.pc.address == 0) continue;
|
||||
|
||||
auto it = isa_map.find(event.pc);
|
||||
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
|
||||
|
||||
@@ -90,7 +90,7 @@ WaitcntList::gfx9_construct(const wave_t& wave, isa_map_t& isa_map)
|
||||
for(size_t i = 0; i < wave.instructions_size; i++)
|
||||
{
|
||||
auto& event = wave.instructions_array[i];
|
||||
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
|
||||
if(event.pc.code_object_id == 0 && event.pc.address == 0) continue;
|
||||
|
||||
auto it = isa_map.find(event.pc);
|
||||
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
|
||||
|
||||
+10
-10
@@ -44,8 +44,8 @@ TEST(att_decoder_waitcnt_test, gfx9)
|
||||
|
||||
auto append_isa = [&](size_t line_number, const char* line) {
|
||||
pcinfo_t pc{};
|
||||
pc.addr = line_number + LINE_OFFSET;
|
||||
pc.marker_id = 0;
|
||||
pc.address = line_number + LINE_OFFSET;
|
||||
pc.code_object_id = 0;
|
||||
|
||||
auto code = std::make_unique<CodeLine>();
|
||||
code->code_line = std::make_shared<CodeLine::Instruction>();
|
||||
@@ -86,7 +86,7 @@ TEST(att_decoder_waitcnt_test, gfx9)
|
||||
for(size_t i = 0; i < isa_map.size(); i++)
|
||||
{
|
||||
wave_instruction_t inst{};
|
||||
inst.pc.addr = i + LINE_OFFSET;
|
||||
inst.pc.address = i + LINE_OFFSET;
|
||||
insts.push_back(inst);
|
||||
}
|
||||
}
|
||||
@@ -129,8 +129,8 @@ TEST(att_decoder_waitcnt_test, gfx10)
|
||||
|
||||
auto append_isa = [&](size_t line_number, const char* line) {
|
||||
pcinfo_t pc{};
|
||||
pc.addr = line_number + LINE_OFFSET;
|
||||
pc.marker_id = 0;
|
||||
pc.address = line_number + LINE_OFFSET;
|
||||
pc.code_object_id = 0;
|
||||
|
||||
auto code = std::make_unique<CodeLine>();
|
||||
code->code_line = std::make_shared<CodeLine::Instruction>();
|
||||
@@ -176,7 +176,7 @@ TEST(att_decoder_waitcnt_test, gfx10)
|
||||
for(size_t i = 0; i < isa_map.size(); i++)
|
||||
{
|
||||
wave_instruction_t inst{};
|
||||
inst.pc.addr = i + LINE_OFFSET;
|
||||
inst.pc.address = i + LINE_OFFSET;
|
||||
insts.push_back(inst);
|
||||
}
|
||||
|
||||
@@ -222,8 +222,8 @@ TEST(att_decoder_waitcnt_test, gfx12)
|
||||
|
||||
auto append_isa = [&](size_t line_number, const char* line) {
|
||||
pcinfo_t pc{};
|
||||
pc.addr = line_number + LINE_OFFSET;
|
||||
pc.marker_id = 0;
|
||||
pc.address = line_number + LINE_OFFSET;
|
||||
pc.code_object_id = 0;
|
||||
|
||||
auto code = std::make_unique<CodeLine>();
|
||||
code->code_line = std::make_shared<CodeLine::Instruction>();
|
||||
@@ -296,7 +296,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
|
||||
for(size_t i = 0; i < isa_map.size(); i++)
|
||||
{
|
||||
wave_instruction_t inst{};
|
||||
inst.pc.addr = i + LINE_OFFSET;
|
||||
inst.pc.address = i + LINE_OFFSET;
|
||||
insts.push_back(inst);
|
||||
}
|
||||
|
||||
@@ -347,7 +347,7 @@ TEST(att_decoder_waitcnt_test, fail_conditions)
|
||||
for(size_t i = 0; i < 10; i++)
|
||||
{
|
||||
wave_instruction_t inst{};
|
||||
inst.pc.addr = i + LINE_OFFSET;
|
||||
inst.pc.address = i + LINE_OFFSET;
|
||||
insts.push_back(inst);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,13 +22,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "att_lib_wrapper.hpp"
|
||||
#include "code.hpp"
|
||||
#include "filenames.hpp"
|
||||
#include "perfcounter.hpp"
|
||||
#include "waitcnt/analysis.hpp"
|
||||
#include "wstates.hpp"
|
||||
|
||||
#include "att_lib_wrapper.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@@ -50,19 +50,22 @@ public:
|
||||
WaveConfig(int se_id,
|
||||
std::shared_ptr<FilenameMgr>& _mgr,
|
||||
std::shared_ptr<CodeFile>& _code,
|
||||
std::shared_ptr<RealtimeTS>& _ts,
|
||||
WavestateArray& _wstates)
|
||||
: shader_engine(se_id)
|
||||
, wstates(_wstates)
|
||||
, code(_code)
|
||||
, filemgr(_mgr)
|
||||
, realtime(_ts)
|
||||
{}
|
||||
|
||||
const int shader_engine;
|
||||
WavestateArray wstates;
|
||||
|
||||
std::array<SIMD, SIMD_NUM> id_count{};
|
||||
std::shared_ptr<CodeFile> code;
|
||||
std::shared_ptr<FilenameMgr> filemgr;
|
||||
std::shared_ptr<CodeFile> code{};
|
||||
std::shared_ptr<FilenameMgr> filemgr{};
|
||||
std::shared_ptr<RealtimeTS> realtime{};
|
||||
|
||||
std::map<pcinfo_t, KernelName> kernel_names{};
|
||||
std::vector<occupancy_t> occupancy{};
|
||||
|
||||
@@ -182,7 +182,7 @@ isa_callback(char* isa_instruction,
|
||||
try
|
||||
{
|
||||
auto instruction = decoder->table.wlock(
|
||||
[&](AddressTable& table) { return table.get(pc.marker_id, pc.addr); });
|
||||
[&](AddressTable& table) { return table.get(pc.code_object_id, pc.address); });
|
||||
|
||||
if(!instruction) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
|
||||
@@ -199,7 +199,7 @@ isa_callback(char* isa_instruction,
|
||||
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
ROCP_CI_LOG(INFO) << pc.marker_id << ":" << pc.addr << ' ' << e.what();
|
||||
ROCP_CI_LOG(INFO) << pc.code_object_id << ":" << pc.address << ' ' << e.what();
|
||||
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR;
|
||||
}
|
||||
return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS;
|
||||
|
||||
@@ -26,6 +26,8 @@ import sys
|
||||
import pytest
|
||||
import re
|
||||
import os
|
||||
import glob
|
||||
import json
|
||||
|
||||
|
||||
def test_json_data(json_data):
|
||||
@@ -65,6 +67,44 @@ def test_code_object_memory(code_object_file_path, json_data, output_path):
|
||||
assert found == True
|
||||
|
||||
|
||||
def test_realtime_clock(output_path):
|
||||
|
||||
def verify_sorted(timestamps):
|
||||
|
||||
# Sort by shader_clock (index 0)
|
||||
timestamps_sorted = sorted(timestamps, key=lambda ts: ts[0])
|
||||
# Ensure realtime clock is non descreasing
|
||||
assert all(
|
||||
curr[1] >= prev[1]
|
||||
for prev, curr in zip(timestamps_sorted, timestamps_sorted[1:])
|
||||
)
|
||||
|
||||
def verify_gfxclock(timestamps, rt_frequency):
|
||||
|
||||
delta_shader_clock = timestamps[-1][0] - timestamps[0][0]
|
||||
delta_realtime_ts = timestamps[-1][1] - timestamps[0][1]
|
||||
gfxclock = rt_frequency * delta_shader_clock / delta_realtime_ts
|
||||
|
||||
# gfxclock must be positive
|
||||
assert gfxclock > 0
|
||||
# gfxclock must be <10GHz
|
||||
assert gfxclock < 1e10
|
||||
|
||||
pattern = os.path.join(output_path, "ui_output_*", "realtime.json")
|
||||
for rt_file in glob.glob(pattern):
|
||||
with open(rt_file, "r", encoding="utf-8") as f:
|
||||
json_file = json.load(f)
|
||||
|
||||
frequency = json_file["metadata"]["frequency"]
|
||||
# frequency = 0 means aqlprofile is not instrumented
|
||||
if frequency > 0:
|
||||
for key, value in json_file.items():
|
||||
# Exclude metadata and single-clock timestamps
|
||||
if "metadata" not in key and len(value) >= 2:
|
||||
verify_sorted(value)
|
||||
verify_gfxclock(value, frequency)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
|
||||
Ссылка в новой задаче
Block a user