From 9849073836517172105408ad32caa7a4f42cf4ea Mon Sep 17 00:00:00 2001 From: Giovanni Lenzi Baraldi Date: Wed, 10 Sep 2025 12:39:27 +0200 Subject: [PATCH] SWDEV-540648: Adding realtime clock to v3 tool. Update decoder header. (#666) * SWDEV-540648: Adding realtime clock to v3 tool. Update header for decoder. * Adding tests * Review comments * Review comment --- .../samples/thread_trace/agent.cpp | 15 ++----- .../include/rocprofiler-sdk/cxx/operators.hpp | 15 +++++++ .../thread-trace/trace_decoder_types.h | 10 ++--- .../source/lib/att-tool/att_lib_wrapper.cpp | 3 +- .../source/lib/att-tool/att_lib_wrapper.hpp | 1 + .../source/lib/att-tool/code.cpp | 21 +++++----- .../source/lib/att-tool/occupancy.cpp | 4 +- .../source/lib/att-tool/perfcounter.cpp | 37 +++++++++++++++++ .../source/lib/att-tool/perfcounter.hpp | 22 ++++++++++ .../source/lib/att-tool/profile_interface.cpp | 32 ++++++++++----- .../source/lib/att-tool/util.hpp | 34 +++++----------- .../source/lib/att-tool/waitcnt/gfx10.cpp | 2 +- .../source/lib/att-tool/waitcnt/gfx12.cpp | 2 +- .../source/lib/att-tool/waitcnt/gfx9.cpp | 2 +- .../tests/att_decoder_waitcnt_test.cpp | 20 +++++----- .../source/lib/att-tool/wave.hpp | 11 +++-- .../rocprofiler-sdk/thread_trace/decode.cpp | 4 +- .../advanced-thread-trace/validate.py | 40 +++++++++++++++++++ 18 files changed, 193 insertions(+), 82 deletions(-) diff --git a/projects/rocprofiler-sdk/samples/thread_trace/agent.cpp b/projects/rocprofiler-sdk/samples/thread_trace/agent.cpp index e8d9240e29..b2ccb49c20 100644 --- a/projects/rocprofiler-sdk/samples/thread_trace/agent.cpp +++ b/projects/rocprofiler-sdk/samples/thread_trace/agent.cpp @@ -26,6 +26,7 @@ #endif #include +#include #include #include @@ -78,15 +79,6 @@ namespace Results { using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t; -struct address_sort_t -{ - bool operator()(const pcinfo_t& a, const pcinfo_t& b) const - { - if(a.marker_id == b.marker_id) return a.addr < b.addr; - return a.marker_id < b.marker_id; - 
} -}; - struct Latency { uint64_t latency{0}; @@ -94,7 +86,7 @@ struct Latency }; // Maps address to latency -using LatencyTable = std::map; +using LatencyTable = std::map; // Used to disassemble instructions at (id, vaddr) pair using AddressTable = rocprofiler::sdk::codeobj::disassembly::CodeobjAddressTranslate; @@ -135,7 +127,7 @@ gen_output_stream() { auto& addr = sorted.at(i).first; auto& latency = sorted.at(i).second; - auto inst = table->get(addr.marker_id, addr.addr); + auto inst = table->get(addr.code_object_id, addr.address); auto comment = inst->comment; size_t pos = comment.rfind('/'); @@ -309,6 +301,7 @@ tool_init(rocprofiler_client_finalize_t /* fini_func */, void* /* tool_data */) // This is set by ctests: TODO: move to client.cpp // If nullptr, searches rocprofiler-sdk install location const char* lib_path = std::getenv("ROCPROFILER_TRACE_DECODER_LIB_PATH"); + if(lib_path == nullptr) lib_path = "/opt/rocm/lib"; DECODER_CALL(rocprofiler_thread_trace_decoder_create(&Decoder::decoder, lib_path)); diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/operators.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/operators.hpp index d09956e124..75f41d2f4f 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/operators.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/operators.hpp @@ -150,6 +150,7 @@ ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_record_dimension_ins ROCPROFILER_CXX_DECLARE_OPERATORS(const rocprofiler_counter_dimension_info_t&) ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_version_triplet_t) ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t) +ROCPROFILER_CXX_DECLARE_OPERATORS(rocprofiler_thread_trace_decoder_pc_t) // definitions of operator== ROCPROFILER_CXX_DEFINE_EQ_HANDLE_OPERATOR(rocprofiler_context_id_t) @@ -226,6 +227,12 @@ operator==(rocprofiler_version_triplet_t lhs, rocprofiler_version_triplet_t rhs) return 
std::tie(lhs.major, lhs.minor, lhs.patch) == std::tie(rhs.major, rhs.minor, rhs.patch); } +inline bool +operator==(rocprofiler_thread_trace_decoder_pc_t lhs, rocprofiler_thread_trace_decoder_pc_t rhs) +{ + return std::tie(lhs.code_object_id, lhs.address) == std::tie(rhs.code_object_id, rhs.address); +} + // definitions of operator!= ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_context_id_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_address_t) @@ -245,6 +252,7 @@ ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_region_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(hsa_amd_memory_pool_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_version_triplet_t) ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_id_t) +ROCPROFILER_CXX_DEFINE_NE_OPERATOR(rocprofiler_thread_trace_decoder_pc_t) // definitions of operator< ROCPROFILER_CXX_DEFINE_LT_HANDLE_OPERATOR(rocprofiler_context_id_t) @@ -321,6 +329,12 @@ operator<(rocprofiler_version_triplet_t lhs, rocprofiler_version_triplet_t rhs) return std::tie(lhs.major, lhs.minor, lhs.patch) < std::tie(rhs.major, rhs.minor, rhs.patch); } +inline bool +operator<(rocprofiler_thread_trace_decoder_pc_t lhs, rocprofiler_thread_trace_decoder_pc_t rhs) +{ + return std::tie(lhs.code_object_id, lhs.address) < std::tie(rhs.code_object_id, rhs.address); +} + // definitions of operator>, operator<=, operator>= ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_context_id_t) ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_address_t) @@ -340,6 +354,7 @@ ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_region_t) ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(hsa_amd_memory_pool_t) ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_version_triplet_t) ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_id_t) +ROCPROFILER_CXX_DEFINE_COMPARE_OPERATORS(rocprofiler_thread_trace_decoder_pc_t) // cleanup defines #undef ROCPROFILER_CXX_DECLARE_OPERATORS diff --git 
a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h index 573aa3c90c..99145cdd13 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/experimental/thread-trace/trace_decoder_types.h @@ -49,8 +49,8 @@ typedef enum rocprofiler_thread_trace_decoder_info_t */ typedef struct rocprofiler_thread_trace_decoder_pc_t { - size_t addr; ///< Memory address (marker_id == 0), or ELF vaddr (marker_id != 0). - size_t marker_id; ///< Code object load ID. Zero if no code object was found. + uint64_t address; ///< Address (code_object_id == 0), or ELF vaddr (code_object_id != 0) + uint64_t code_object_id; ///< Zero if no code object was found. } rocprofiler_thread_trace_decoder_pc_t; /** @@ -163,8 +163,8 @@ typedef struct rocprofiler_thread_trace_decoder_wave_t int64_t begin_time; ///< Wave begin time. Should match occupancy event wave start. int64_t end_time; ///< Wave end time. Should match occupancy event wave end. 
- size_t timeline_size; ///< timeline_array size - size_t instructions_size; ///< instructions_array size + uint64_t timeline_size; ///< timeline_array size + uint64_t instructions_size; ///< instructions_array size rocprofiler_thread_trace_decoder_wave_state_t* timeline_array; ///< wave state change events rocprofiler_thread_trace_decoder_inst_t* instructions_array; ///< Instructions executed } rocprofiler_thread_trace_decoder_wave_t; @@ -217,7 +217,7 @@ typedef struct rocprofiler_thread_trace_decoder_shaderdata_t */ typedef enum rocprofiler_thread_trace_decoder_record_type_t { - ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type size_t + ROCPROFILER_THREAD_TRACE_DECODER_RECORD_GFXIP = 0, ///< Record is gfxip_major, type uint64_t ROCPROFILER_THREAD_TRACE_DECODER_RECORD_OCCUPANCY, ///< rocprofiler_thread_trace_decoder_occupancy_t* ROCPROFILER_THREAD_TRACE_DECODER_RECORD_PERFEVENT, ///< rocprofiler_thread_trace_decoder_perfevent_t* ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE, ///< rocprofiler_thread_trace_decoder_wave_t* diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.cpp index d07800b36f..aa56dfabdd 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.cpp @@ -51,6 +51,7 @@ ATTFileMgr::ATTFileMgr(Fspath _dir, table = std::make_shared(); codefile = std::make_shared(dir, table); filenames = std::make_shared(dir); + realtime = std::make_shared(dir); for(size_t i = 0; i < ROCPROFILER_THREAD_TRACE_DECODER_WSTATE_LAST; i++) wstates.at(i) = std::make_shared(i, dir); @@ -97,7 +98,7 @@ ATTFileMgr::addDecoder(const char* filepath, uint64_t id, uint64_t load_addr, ui void ATTFileMgr::parseShader(int se_id, std::vector& data) { - WaveConfig config(se_id, filenames, codefile, wstates); + WaveConfig config(se_id, filenames, codefile, realtime, wstates); ToolData 
tooldata(data, config, decoder); if(!config.occupancy.empty()) occupancy.emplace(se_id, std::move(config.occupancy)); diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.hpp index b5a8dcb493..e7e58c5991 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/att_lib_wrapper.hpp @@ -92,6 +92,7 @@ public: std::shared_ptr codefile{nullptr}; std::shared_ptr filenames{nullptr}; + std::shared_ptr realtime{nullptr}; std::shared_ptr table{nullptr}; std::map> occupancy{}; std::vector codeobjs_to_delete{}; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp index b1cb6ed77f..f7ba78b3df 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp @@ -72,8 +72,7 @@ CodeFile::~CodeFile() vec.end(), [](const std::pair>& a, const std::pair>& b) { - if(a.first.marker_id == b.first.marker_id) return a.first.addr < b.first.addr; - return a.first.marker_id < b.first.marker_id; + return a.first < b.first; }); std::stringstream ofs; @@ -92,8 +91,8 @@ CodeFile::~CodeFile() if(kernel_names.find(pc) != kernel_names.end()) { csv_encoder::write_row(ofs, - pc.marker_id, - pc.addr, + pc.code_object_id, + pc.address, "; " + kernel_names.at(pc).name, 0, 0, @@ -102,8 +101,8 @@ CodeFile::~CodeFile() kernel_names.at(pc).demangled); } csv_encoder::write_row(ofs, - pc.marker_id, - pc.addr, + pc.code_object_id, + pc.address, line->code_line->inst, line->hitcount, line->latency, @@ -138,16 +137,16 @@ CodeFile::~CodeFile() { std::stringstream code; code << "[\"; " << kernel_names.at(line.first).name << "\",0," << (isa.line_number - 1) - << ",\"" << kernel_names.at(line.first).demangled << "\"," << line.first.marker_id - << "," << line.first.addr << ",0,0,0,0]"; + << ",\"" << 
kernel_names.at(line.first).demangled << "\"," + << line.first.code_object_id << "," << line.first.address << ",0,0,0,0]"; jcode.push_back(nlohmann::json::parse(code.str())); } std::stringstream code; code << "[\"" << isa.code_line->inst << "\",0," << isa.line_number << ",\"" - << isa.code_line->comment << "\"," << line.first.marker_id << "," << line.first.addr - << "," << isa.hitcount << "," << isa.latency << "," << isa.stall << "," << isa.idle - << "]"; + << isa.code_line->comment << "\"," << line.first.code_object_id << "," + << line.first.address << "," << isa.hitcount << "," << isa.latency << "," << isa.stall + << "," << isa.idle << "]"; jcode.push_back(nlohmann::json::parse(code.str())); diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/occupancy.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/occupancy.cpp index 33a26a4543..214da17a15 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/occupancy.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/occupancy.cpp @@ -81,10 +81,10 @@ OccupancyFile(const Fspath& dir, std::stringstream ss; try { - ss << table->getSymbolMap(pc.marker_id).at(pc.addr).name; + ss << table->getSymbolMap(pc.code_object_id).at(pc.address).name; } catch(std::exception& e) { - ss << pc.marker_id << " / 0x" << std::hex << pc.addr << std::dec; + ss << pc.code_object_id << " / 0x" << std::hex << pc.address << std::dec; } jocc["dispatches"][std::to_string(id)] = ss.str(); } diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.cpp index 45dd8f24ea..73e6d52b35 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.cpp @@ -58,5 +58,42 @@ PerfcounterFile(WaveConfig& config, const perfevent_t* events, size_t event_coun std::string filename = "se" + std::to_string(config.shader_engine) + "_perfcounter.json"; OutputFile(config.filemgr->dir / filename) << json; } + +void 
+RealtimeTS::add(int shader, const realtime_t* events, size_t event_count) +{ + if(event_count == 0) return; + + auto& storage = aggregated[shader]; + storage.insert(storage.end(), events, events + event_count); +} + +RealtimeTS::~RealtimeTS() +{ + if(aggregated.empty()) return; + + nlohmann::json json; + json["metadata"]["descriptor"] = "[gfx_clock, realtime_clock]"; + json["metadata"]["frequency"] = frequency; + + for(auto& [shader, realtime] : aggregated) + { + if(realtime.empty()) continue; + + nlohmann::json data; + for(auto& event : realtime) + { + nlohmann::json json_event; + json_event.push_back(event.shader_clock); + json_event.push_back(event.realtime_clock); + + data.push_back(json_event); + } + json["SE" + std::to_string(shader)] = data; + } + + OutputFile(this->path) << json; +} + } // namespace att_wrapper } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.hpp index 15faf840c9..d5c02a72a4 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/perfcounter.hpp @@ -22,6 +22,9 @@ #pragma once +#include +#include +#include #include "att_lib_wrapper.hpp" namespace rocprofiler @@ -30,5 +33,24 @@ namespace att_wrapper { void PerfcounterFile(class WaveConfig& config, const perfevent_t* events, size_t event_count); + +class RealtimeTS +{ +public: + using realtime_vec_t = std::vector; + using shader_map_t = std::unordered_map; + + RealtimeTS(const Fspath& dir) + : path(dir / "realtime.json"){}; + ~RealtimeTS(); + + void add(int shader, const realtime_t* events, size_t event_count); + uint64_t frequency{0}; + +private: + const Fspath path; + shader_map_t aggregated{}; ///< Stores all RT values so far. 
+}; + } // namespace att_wrapper } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp index 1d50c422df..9e8045e99c 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp @@ -68,6 +68,17 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id, { PerfcounterFile(tool.config, static_cast(trace_events), trace_size); } + else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_RT_FREQUENCY) + { + if(tool.config.realtime && trace_size != 0) + tool.config.realtime->frequency = *static_cast(trace_events); + } + else if(trace_id == ROCPROFILER_THREAD_TRACE_DECODER_RECORD_REALTIME) + { + if(tool.config.realtime && trace_size != 0) + tool.config.realtime->add( + tool.config.shader_engine, static_cast(trace_events), trace_size); + } if(trace_id != ROCPROFILER_THREAD_TRACE_DECODER_RECORD_WAVE) return; @@ -80,7 +91,7 @@ get_trace_data(rocprofiler_thread_trace_decoder_record_type_t trace_id, for(size_t j = 0; j < wave.instructions_size; j++) { const auto& inst = wave.instructions_array[j]; - if(inst.pc.marker_id == 0 && inst.pc.addr == 0) continue; + if(inst.pc.code_object_id == 0 && inst.pc.address == 0) continue; try { @@ -137,26 +148,27 @@ ToolData::get(pcinfo_t _pc) if(isa_map.find(_pc) != isa_map.end()) return *isa_map.at(_pc); // Attempt to disassemble full kernel - if(_pc.marker_id != 0u) try + if(_pc.code_object_id != 0u) try { rocprofiler::sdk::codeobj::segment::CodeobjTableTranslator symbol_table; - for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.marker_id)) - symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.marker_id}); + for(auto& [vaddr, symbol] : cfile->table->getSymbolMap(_pc.code_object_id)) + symbol_table.insert({symbol.vaddr, symbol.mem_size, _pc.code_object_id}); - auto addr_range = 
symbol_table.find_codeobj_in_range(_pc.addr); + auto addr_range = symbol_table.find_codeobj_in_range(_pc.address); try { - auto symbol = cfile->table->getSymbolMap(_pc.marker_id).at(addr_range.addr); + auto symbol = cfile->table->getSymbolMap(_pc.code_object_id).at(addr_range.addr); auto pair = KernelName{symbol.name, demangle(symbol.name)}; - cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.marker_id}, pair); + cfile->kernel_names.emplace(pcinfo_t{addr_range.addr, _pc.code_object_id}, pair); } catch(...) { - ROCP_INFO << "Missing kernelSymbol at " << _pc.marker_id << ':' << addr_range.addr; + ROCP_INFO << "Missing kernelSymbol at " << _pc.code_object_id << ':' + << addr_range.addr; } for(auto addr = addr_range.addr; addr < addr_range.addr + addr_range.size;) { - pcinfo_t info{.addr = addr, .marker_id = addr_range.id}; + pcinfo_t info{.address = addr, .code_object_id = addr_range.id}; auto& cline = *(isa_map.emplace(info, std::make_unique()).first->second); cline.line_number = isa_map.size() + cfile->kernel_names.size() - 1; @@ -176,7 +188,7 @@ ToolData::get(pcinfo_t _pc) cline.line_number = isa_map.size(); cfile->line_numbers[_pc] = cline.line_number; - cline.code_line = cfile->table->get(_pc.marker_id, _pc.addr); + cline.code_line = cfile->table->get(_pc.code_object_id, _pc.address); return cline; } diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/util.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/util.hpp index a0220f608a..3c55abf4ed 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/util.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/util.hpp @@ -29,45 +29,33 @@ #include #include +#include #include "lib/common/logging.hpp" #include #include #include -using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t; -using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t; -using wave_t = rocprofiler_thread_trace_decoder_wave_t; -using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t; -using wave_instruction_t = 
rocprofiler_thread_trace_decoder_inst_t; - template <> -struct std::hash +struct std::hash { -public: - size_t operator()(const pcinfo_t& a) const + size_t operator()(const rocprofiler_thread_trace_decoder_pc_t& a) const noexcept { - return (a.marker_id << 32) ^ (a.marker_id >> 32) ^ a.addr; + return (a.code_object_id << 32) ^ (a.code_object_id >> 32) ^ a.address; } }; -inline bool -operator==(const pcinfo_t& a, const pcinfo_t& b) -{ - return a.addr == b.addr && a.marker_id == b.marker_id; -}; - -inline bool -operator<(const pcinfo_t& a, const pcinfo_t& b) -{ - if(a.marker_id == b.marker_id) return a.addr < b.addr; - return a.marker_id < b.marker_id; -}; - namespace rocprofiler { namespace att_wrapper { +using pcinfo_t = rocprofiler_thread_trace_decoder_pc_t; +using occupancy_t = rocprofiler_thread_trace_decoder_occupancy_t; +using wave_t = rocprofiler_thread_trace_decoder_wave_t; +using perfevent_t = rocprofiler_thread_trace_decoder_perfevent_t; +using wave_instruction_t = rocprofiler_thread_trace_decoder_inst_t; +using realtime_t = rocprofiler_thread_trace_decoder_realtime_t; + class GlobalDefs { public: diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp index a15ecf5aab..11e3f6f56f 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp @@ -127,7 +127,7 @@ WaitcntList::gfx10_construct(const wave_t& wave, isa_map_t& isa_map) for(size_t i = 0; i < wave.instructions_size; i++) { auto& event = wave.instructions_array[i]; - if(event.pc.marker_id == 0 && event.pc.addr == 0) continue; + if(event.pc.code_object_id == 0 && event.pc.address == 0) continue; auto it = isa_map.find(event.pc); if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty()) diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp 
b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp index cf2081e54f..0474df55b9 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp @@ -162,7 +162,7 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map) for(size_t i = 0; i < wave.instructions_size; i++) { auto& event = wave.instructions_array[i]; - if(event.pc.marker_id == 0 && event.pc.addr == 0) continue; + if(event.pc.code_object_id == 0 && event.pc.address == 0) continue; auto it = isa_map.find(event.pc); if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty()) diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp index 2157586631..27b703f920 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp @@ -90,7 +90,7 @@ WaitcntList::gfx9_construct(const wave_t& wave, isa_map_t& isa_map) for(size_t i = 0; i < wave.instructions_size; i++) { auto& event = wave.instructions_array[i]; - if(event.pc.marker_id == 0 && event.pc.addr == 0) continue; + if(event.pc.code_object_id == 0 && event.pc.address == 0) continue; auto it = isa_map.find(event.pc); if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty()) diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp index a673e73152..2ea0378957 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp @@ -44,8 +44,8 @@ TEST(att_decoder_waitcnt_test, gfx9) auto append_isa = [&](size_t line_number, const char* line) { pcinfo_t pc{}; - pc.addr = line_number + 
LINE_OFFSET; - pc.marker_id = 0; + pc.address = line_number + LINE_OFFSET; + pc.code_object_id = 0; auto code = std::make_unique(); code->code_line = std::make_shared(); @@ -86,7 +86,7 @@ TEST(att_decoder_waitcnt_test, gfx9) for(size_t i = 0; i < isa_map.size(); i++) { wave_instruction_t inst{}; - inst.pc.addr = i + LINE_OFFSET; + inst.pc.address = i + LINE_OFFSET; insts.push_back(inst); } } @@ -129,8 +129,8 @@ TEST(att_decoder_waitcnt_test, gfx10) auto append_isa = [&](size_t line_number, const char* line) { pcinfo_t pc{}; - pc.addr = line_number + LINE_OFFSET; - pc.marker_id = 0; + pc.address = line_number + LINE_OFFSET; + pc.code_object_id = 0; auto code = std::make_unique(); code->code_line = std::make_shared(); @@ -176,7 +176,7 @@ TEST(att_decoder_waitcnt_test, gfx10) for(size_t i = 0; i < isa_map.size(); i++) { wave_instruction_t inst{}; - inst.pc.addr = i + LINE_OFFSET; + inst.pc.address = i + LINE_OFFSET; insts.push_back(inst); } @@ -222,8 +222,8 @@ TEST(att_decoder_waitcnt_test, gfx12) auto append_isa = [&](size_t line_number, const char* line) { pcinfo_t pc{}; - pc.addr = line_number + LINE_OFFSET; - pc.marker_id = 0; + pc.address = line_number + LINE_OFFSET; + pc.code_object_id = 0; auto code = std::make_unique(); code->code_line = std::make_shared(); @@ -296,7 +296,7 @@ TEST(att_decoder_waitcnt_test, gfx12) for(size_t i = 0; i < isa_map.size(); i++) { wave_instruction_t inst{}; - inst.pc.addr = i + LINE_OFFSET; + inst.pc.address = i + LINE_OFFSET; insts.push_back(inst); } @@ -347,7 +347,7 @@ TEST(att_decoder_waitcnt_test, fail_conditions) for(size_t i = 0; i < 10; i++) { wave_instruction_t inst{}; - inst.pc.addr = i + LINE_OFFSET; + inst.pc.address = i + LINE_OFFSET; insts.push_back(inst); } diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp index 6c427b5824..1a7b1adda6 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp +++ 
b/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp @@ -22,13 +22,13 @@ #pragma once +#include "att_lib_wrapper.hpp" #include "code.hpp" #include "filenames.hpp" +#include "perfcounter.hpp" #include "waitcnt/analysis.hpp" #include "wstates.hpp" -#include "att_lib_wrapper.hpp" - #include #include #include @@ -50,19 +50,22 @@ public: WaveConfig(int se_id, std::shared_ptr& _mgr, std::shared_ptr& _code, + std::shared_ptr& _ts, WavestateArray& _wstates) : shader_engine(se_id) , wstates(_wstates) , code(_code) , filemgr(_mgr) + , realtime(_ts) {} const int shader_engine; WavestateArray wstates; std::array id_count{}; - std::shared_ptr code; - std::shared_ptr filemgr; + std::shared_ptr code{}; + std::shared_ptr filemgr{}; + std::shared_ptr realtime{}; std::map kernel_names{}; std::vector occupancy{}; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/decode.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/decode.cpp index f0e0980d7e..9af82a99e1 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/decode.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/decode.cpp @@ -182,7 +182,7 @@ isa_callback(char* isa_instruction, try { auto instruction = decoder->table.wlock( - [&](AddressTable& table) { return table.get(pc.marker_id, pc.addr); }); + [&](AddressTable& table) { return table.get(pc.code_object_id, pc.address); }); if(!instruction) return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR_INVALID_ARGUMENT; @@ -199,7 +199,7 @@ isa_callback(char* isa_instruction, } catch(std::exception& e) { - ROCP_CI_LOG(INFO) << pc.marker_id << ":" << pc.addr << ' ' << e.what(); + ROCP_CI_LOG(INFO) << pc.code_object_id << ":" << pc.address << ' ' << e.what(); return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_ERROR; } return ROCPROFILER_THREAD_TRACE_DECODER_STATUS_SUCCESS; diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/validate.py 
b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/validate.py index f969082133..945e4a81e8 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/validate.py +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/validate.py @@ -26,6 +26,8 @@ import sys import pytest import re import os +import glob +import json def test_json_data(json_data): @@ -65,6 +67,44 @@ def test_code_object_memory(code_object_file_path, json_data, output_path): assert found == True +def test_realtime_clock(output_path): + + def verify_sorted(timestamps): + + # Sort by shader_clock (index 0) + timestamps_sorted = sorted(timestamps, key=lambda ts: ts[0]) + # Ensure realtime clock is non-decreasing + assert all( + curr[1] >= prev[1] + for prev, curr in zip(timestamps_sorted, timestamps_sorted[1:]) + ) + + def verify_gfxclock(timestamps, rt_frequency): + + delta_shader_clock = timestamps[-1][0] - timestamps[0][0] + delta_realtime_ts = timestamps[-1][1] - timestamps[0][1] + gfxclock = rt_frequency * delta_shader_clock / delta_realtime_ts + + # gfxclock must be positive + assert gfxclock > 0 + # gfxclock must be <10GHz + assert gfxclock < 1e10 + + pattern = os.path.join(output_path, "ui_output_*", "realtime.json") + for rt_file in glob.glob(pattern): + with open(rt_file, "r", encoding="utf-8") as f: + json_file = json.load(f) + + frequency = json_file["metadata"]["frequency"] + # frequency = 0 means aqlprofile is not instrumented + if frequency > 0: + for key, value in json_file.items(): + # Exclude metadata and single-clock timestamps + if "metadata" not in key and len(value) >= 2: + verify_sorted(value) + verify_gfxclock(value, frequency) + + if __name__ == "__main__": exit_code = pytest.main(["-x", __file__] + sys.argv[1:]) sys.exit(exit_code)