From 9a3bbb0037dc0dfc07c293029783fbb5c45ad2a2 Mon Sep 17 00:00:00 2001 From: "Baraldi, Giovanni" Date: Tue, 10 Jun 2025 20:19:00 +0200 Subject: [PATCH] Enable PC sampling to be run alongside ATT. Add ATT to changelog. (#445) * Enable PC sampling to be run alongside ATT. Add ATT to changelog. * Fix tests * Review comments --------- Co-authored-by: Giovanni Baraldi [ROCm/rocprofiler-sdk commit: 2fa95e6d6d07efeb8c8b22241dd0c735aebcddc4] --- projects/rocprofiler-sdk/CHANGELOG.md | 7 +++++++ .../rocprofiler-sdk/source/bin/rocprofv3.py | 8 -------- .../source/lib/att-tool/waitcnt/gfx10.cpp | 4 +++- .../source/lib/att-tool/waitcnt/gfx12.cpp | 4 +++- .../source/lib/att-tool/waitcnt/gfx9.cpp | 4 +++- .../waitcnt/tests/att_decoder_waitcnt_test.cpp | 17 ++++++++++------- .../source/lib/att-tool/wave.cpp | 2 +- 7 files changed, 27 insertions(+), 19 deletions(-) diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index 971ec7797a..87c7281752 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -184,6 +184,12 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - `rocprofiler-sdk-rocpd` DEB and RPM packages - Support `--version` option for `rocprofv3` - Added `rocpd` Python package +- Added thread trace as experimental API +- Added ROCprof Trace Decoder as experimental API + - Requires [ROCprof Trace Decoder plugin](https://github.com/rocm/rocprof-trace-decoder) +- Added thread trace option to the rocprofv3 tool under the --att parameters + - See [using thread trace with rocprofv3](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/amd-mainline/how-to/using-thread-trace.html) + - Requires the ROCprof Trace Decoder plugin installed (see above) ### Changed @@ -193,6 +199,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - default output format for rocprofv3 is now `rocpd` (SQLite3 database) - rocprofv3 avail tool renamed from 
rocprofv3_avail to rocprofv3-avail tool - rocprofv3 avail tool has support for command line arguments. +- rocprofv3 tool now allows for Thread Trace + PC Sampling on the same agent ### Resolved issues diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3.py b/projects/rocprofiler-sdk/source/bin/rocprofv3.py index 01a7d303eb..447b6d4fc9 100755 --- a/projects/rocprofiler-sdk/source/bin/rocprofv3.py +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3.py @@ -1448,14 +1448,6 @@ def run(app_args, args, **kwargs): f"{type(num_str)} is not supported. {num_str} should be of type integer or string." ) - if ( - args.pc_sampling_beta_enabled - or args.pc_sampling_unit - or args.pc_sampling_method - or args.pc_sampling_interval - ): - fatal_error("Advanced thread trace cannot be enabled with pc sampling") - update_env("ROCPROF_ADVANCED_THREAD_TRACE", True, overwrite=True) if args.att_target_cu is not None: diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp index 9fa7c81b08..a15ecf5aab 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx10.cpp @@ -127,7 +127,9 @@ WaitcntList::gfx10_construct(const wave_t& wave, isa_map_t& isa_map) for(size_t i = 0; i < wave.instructions_size; i++) { auto& event = wave.instructions_array[i]; - auto it = isa_map.find(event.pc); + if(event.pc.marker_id == 0 && event.pc.addr == 0) continue; + + auto it = isa_map.find(event.pc); if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty()) { static thread_local std::once_flag failed_flag{}; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp index c8f5597c7d..cf2081e54f 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp +++ 
b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp @@ -162,7 +162,9 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map) for(size_t i = 0; i < wave.instructions_size; i++) { auto& event = wave.instructions_array[i]; - auto it = isa_map.find(event.pc); + if(event.pc.marker_id == 0 && event.pc.addr == 0) continue; + + auto it = isa_map.find(event.pc); if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty()) { static thread_local std::once_flag failed_flag{}; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp index 2eba6cbf69..2157586631 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx9.cpp @@ -90,7 +90,9 @@ WaitcntList::gfx9_construct(const wave_t& wave, isa_map_t& isa_map) for(size_t i = 0; i < wave.instructions_size; i++) { auto& event = wave.instructions_array[i]; - auto it = isa_map.find(event.pc); + if(event.pc.marker_id == 0 && event.pc.addr == 0) continue; + + auto it = isa_map.find(event.pc); if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty()) { static thread_local std::once_flag failed_flag{}; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp index c56125d535..a673e73152 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp @@ -31,6 +31,9 @@ namespace rocprofiler { namespace att_wrapper { +// Offset added to test PC addresses so the first line is not skipped for having addr == 0 (with marker_id == 0) +constexpr uint64_t LINE_OFFSET = 1; + TEST(att_decoder_waitcnt_test, gfx9) { registration::init_logging(); @@ -41,7 +44,7 @@
TEST(att_decoder_waitcnt_test, gfx9) auto append_isa = [&](size_t line_number, const char* line) { pcinfo_t pc{}; - pc.addr = line_number; + pc.addr = line_number + LINE_OFFSET; pc.marker_id = 0; auto code = std::make_unique(); @@ -83,7 +86,7 @@ TEST(att_decoder_waitcnt_test, gfx9) for(size_t i = 0; i < isa_map.size(); i++) { wave_instruction_t inst{}; - inst.pc.addr = i; + inst.pc.addr = i + LINE_OFFSET; insts.push_back(inst); } } @@ -126,7 +129,7 @@ TEST(att_decoder_waitcnt_test, gfx10) auto append_isa = [&](size_t line_number, const char* line) { pcinfo_t pc{}; - pc.addr = line_number; + pc.addr = line_number + LINE_OFFSET; pc.marker_id = 0; auto code = std::make_unique(); @@ -173,7 +176,7 @@ TEST(att_decoder_waitcnt_test, gfx10) for(size_t i = 0; i < isa_map.size(); i++) { wave_instruction_t inst{}; - inst.pc.addr = i; + inst.pc.addr = i + LINE_OFFSET; insts.push_back(inst); } @@ -219,7 +222,7 @@ TEST(att_decoder_waitcnt_test, gfx12) auto append_isa = [&](size_t line_number, const char* line) { pcinfo_t pc{}; - pc.addr = line_number; + pc.addr = line_number + LINE_OFFSET; pc.marker_id = 0; auto code = std::make_unique(); @@ -293,7 +296,7 @@ TEST(att_decoder_waitcnt_test, gfx12) for(size_t i = 0; i < isa_map.size(); i++) { wave_instruction_t inst{}; - inst.pc.addr = i; + inst.pc.addr = i + LINE_OFFSET; insts.push_back(inst); } @@ -344,7 +347,7 @@ TEST(att_decoder_waitcnt_test, fail_conditions) for(size_t i = 0; i < 10; i++) { wave_instruction_t inst{}; - inst.pc.addr = i; + inst.pc.addr = i + LINE_OFFSET; insts.push_back(inst); } diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp index dd7bc05544..0eed156f12 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp @@ -35,7 +35,7 @@ namespace att_wrapper WaveFile::WaveFile(WaveConfig& config, const wave_t& wave) { ROCP_WARNING_IF(wave.contexts != 0u) - << "Wave had " << 
wave.contexts << " context save-restores"; + << "Wave had " << static_cast(wave.contexts) << " context save-restores"; if(!GlobalDefs::get().has_format("json")) return; if(wave.instructions_size == 0 && wave.timeline_size < 3) return;