Enable PC sampling to be run alongside ATT. Add ATT to changelog. (#445)

* Enable PC sampling to be run alongside ATT. Add ATT to changelog.

* Fix tests

* Review comments

---------

Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
This commit is contained in:
Baraldi, Giovanni
2025-06-10 20:19:00 +02:00
کامیت شده توسط GitHub
والد 80e058bf08
کامیت 2fa95e6d6d
7فایلهای تغییر یافته به همراه27 افزوده شده و 19 حذف شده
+7
مشاهده پرونده
@@ -184,6 +184,12 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- `rocprofiler-sdk-rocpd` DEB and RPM packages
- Support `--version` option for `rocprofv3`
- Added `rocpd` Python package
- Added thread trace as experimental API
- Added ROCprof Trace Decoder as experimental API
- Requires [ROCprof Trace Decoder plugin](https://github.com/rocm/rocprof-trace-decoder)
- Added thread trace option to the `rocprofv3` tool under the `--att` parameters
- See [using thread trace with rocprofv3](https://rocm.docs.amd.com/projects/rocprofiler-sdk/en/amd-mainline/how-to/using-thread-trace.html)
- Requires the ROCprof Trace Decoder plugin to be installed (see above)
### Changed
@@ -193,6 +199,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- default output format for rocprofv3 is now `rocpd` (SQLite3 database)
- rocprofv3 avail tool renamed from rocprofv3_avail to rocprofv3-avail tool
- rocprofv3 avail tool has support for command line arguments.
- rocprofv3 tool now allows for Thread Trace + PC Sampling on the same agent
### Resolved issues
-8
مشاهده پرونده
@@ -1448,14 +1448,6 @@ def run(app_args, args, **kwargs):
f"{type(num_str)} is not supported. {num_str} should be of type integer or string."
)
if (
args.pc_sampling_beta_enabled
or args.pc_sampling_unit
or args.pc_sampling_method
or args.pc_sampling_interval
):
fatal_error("Advanced thread trace cannot be enabled with pc sampling")
update_env("ROCPROF_ADVANCED_THREAD_TRACE", True, overwrite=True)
if args.att_target_cu is not None:
@@ -127,7 +127,9 @@ WaitcntList::gfx10_construct(const wave_t& wave, isa_map_t& isa_map)
for(size_t i = 0; i < wave.instructions_size; i++)
{
auto& event = wave.instructions_array[i];
auto it = isa_map.find(event.pc);
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
auto it = isa_map.find(event.pc);
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
{
static thread_local std::once_flag failed_flag{};
@@ -162,7 +162,9 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
for(size_t i = 0; i < wave.instructions_size; i++)
{
auto& event = wave.instructions_array[i];
auto it = isa_map.find(event.pc);
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
auto it = isa_map.find(event.pc);
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
{
static thread_local std::once_flag failed_flag{};
@@ -90,7 +90,9 @@ WaitcntList::gfx9_construct(const wave_t& wave, isa_map_t& isa_map)
for(size_t i = 0; i < wave.instructions_size; i++)
{
auto& event = wave.instructions_array[i];
auto it = isa_map.find(event.pc);
if(event.pc.marker_id == 0 && event.pc.addr == 0) continue;
auto it = isa_map.find(event.pc);
if(it == isa_map.end() || !it->second->code_line || it->second->code_line->inst.empty())
{
static thread_local std::once_flag failed_flag{};
@@ -31,6 +31,9 @@ namespace rocprofiler
{
namespace att_wrapper
{
// Offset added to every line number so the first line is not skipped for having vaddr == 0
constexpr uint64_t LINE_OFFSET = 1;
TEST(att_decoder_waitcnt_test, gfx9)
{
registration::init_logging();
@@ -41,7 +44,7 @@ TEST(att_decoder_waitcnt_test, gfx9)
auto append_isa = [&](size_t line_number, const char* line) {
pcinfo_t pc{};
pc.addr = line_number;
pc.addr = line_number + LINE_OFFSET;
pc.marker_id = 0;
auto code = std::make_unique<CodeLine>();
@@ -83,7 +86,7 @@ TEST(att_decoder_waitcnt_test, gfx9)
for(size_t i = 0; i < isa_map.size(); i++)
{
wave_instruction_t inst{};
inst.pc.addr = i;
inst.pc.addr = i + LINE_OFFSET;
insts.push_back(inst);
}
}
@@ -126,7 +129,7 @@ TEST(att_decoder_waitcnt_test, gfx10)
auto append_isa = [&](size_t line_number, const char* line) {
pcinfo_t pc{};
pc.addr = line_number;
pc.addr = line_number + LINE_OFFSET;
pc.marker_id = 0;
auto code = std::make_unique<CodeLine>();
@@ -173,7 +176,7 @@ TEST(att_decoder_waitcnt_test, gfx10)
for(size_t i = 0; i < isa_map.size(); i++)
{
wave_instruction_t inst{};
inst.pc.addr = i;
inst.pc.addr = i + LINE_OFFSET;
insts.push_back(inst);
}
@@ -219,7 +222,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
auto append_isa = [&](size_t line_number, const char* line) {
pcinfo_t pc{};
pc.addr = line_number;
pc.addr = line_number + LINE_OFFSET;
pc.marker_id = 0;
auto code = std::make_unique<CodeLine>();
@@ -293,7 +296,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
for(size_t i = 0; i < isa_map.size(); i++)
{
wave_instruction_t inst{};
inst.pc.addr = i;
inst.pc.addr = i + LINE_OFFSET;
insts.push_back(inst);
}
@@ -344,7 +347,7 @@ TEST(att_decoder_waitcnt_test, fail_conditions)
for(size_t i = 0; i < 10; i++)
{
wave_instruction_t inst{};
inst.pc.addr = i;
inst.pc.addr = i + LINE_OFFSET;
insts.push_back(inst);
}
+1 -1
مشاهده پرونده
@@ -35,7 +35,7 @@ namespace att_wrapper
WaveFile::WaveFile(WaveConfig& config, const wave_t& wave)
{
ROCP_WARNING_IF(wave.contexts != 0u)
<< "Wave had " << wave.contexts << " context save-restores";
<< "Wave had " << static_cast<int>(wave.contexts) << " context save-restores";
if(!GlobalDefs::get().has_format("json")) return;
if(wave.instructions_size == 0 && wave.timeline_size < 3) return;