* Fix navi3 kernel tracing

- Call aql::set_profiler_active_on_queue only when counter collection is registered

* Update changelog

* Update following name change

[ROCm/rocprofiler-sdk commit: f7c87e455d]
Этот коммит содержится в:
Jonathan R. Madsen
2024-11-11 11:14:59 -06:00
коммит произвёл GitHub
родитель 36d357337d
Коммит 957b34e01a
2 изменённых файлов: 29 добавлений и 22 удалений
+2 -1
Просмотреть файл
@@ -122,7 +122,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
### Resolved issues
- Introduced subdirection when `rocprofv3 --output-file` used to specify a folder path
- Create subdirectory when `rocprofv3 --output-file` includes a folder path
- Fixed misaligned stores (undefined behavior) for buffer records
- Fixed crash when only scratch reporting is enabled
- Fixed `MeanOccupancy` metrics
@@ -131,6 +131,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- Fixed support for derived counters in reduce operation
- Bug fixed in max-in-reduce operation
- Introduced fix to handle a range of values for `select()` dimension in expressions parser
- Fixed Navi3 kernel tracing issues by calling `aql::set_profiler_active_on_queue` only when counter collection is registered
### Removed
+27 -21
Просмотреть файл
@@ -513,27 +513,33 @@ Queue::Queue(const AgentCache& agent,
_ext_api.hsa_amd_profiling_set_profiler_enabled_fn(_intercept_queue, true))
<< "Could not setup intercept profiler";
CHECK(_agent.cpu_pool().handle != 0);
CHECK(_agent.get_hsa_agent().handle != 0);
// Set state of the queue to allow profiling
aql::set_profiler_active_on_queue(
_agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) {
hsa_signal_t completion;
create_signal(0, &completion);
pkt.ext_amd_aql_pm4.completion_signal = completion;
counters::submitPacket(_intercept_queue, &pkt);
constexpr auto timeout_hint =
std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::seconds{1});
if(core_api.hsa_signal_wait_relaxed_fn(completion,
HSA_SIGNAL_CONDITION_EQ,
0,
timeout_hint.count(),
HSA_WAIT_STATE_ACTIVE) != 0)
{
ROCP_FATAL << "Could not set agent to be profiled";
}
core_api.hsa_signal_destroy_fn(completion);
});
if(!context::get_registered_contexts([](const context::context* ctx) {
return (ctx->counter_collection || ctx->device_counter_collection);
}).empty())
{
CHECK(_agent.cpu_pool().handle != 0);
CHECK(_agent.get_hsa_agent().handle != 0);
// Set state of the queue to allow profiling
aql::set_profiler_active_on_queue(
_agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) {
hsa_signal_t completion;
create_signal(0, &completion);
pkt.ext_amd_aql_pm4.completion_signal = completion;
counters::submitPacket(_intercept_queue, &pkt);
constexpr auto timeout_hint =
std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::seconds{1});
if(core_api.hsa_signal_wait_relaxed_fn(completion,
HSA_SIGNAL_CONDITION_EQ,
0,
timeout_hint.count(),
HSA_WAIT_STATE_ACTIVE) != 0)
{
ROCP_FATAL << "Could not set agent to be profiled";
}
core_api.hsa_signal_destroy_fn(completion);
});
}
ROCP_HSA_TABLE_CALL(
FATAL,