Fix navi3 kernel tracing (#1133)
* Fix navi3 kernel tracing
- Call aql::set_profiler_active_on_queue only when counter collection is registered
* Update changelog
* Update following name change
[ROCm/rocprofiler-sdk commit: f7c87e455d]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
36d357337d
Коммит
957b34e01a
@@ -122,7 +122,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
|
||||
### Resolved issues
|
||||
|
||||
- Introduced subdirection when `rocprofv3 --output-file` used to specify a folder path
|
||||
- Create subdirectory when `rocprofv3 --output-file` includes a folder path
|
||||
- Fixed misaligned stores (undefined behavior) for buffer records
|
||||
- Fixed crash when only scratch reporting is enabled
|
||||
- Fixed `MeanOccupancy` metrics
|
||||
@@ -131,6 +131,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
|
||||
- Fixed support for derived counters in reduce operation
|
||||
- Fixed a bug in the max-in-reduce operation
|
||||
- Introduced fix to handle a range of values for `select()` dimension in expressions parser
|
||||
- Fixed Navi3 kernel tracing issues by calling `aql::set_profiler_active_on_queue` only when counter collection is registered
|
||||
|
||||
### Removed
|
||||
|
||||
|
||||
@@ -513,27 +513,33 @@ Queue::Queue(const AgentCache& agent,
|
||||
_ext_api.hsa_amd_profiling_set_profiler_enabled_fn(_intercept_queue, true))
|
||||
<< "Could not setup intercept profiler";
|
||||
|
||||
CHECK(_agent.cpu_pool().handle != 0);
|
||||
CHECK(_agent.get_hsa_agent().handle != 0);
|
||||
// Set state of the queue to allow profiling
|
||||
aql::set_profiler_active_on_queue(
|
||||
_agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) {
|
||||
hsa_signal_t completion;
|
||||
create_signal(0, &completion);
|
||||
pkt.ext_amd_aql_pm4.completion_signal = completion;
|
||||
counters::submitPacket(_intercept_queue, &pkt);
|
||||
constexpr auto timeout_hint =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::seconds{1});
|
||||
if(core_api.hsa_signal_wait_relaxed_fn(completion,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
0,
|
||||
timeout_hint.count(),
|
||||
HSA_WAIT_STATE_ACTIVE) != 0)
|
||||
{
|
||||
ROCP_FATAL << "Could not set agent to be profiled";
|
||||
}
|
||||
core_api.hsa_signal_destroy_fn(completion);
|
||||
});
|
||||
if(!context::get_registered_contexts([](const context::context* ctx) {
|
||||
return (ctx->counter_collection || ctx->device_counter_collection);
|
||||
}).empty())
|
||||
{
|
||||
CHECK(_agent.cpu_pool().handle != 0);
|
||||
CHECK(_agent.get_hsa_agent().handle != 0);
|
||||
|
||||
// Set state of the queue to allow profiling
|
||||
aql::set_profiler_active_on_queue(
|
||||
_agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) {
|
||||
hsa_signal_t completion;
|
||||
create_signal(0, &completion);
|
||||
pkt.ext_amd_aql_pm4.completion_signal = completion;
|
||||
counters::submitPacket(_intercept_queue, &pkt);
|
||||
constexpr auto timeout_hint =
|
||||
std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::seconds{1});
|
||||
if(core_api.hsa_signal_wait_relaxed_fn(completion,
|
||||
HSA_SIGNAL_CONDITION_EQ,
|
||||
0,
|
||||
timeout_hint.count(),
|
||||
HSA_WAIT_STATE_ACTIVE) != 0)
|
||||
{
|
||||
ROCP_FATAL << "Could not set agent to be profiled";
|
||||
}
|
||||
core_api.hsa_signal_destroy_fn(completion);
|
||||
});
|
||||
}
|
||||
|
||||
ROCP_HSA_TABLE_CALL(
|
||||
FATAL,
|
||||
|
||||
Ссылка в новой задаче
Block a user