diff --git a/projects/rocprofiler-sdk/CHANGELOG.md b/projects/rocprofiler-sdk/CHANGELOG.md index e25e51e0fe..adb12feae2 100644 --- a/projects/rocprofiler-sdk/CHANGELOG.md +++ b/projects/rocprofiler-sdk/CHANGELOG.md @@ -122,7 +122,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec ### Resolved issues -- Introduced subdirection when `rocprofv3 --output-file` used to specify a folder path +- Create subdirectory when `rocprofv3 --output-file` includes a folder path - Fixed misaligned stores (undefined behavior) for buffer records - Fixed crash when only scratch reporting is enabled - Fixed `MeanOccupancy` metrics @@ -131,6 +131,7 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec - Fixed support for derived counters in reduce operation - Bug fixed in max-in-reduce operation - Introduced fix to handle a range of values for `select()` dimension in expressions parser +- Call `aql::set_profiler_active_on_queue` only when counter collection is registered (resolves Navi3 kernel tracing issues) ### Removed diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp index 7ae3c18a13..027ba407b0 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/hsa/queue.cpp @@ -513,27 +513,33 @@ Queue::Queue(const AgentCache& agent, _ext_api.hsa_amd_profiling_set_profiler_enabled_fn(_intercept_queue, true)) << "Could not setup intercept profiler"; - CHECK(_agent.cpu_pool().handle != 0); - CHECK(_agent.get_hsa_agent().handle != 0); - // Set state of the queue to allow profiling - aql::set_profiler_active_on_queue( - _agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) { - hsa_signal_t completion; - create_signal(0, &completion); - pkt.ext_amd_aql_pm4.completion_signal = completion; - counters::submitPacket(_intercept_queue, 
&pkt); - constexpr auto timeout_hint = - std::chrono::duration_cast(std::chrono::seconds{1}); - if(core_api.hsa_signal_wait_relaxed_fn(completion, - HSA_SIGNAL_CONDITION_EQ, - 0, - timeout_hint.count(), - HSA_WAIT_STATE_ACTIVE) != 0) - { - ROCP_FATAL << "Could not set agent to be profiled"; - } - core_api.hsa_signal_destroy_fn(completion); - }); + if(!context::get_registered_contexts([](const context::context* ctx) { + return (ctx->counter_collection || ctx->device_counter_collection); + }).empty()) + { + CHECK(_agent.cpu_pool().handle != 0); + CHECK(_agent.get_hsa_agent().handle != 0); + + // Set state of the queue to allow profiling + aql::set_profiler_active_on_queue( + _agent.cpu_pool(), _agent.get_hsa_agent(), [&](hsa::rocprofiler_packet pkt) { + hsa_signal_t completion; + create_signal(0, &completion); + pkt.ext_amd_aql_pm4.completion_signal = completion; + counters::submitPacket(_intercept_queue, &pkt); + constexpr auto timeout_hint = + std::chrono::duration_cast(std::chrono::seconds{1}); + if(core_api.hsa_signal_wait_relaxed_fn(completion, + HSA_SIGNAL_CONDITION_EQ, + 0, + timeout_hint.count(), + HSA_WAIT_STATE_ACTIVE) != 0) + { + ROCP_FATAL << "Could not set agent to be profiled"; + } + core_api.hsa_signal_destroy_fn(completion); + }); + } ROCP_HSA_TABLE_CALL( FATAL,