From 4e2144dbfa2ccdc772028938d611eb2ba8df4cf5 Mon Sep 17 00:00:00 2001 From: Giovanni Lenzi Baraldi Date: Thu, 4 Jul 2024 03:58:45 -0300 Subject: [PATCH] General fixes to ATT, packets and event ID retrieval (#960) * General fixes to ATT, packets and event ID retrieval * Update source/lib/rocprofiler-sdk/hsa/aql_packet.hpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .../rocprofiler-sdk/aql/packet_construct.cpp | 18 ++++++--- .../lib/rocprofiler-sdk/counters/metrics.cpp | 30 +++++++------- .../lib/rocprofiler-sdk/counters/metrics.hpp | 4 +- source/lib/rocprofiler-sdk/hsa/aql_packet.hpp | 19 +-------- .../rocprofiler-sdk/thread_trace/att_core.hpp | 2 +- .../thread_trace/att_service.cpp | 39 +++++++------------ .../thread_trace/tests/att_packet_test.cpp | 9 ++--- 7 files changed, 50 insertions(+), 71 deletions(-) diff --git a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp index da2ef57d67..d67bd56a12 100644 --- a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp @@ -157,11 +157,19 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache& if(perf_ctrl != 0 && !params.perfcounters.empty()) { - aql_params.push_back( - {HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL, {perf_ctrl - 1}}); - auto perf_param = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME; - for(uint32_t perf_counter : params.perfcounters) - aql_params.push_back({perf_param, {perf_counter}}); + for(const auto& perf_counter : params.perfcounters) + { + aqlprofile_att_parameter_t param{}; + param.parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME; + param.counter_id = perf_counter.first; + param.simd_mask = perf_counter.second; + aql_params.push_back(param); + } + + aqlprofile_att_parameter_t param{}; + param.parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL; + param.value = perf_ctrl - 1; + aql_params.push_back(param); } } diff --git a/source/lib/rocprofiler-sdk/counters/metrics.cpp b/source/lib/rocprofiler-sdk/counters/metrics.cpp index f44ede858d..e9b60dfede 100644 --- a/source/lib/rocprofiler-sdk/counters/metrics.cpp +++ b/source/lib/rocprofiler-sdk/counters/metrics.cpp @@ -195,24 +195,22 @@ getMetricIdMap() return id_map; } -const MetricIdMap* +std::unordered_map getPerfCountersIdMap() { - // Only GFX9 counters in SQ Block are supported - static MetricIdMap*& att_perf_counters_map = - common::static_object::construct([]() { - MetricIdMap map; - std::string agent_prefix{"gfx9"}; - auto is_gfx9 = [&](auto& agent_name) { - return (agent_name.find(agent_prefix) != std::string::npos); - }; - for(const auto& [agent_name, metrics] : *CHECK_NOTNULL(getMetricMap())) - if(is_gfx9(agent_name)) - for(const auto& metric : metrics) - if(metric.block() == "SQ") map.emplace(metric.id(), metric); - return map; - }()); - return att_perf_counters_map; + std::unordered_map map; + + for(const auto& [agent, list] : *CHECK_NOTNULL(getMetricMap())) + { + if(agent.find("gfx9") == std::string::npos) continue; + for(const auto& metric : list) + { + if(metric.name().find("SQ_") == 0 && !metric.event().empty()) + map.emplace(metric.id(), std::stoi(metric.event())); + } + } + + return map; } const MetricMap* diff --git a/source/lib/rocprofiler-sdk/counters/metrics.hpp b/source/lib/rocprofiler-sdk/counters/metrics.hpp index df58e16615..cb9612a15f 100644 --- a/source/lib/rocprofiler-sdk/counters/metrics.hpp +++ b/source/lib/rocprofiler-sdk/counters/metrics.hpp @@ -119,10 +119,10 @@ const MetricIdMap* getMetricIdMap(); /** - * Get the metrics for perfcounters options in thread trace + * Get the metric event ids for perfcounters options in thread trace * applicable only for GFX9 agents and SQ block counters */ -const MetricIdMap* +std::unordered_map getPerfCountersIdMap(); /** diff --git a/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp b/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp index 97dfe8a809..e68f8555ac 100644 --- a/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp +++ b/source/lib/rocprofiler-sdk/hsa/aql_packet.hpp @@ -158,22 +158,6 @@ struct TraceMemoryPool static hsa_status_t Copy(void* dst, const void* src, size_t size, void* data); }; -class BaseTTAQLPacket : public AQLPacket -{ - friend class rocprofiler::aql::ThreadTraceAQLPacketFactory; - -public: - BaseTTAQLPacket(std::shared_ptr& _tracepool) - : tracepool(_tracepool){}; - ~BaseTTAQLPacket() override = default; - - aqlprofile_handle_t GetHandle() const { return tracepool->handle; } - hsa_agent_t GetAgent() const { return tracepool->gpu_agent; } - -protected: - std::shared_ptr tracepool; -}; - class CodeobjMarkerAQLPacket : public AQLPacket { friend class rocprofiler::aql::ThreadTraceAQLPacketFactory; @@ -209,7 +193,8 @@ public: const aqlprofile_att_profile_t& profile); ~TraceControlAQLPacket() override = default; - explicit TraceControlAQLPacket(TraceControlAQLPacket& other) + explicit TraceControlAQLPacket(const TraceControlAQLPacket& other) + : AQLPacket() { this->tracepool = other.tracepool; this->packets = other.packets; diff --git a/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp b/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp index 76e6a86a37..dcaab786fc 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp +++ b/source/lib/rocprofiler-sdk/thread_trace/att_core.hpp @@ -64,7 +64,7 @@ struct thread_trace_parameter_pack uint64_t buffer_size = DEFAULT_BUFFER_SIZE; // GFX9 Only - std::vector perfcounters; + std::vector> perfcounters; static constexpr size_t DEFAULT_SIMD = 0x7; static constexpr size_t DEFAULT_PERFCOUNTER_SIMD_MASK = 0xF; diff --git a/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp b/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp index 36efa1e3dc..277029cec1 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/att_service.cpp @@ -30,21 +30,6 @@ #include "lib/rocprofiler-sdk/registration.hpp" #include "rocprofiler-sdk/amd_detail/thread_trace.h" -namespace -{ -uint32_t -get_mask(const rocprofiler::counters::Metric* metric, uint64_t simds_selected) -{ - uint32_t mask = std::atoi(metric->event().c_str()); - if(simds_selected == 0) - simds_selected = - rocprofiler::thread_trace::thread_trace_parameter_pack::DEFAULT_PERFCOUNTER_SIMD_MASK; - mask |= simds_selected - << rocprofiler::thread_trace::thread_trace_parameter_pack::PERFCOUNTER_SIMD_MASK_SHIFT; - return mask; -} -} // namespace - extern "C" { rocprofiler_status_t ROCPROFILER_API rocprofiler_configure_dispatch_thread_trace_service( @@ -69,7 +54,7 @@ rocprofiler_configure_dispatch_thread_trace_service( pack.shader_cb_fn = shader_callback; pack.callback_userdata = callback_userdata; - const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap()); + auto id_map = rocprofiler::counters::getPerfCountersIdMap(); for(size_t p = 0; p < num_parameters; p++) { const rocprofiler_att_parameter_t& param = parameters[p]; @@ -85,10 +70,12 @@ rocprofiler_configure_dispatch_thread_trace_service( case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break; case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break; case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER: - if(const auto* metric_ptr = - rocprofiler::common::get_val(id_map, param.counter_id.handle)) - pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask)); - break; + { + auto event_it = id_map.find(param.counter_id.handle); + if(event_it != id_map.end()) + pack.perfcounters.push_back({event_it->second, param.simd_mask}); + } + break; case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL: pack.perfcounter_ctrl = param.value; break; @@ -124,7 +111,7 @@ rocprofiler_configure_agent_thread_trace_service( pack.shader_cb_fn = shader_callback; pack.callback_userdata = callback_userdata; - const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap()); + auto id_map = rocprofiler::counters::getPerfCountersIdMap(); for(size_t p = 0; p < num_parameters; p++) { const rocprofiler_att_parameter_t& param = parameters[p]; @@ -140,10 +127,12 @@ rocprofiler_configure_agent_thread_trace_service( case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break; case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break; case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER: - if(const auto* metric_ptr = - rocprofiler::common::get_val(id_map, param.counter_id.handle)) - pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask)); - break; + { + auto event_it = id_map.find(param.counter_id.handle); + if(event_it != id_map.end()) + pack.perfcounters.push_back({event_it->second, param.simd_mask}); + } + break; case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL: pack.perfcounter_ctrl = param.value; break; diff --git a/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp b/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp index 3f760b47ff..129a71cd6a 100644 --- a/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp +++ b/source/lib/rocprofiler-sdk/thread_trace/tests/att_packet_test.cpp @@ -179,7 +179,7 @@ TEST(thread_trace, perfcounters_configure_test) // Only GFX9 SQ Block counters are supported std::vector> perf_counters = { {"SQ_WAVES", 0x1}, {"SQ_WAVES", 0x2}, {"SQ_WAVES", 0x2}, {"GRBM_COUNT", 0x3}}; - std::set expected; + std::set> expected; std::vector params; params.push_back({ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, {1}}); auto metrics = rocprofiler::counters::getMetricsForAgent("gfx90a"); @@ -193,7 +193,7 @@ TEST(thread_trace, perfcounters_configure_test) att_param.counter_id = rocprofiler_counter_id_t{.handle = metric.id()}; att_param.simd_mask = simd_mask; params.push_back(att_param); - expected.insert(std::atoi(metric.event().c_str()) | (simd_mask << 28)); + expected.insert({std::atoi(metric.event().c_str()), simd_mask}); } rocprofiler_configure_dispatch_thread_trace_service( @@ -216,7 +216,7 @@ TEST(thread_trace, perfcounters_configure_test) ASSERT_NE(tracer, nullptr); ASSERT_EQ(tracer->params.perfcounter_ctrl, 1); ASSERT_EQ(tracer->params.perfcounters.size(), 3); - for(uint32_t param : tracer->params.perfcounters) + for(const auto& param : tracer->params.perfcounters) EXPECT_TRUE(expected.find(param) != expected.end()) << "valid AQLprofile mask not generated for perfcounters"; context::pop_client(1); @@ -242,8 +242,7 @@ TEST(thread_trace, perfcounters_aql_options_test) for(auto& [counter_name, simd_mask] : perf_counters) for(auto& metric : metrics) if(metric.name() == counter_name) - _params.perfcounters.push_back(std::atoi(metric.event().c_str()) | - (simd_mask << 28)); + _params.perfcounters.push_back({std::atoi(metric.event().c_str()), simd_mask}); _params.perfcounter_ctrl = 2; auto new_tracer = std::make_unique( _params, begin(agents)->second, get_api_table(), get_ext_table());