General fixes to ATT, packets and event ID retrieval (#960)

* General fixes to ATT, packets and event ID retrieval

* Update source/lib/rocprofiler-sdk/hsa/aql_packet.hpp

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Αυτή η υποβολή περιλαμβάνεται σε:
Giovanni Lenzi Baraldi
2024-07-04 03:58:45 -03:00
υποβλήθηκε από GitHub
γονέας 498b1f2bd7
υποβολή 4e2144dbfa
7 αρχεία άλλαξαν με 50 προσθήκες και 71 διαγραφές
@@ -157,11 +157,19 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache&
if(perf_ctrl != 0 && !params.perfcounters.empty())
{
aql_params.push_back(
{HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL, {perf_ctrl - 1}});
auto perf_param = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME;
for(uint32_t perf_counter : params.perfcounters)
aql_params.push_back({perf_param, {perf_counter}});
for(const auto& perf_counter : params.perfcounters)
{
aqlprofile_att_parameter_t param{};
param.parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME;
param.counter_id = perf_counter.first;
param.simd_mask = perf_counter.second;
aql_params.push_back(param);
}
aqlprofile_att_parameter_t param{};
param.parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL;
param.value = perf_ctrl - 1;
aql_params.push_back(param);
}
}
@@ -195,24 +195,22 @@ getMetricIdMap()
return id_map;
}
const MetricIdMap*
std::unordered_map<uint64_t, int>
getPerfCountersIdMap()
{
// Only GFX9 counters in SQ Block are supported
static MetricIdMap*& att_perf_counters_map =
common::static_object<MetricIdMap>::construct([]() {
MetricIdMap map;
std::string agent_prefix{"gfx9"};
auto is_gfx9 = [&](auto& agent_name) {
return (agent_name.find(agent_prefix) != std::string::npos);
};
for(const auto& [agent_name, metrics] : *CHECK_NOTNULL(getMetricMap()))
if(is_gfx9(agent_name))
for(const auto& metric : metrics)
if(metric.block() == "SQ") map.emplace(metric.id(), metric);
return map;
}());
return att_perf_counters_map;
std::unordered_map<uint64_t, int> map;
for(const auto& [agent, list] : *CHECK_NOTNULL(getMetricMap()))
{
if(agent.find("gfx9") == std::string::npos) continue;
for(const auto& metric : list)
{
if(metric.name().find("SQ_") == 0 && !metric.event().empty())
map.emplace(metric.id(), std::stoi(metric.event()));
}
}
return map;
}
const MetricMap*
@@ -119,10 +119,10 @@ const MetricIdMap*
getMetricIdMap();
/**
* Get the metrics for perfcounters options in thread trace
* Get the metric event ids for perfcounters options in thread trace
* applicable only for GFX9 agents and SQ block counters
*/
const MetricIdMap*
std::unordered_map<uint64_t, int>
getPerfCountersIdMap();
/**
@@ -158,22 +158,6 @@ struct TraceMemoryPool
static hsa_status_t Copy(void* dst, const void* src, size_t size, void* data);
};
/**
 * Base class for thread-trace AQL packets that share a TraceMemoryPool.
 *
 * Keeps a shared reference to the pool and exposes the pool's aqlprofile
 * handle and GPU agent through read-only accessors.
 */
class BaseTTAQLPacket : public AQLPacket
{
// The packet factory is granted access to the protected/private members.
friend class rocprofiler::aql::ThreadTraceAQLPacketFactory;
public:
// Stores a copy of the shared_ptr, so this packet co-owns the pool.
BaseTTAQLPacket(std::shared_ptr<TraceMemoryPool>& _tracepool)
: tracepool(_tracepool){};
~BaseTTAQLPacket() override = default;
// Returns the `handle` member of the shared pool (dereferences tracepool).
aqlprofile_handle_t GetHandle() const { return tracepool->handle; }
// Returns the `gpu_agent` member of the shared pool (dereferences tracepool).
hsa_agent_t GetAgent() const { return tracepool->gpu_agent; }
protected:
// Shared pool; dereferenced without a null check by the accessors above.
std::shared_ptr<TraceMemoryPool> tracepool;
};
class CodeobjMarkerAQLPacket : public AQLPacket
{
friend class rocprofiler::aql::ThreadTraceAQLPacketFactory;
@@ -209,7 +193,8 @@ public:
const aqlprofile_att_profile_t& profile);
~TraceControlAQLPacket() override = default;
explicit TraceControlAQLPacket(TraceControlAQLPacket& other)
explicit TraceControlAQLPacket(const TraceControlAQLPacket& other)
: AQLPacket()
{
this->tracepool = other.tracepool;
this->packets = other.packets;
@@ -64,7 +64,7 @@ struct thread_trace_parameter_pack
uint64_t buffer_size = DEFAULT_BUFFER_SIZE;
// GFX9 Only
std::vector<uint32_t> perfcounters;
std::vector<std::pair<uint32_t, uint32_t>> perfcounters;
static constexpr size_t DEFAULT_SIMD = 0x7;
static constexpr size_t DEFAULT_PERFCOUNTER_SIMD_MASK = 0xF;
@@ -30,21 +30,6 @@
#include "lib/rocprofiler-sdk/registration.hpp"
#include "rocprofiler-sdk/amd_detail/thread_trace.h"
namespace
{
/**
 * Builds a single 32-bit perfcounter value from a metric and a SIMD selection.
 *
 * @param metric          Metric whose event() string is parsed as a decimal integer.
 *                        Dereferenced without a null check.
 * @param simds_selected  SIMD selection bits; 0 selects DEFAULT_PERFCOUNTER_SIMD_MASK.
 * @return The parsed event value OR-ed with the SIMD selection shifted left by
 *         PERFCOUNTER_SIMD_MASK_SHIFT.
 */
uint32_t
get_mask(const rocprofiler::counters::Metric* metric, uint64_t simds_selected)
{
// std::atoi yields 0 when the event string holds no parsable number.
uint32_t mask = std::atoi(metric->event().c_str());
// A zero selection falls back to the default SIMD mask.
if(simds_selected == 0)
simds_selected =
rocprofiler::thread_trace::thread_trace_parameter_pack::DEFAULT_PERFCOUNTER_SIMD_MASK;
// NOTE(review): the shift is evaluated on a 64-bit operand and then narrowed into the
// 32-bit mask, so selection bits that land above bit 31 appear to be dropped - confirm
// the intended width of simds_selected against the shift constant.
mask |= simds_selected
<< rocprofiler::thread_trace::thread_trace_parameter_pack::PERFCOUNTER_SIMD_MASK_SHIFT;
return mask;
}
} // namespace
extern "C" {
rocprofiler_status_t ROCPROFILER_API
rocprofiler_configure_dispatch_thread_trace_service(
@@ -69,7 +54,7 @@ rocprofiler_configure_dispatch_thread_trace_service(
pack.shader_cb_fn = shader_callback;
pack.callback_userdata = callback_userdata;
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap());
auto id_map = rocprofiler::counters::getPerfCountersIdMap();
for(size_t p = 0; p < num_parameters; p++)
{
const rocprofiler_att_parameter_t& param = parameters[p];
@@ -85,10 +70,12 @@ rocprofiler_configure_dispatch_thread_trace_service(
case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break;
case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break;
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER:
if(const auto* metric_ptr =
rocprofiler::common::get_val(id_map, param.counter_id.handle))
pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask));
break;
{
auto event_it = id_map.find(param.counter_id.handle);
if(event_it != id_map.end())
pack.perfcounters.push_back({event_it->second, param.simd_mask});
}
break;
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
pack.perfcounter_ctrl = param.value;
break;
@@ -124,7 +111,7 @@ rocprofiler_configure_agent_thread_trace_service(
pack.shader_cb_fn = shader_callback;
pack.callback_userdata = callback_userdata;
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap());
auto id_map = rocprofiler::counters::getPerfCountersIdMap();
for(size_t p = 0; p < num_parameters; p++)
{
const rocprofiler_att_parameter_t& param = parameters[p];
@@ -140,10 +127,12 @@ rocprofiler_configure_agent_thread_trace_service(
case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break;
case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break;
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER:
if(const auto* metric_ptr =
rocprofiler::common::get_val(id_map, param.counter_id.handle))
pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask));
break;
{
auto event_it = id_map.find(param.counter_id.handle);
if(event_it != id_map.end())
pack.perfcounters.push_back({event_it->second, param.simd_mask});
}
break;
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
pack.perfcounter_ctrl = param.value;
break;
@@ -179,7 +179,7 @@ TEST(thread_trace, perfcounters_configure_test)
// Only GFX9 SQ Block counters are supported
std::vector<std::pair<std::string, uint64_t>> perf_counters = {
{"SQ_WAVES", 0x1}, {"SQ_WAVES", 0x2}, {"SQ_WAVES", 0x2}, {"GRBM_COUNT", 0x3}};
std::set<uint32_t> expected;
std::set<std::pair<uint32_t, uint32_t>> expected;
std::vector<rocprofiler_att_parameter_t> params;
params.push_back({ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, {1}});
auto metrics = rocprofiler::counters::getMetricsForAgent("gfx90a");
@@ -193,7 +193,7 @@ TEST(thread_trace, perfcounters_configure_test)
att_param.counter_id = rocprofiler_counter_id_t{.handle = metric.id()};
att_param.simd_mask = simd_mask;
params.push_back(att_param);
expected.insert(std::atoi(metric.event().c_str()) | (simd_mask << 28));
expected.insert({std::atoi(metric.event().c_str()), simd_mask});
}
rocprofiler_configure_dispatch_thread_trace_service(
@@ -216,7 +216,7 @@ TEST(thread_trace, perfcounters_configure_test)
ASSERT_NE(tracer, nullptr);
ASSERT_EQ(tracer->params.perfcounter_ctrl, 1);
ASSERT_EQ(tracer->params.perfcounters.size(), 3);
for(uint32_t param : tracer->params.perfcounters)
for(const auto& param : tracer->params.perfcounters)
EXPECT_TRUE(expected.find(param) != expected.end())
<< "valid AQLprofile mask not generated for perfcounters";
context::pop_client(1);
@@ -242,8 +242,7 @@ TEST(thread_trace, perfcounters_aql_options_test)
for(auto& [counter_name, simd_mask] : perf_counters)
for(auto& metric : metrics)
if(metric.name() == counter_name)
_params.perfcounters.push_back(std::atoi(metric.event().c_str()) |
(simd_mask << 28));
_params.perfcounters.push_back({std::atoi(metric.event().c_str()), simd_mask});
_params.perfcounter_ctrl = 2;
auto new_tracer = std::make_unique<thread_trace::ThreadTracerQueue>(
_params, begin(agents)->second, get_api_table(), get_ext_table());