General fixes to ATT, packets and event ID retrieval (#960)
* General fixes to ATT, packets and event ID retrieval
* Update source/lib/rocprofiler-sdk/hsa/aql_packet.hpp

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Αυτή η υποβολή περιλαμβάνεται σε:
υποβλήθηκε από
GitHub
γονέας
498b1f2bd7
υποβολή
4e2144dbfa
@@ -157,11 +157,19 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache&
|
||||
|
||||
if(perf_ctrl != 0 && !params.perfcounters.empty())
|
||||
{
|
||||
aql_params.push_back(
|
||||
{HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL, {perf_ctrl - 1}});
|
||||
auto perf_param = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME;
|
||||
for(uint32_t perf_counter : params.perfcounters)
|
||||
aql_params.push_back({perf_param, {perf_counter}});
|
||||
for(const auto& perf_counter : params.perfcounters)
|
||||
{
|
||||
aqlprofile_att_parameter_t param{};
|
||||
param.parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME;
|
||||
param.counter_id = perf_counter.first;
|
||||
param.simd_mask = perf_counter.second;
|
||||
aql_params.push_back(param);
|
||||
}
|
||||
|
||||
aqlprofile_att_parameter_t param{};
|
||||
param.parameter_name = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL;
|
||||
param.value = perf_ctrl - 1;
|
||||
aql_params.push_back(param);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -195,24 +195,22 @@ getMetricIdMap()
|
||||
return id_map;
|
||||
}
|
||||
|
||||
const MetricIdMap*
|
||||
std::unordered_map<uint64_t, int>
|
||||
getPerfCountersIdMap()
|
||||
{
|
||||
// Only GFX9 counters in SQ Block are supported
|
||||
static MetricIdMap*& att_perf_counters_map =
|
||||
common::static_object<MetricIdMap>::construct([]() {
|
||||
MetricIdMap map;
|
||||
std::string agent_prefix{"gfx9"};
|
||||
auto is_gfx9 = [&](auto& agent_name) {
|
||||
return (agent_name.find(agent_prefix) != std::string::npos);
|
||||
};
|
||||
for(const auto& [agent_name, metrics] : *CHECK_NOTNULL(getMetricMap()))
|
||||
if(is_gfx9(agent_name))
|
||||
for(const auto& metric : metrics)
|
||||
if(metric.block() == "SQ") map.emplace(metric.id(), metric);
|
||||
return map;
|
||||
}());
|
||||
return att_perf_counters_map;
|
||||
std::unordered_map<uint64_t, int> map;
|
||||
|
||||
for(const auto& [agent, list] : *CHECK_NOTNULL(getMetricMap()))
|
||||
{
|
||||
if(agent.find("gfx9") == std::string::npos) continue;
|
||||
for(const auto& metric : list)
|
||||
{
|
||||
if(metric.name().find("SQ_") == 0 && !metric.event().empty())
|
||||
map.emplace(metric.id(), std::stoi(metric.event()));
|
||||
}
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
const MetricMap*
|
||||
|
||||
@@ -119,10 +119,10 @@ const MetricIdMap*
|
||||
getMetricIdMap();
|
||||
|
||||
/**
|
||||
* Get the metrics for perfcounters options in thread trace
|
||||
* Get the metric event ids for perfcounters options in thread trace
|
||||
* applicable only for GFX9 agents and SQ block counters
|
||||
*/
|
||||
const MetricIdMap*
|
||||
std::unordered_map<uint64_t, int>
|
||||
getPerfCountersIdMap();
|
||||
|
||||
/**
|
||||
|
||||
@@ -158,22 +158,6 @@ struct TraceMemoryPool
|
||||
static hsa_status_t Copy(void* dst, const void* src, size_t size, void* data);
|
||||
};
|
||||
|
||||
class BaseTTAQLPacket : public AQLPacket
|
||||
{
|
||||
friend class rocprofiler::aql::ThreadTraceAQLPacketFactory;
|
||||
|
||||
public:
|
||||
BaseTTAQLPacket(std::shared_ptr<TraceMemoryPool>& _tracepool)
|
||||
: tracepool(_tracepool){};
|
||||
~BaseTTAQLPacket() override = default;
|
||||
|
||||
aqlprofile_handle_t GetHandle() const { return tracepool->handle; }
|
||||
hsa_agent_t GetAgent() const { return tracepool->gpu_agent; }
|
||||
|
||||
protected:
|
||||
std::shared_ptr<TraceMemoryPool> tracepool;
|
||||
};
|
||||
|
||||
class CodeobjMarkerAQLPacket : public AQLPacket
|
||||
{
|
||||
friend class rocprofiler::aql::ThreadTraceAQLPacketFactory;
|
||||
@@ -209,7 +193,8 @@ public:
|
||||
const aqlprofile_att_profile_t& profile);
|
||||
~TraceControlAQLPacket() override = default;
|
||||
|
||||
explicit TraceControlAQLPacket(TraceControlAQLPacket& other)
|
||||
explicit TraceControlAQLPacket(const TraceControlAQLPacket& other)
|
||||
: AQLPacket()
|
||||
{
|
||||
this->tracepool = other.tracepool;
|
||||
this->packets = other.packets;
|
||||
|
||||
@@ -64,7 +64,7 @@ struct thread_trace_parameter_pack
|
||||
uint64_t buffer_size = DEFAULT_BUFFER_SIZE;
|
||||
|
||||
// GFX9 Only
|
||||
std::vector<uint32_t> perfcounters;
|
||||
std::vector<std::pair<uint32_t, uint32_t>> perfcounters;
|
||||
|
||||
static constexpr size_t DEFAULT_SIMD = 0x7;
|
||||
static constexpr size_t DEFAULT_PERFCOUNTER_SIMD_MASK = 0xF;
|
||||
|
||||
@@ -30,21 +30,6 @@
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "rocprofiler-sdk/amd_detail/thread_trace.h"
|
||||
|
||||
namespace
|
||||
{
|
||||
uint32_t
|
||||
get_mask(const rocprofiler::counters::Metric* metric, uint64_t simds_selected)
|
||||
{
|
||||
uint32_t mask = std::atoi(metric->event().c_str());
|
||||
if(simds_selected == 0)
|
||||
simds_selected =
|
||||
rocprofiler::thread_trace::thread_trace_parameter_pack::DEFAULT_PERFCOUNTER_SIMD_MASK;
|
||||
mask |= simds_selected
|
||||
<< rocprofiler::thread_trace::thread_trace_parameter_pack::PERFCOUNTER_SIMD_MASK_SHIFT;
|
||||
return mask;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
extern "C" {
|
||||
rocprofiler_status_t ROCPROFILER_API
|
||||
rocprofiler_configure_dispatch_thread_trace_service(
|
||||
@@ -69,7 +54,7 @@ rocprofiler_configure_dispatch_thread_trace_service(
|
||||
pack.shader_cb_fn = shader_callback;
|
||||
pack.callback_userdata = callback_userdata;
|
||||
|
||||
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap());
|
||||
auto id_map = rocprofiler::counters::getPerfCountersIdMap();
|
||||
for(size_t p = 0; p < num_parameters; p++)
|
||||
{
|
||||
const rocprofiler_att_parameter_t& param = parameters[p];
|
||||
@@ -85,10 +70,12 @@ rocprofiler_configure_dispatch_thread_trace_service(
|
||||
case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break;
|
||||
case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break;
|
||||
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER:
|
||||
if(const auto* metric_ptr =
|
||||
rocprofiler::common::get_val(id_map, param.counter_id.handle))
|
||||
pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask));
|
||||
break;
|
||||
{
|
||||
auto event_it = id_map.find(param.counter_id.handle);
|
||||
if(event_it != id_map.end())
|
||||
pack.perfcounters.push_back({event_it->second, param.simd_mask});
|
||||
}
|
||||
break;
|
||||
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
|
||||
pack.perfcounter_ctrl = param.value;
|
||||
break;
|
||||
@@ -124,7 +111,7 @@ rocprofiler_configure_agent_thread_trace_service(
|
||||
pack.shader_cb_fn = shader_callback;
|
||||
pack.callback_userdata = callback_userdata;
|
||||
|
||||
const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap());
|
||||
auto id_map = rocprofiler::counters::getPerfCountersIdMap();
|
||||
for(size_t p = 0; p < num_parameters; p++)
|
||||
{
|
||||
const rocprofiler_att_parameter_t& param = parameters[p];
|
||||
@@ -140,10 +127,12 @@ rocprofiler_configure_agent_thread_trace_service(
|
||||
case ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE: pack.buffer_size = param.value; break;
|
||||
case ROCPROFILER_ATT_PARAMETER_SIMD_SELECT: pack.simd_select = param.value; break;
|
||||
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER:
|
||||
if(const auto* metric_ptr =
|
||||
rocprofiler::common::get_val(id_map, param.counter_id.handle))
|
||||
pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask));
|
||||
break;
|
||||
{
|
||||
auto event_it = id_map.find(param.counter_id.handle);
|
||||
if(event_it != id_map.end())
|
||||
pack.perfcounters.push_back({event_it->second, param.simd_mask});
|
||||
}
|
||||
break;
|
||||
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
|
||||
pack.perfcounter_ctrl = param.value;
|
||||
break;
|
||||
|
||||
@@ -179,7 +179,7 @@ TEST(thread_trace, perfcounters_configure_test)
|
||||
// Only GFX9 SQ Block counters are supported
|
||||
std::vector<std::pair<std::string, uint64_t>> perf_counters = {
|
||||
{"SQ_WAVES", 0x1}, {"SQ_WAVES", 0x2}, {"SQ_WAVES", 0x2}, {"GRBM_COUNT", 0x3}};
|
||||
std::set<uint32_t> expected;
|
||||
std::set<std::pair<uint32_t, uint32_t>> expected;
|
||||
std::vector<rocprofiler_att_parameter_t> params;
|
||||
params.push_back({ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, {1}});
|
||||
auto metrics = rocprofiler::counters::getMetricsForAgent("gfx90a");
|
||||
@@ -193,7 +193,7 @@ TEST(thread_trace, perfcounters_configure_test)
|
||||
att_param.counter_id = rocprofiler_counter_id_t{.handle = metric.id()};
|
||||
att_param.simd_mask = simd_mask;
|
||||
params.push_back(att_param);
|
||||
expected.insert(std::atoi(metric.event().c_str()) | (simd_mask << 28));
|
||||
expected.insert({std::atoi(metric.event().c_str()), simd_mask});
|
||||
}
|
||||
|
||||
rocprofiler_configure_dispatch_thread_trace_service(
|
||||
@@ -216,7 +216,7 @@ TEST(thread_trace, perfcounters_configure_test)
|
||||
ASSERT_NE(tracer, nullptr);
|
||||
ASSERT_EQ(tracer->params.perfcounter_ctrl, 1);
|
||||
ASSERT_EQ(tracer->params.perfcounters.size(), 3);
|
||||
for(uint32_t param : tracer->params.perfcounters)
|
||||
for(const auto& param : tracer->params.perfcounters)
|
||||
EXPECT_TRUE(expected.find(param) != expected.end())
|
||||
<< "valid AQLprofile mask not generated for perfcounters";
|
||||
context::pop_client(1);
|
||||
@@ -242,8 +242,7 @@ TEST(thread_trace, perfcounters_aql_options_test)
|
||||
for(auto& [counter_name, simd_mask] : perf_counters)
|
||||
for(auto& metric : metrics)
|
||||
if(metric.name() == counter_name)
|
||||
_params.perfcounters.push_back(std::atoi(metric.event().c_str()) |
|
||||
(simd_mask << 28));
|
||||
_params.perfcounters.push_back({std::atoi(metric.event().c_str()), simd_mask});
|
||||
_params.perfcounter_ctrl = 2;
|
||||
auto new_tracer = std::make_unique<thread_trace::ThreadTracerQueue>(
|
||||
_params, begin(agents)->second, get_api_table(), get_ext_table());
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user