Fix for SQTT perfmon IDs (#1818)

* Fix for SQTT perfmon IDs

* Review comments
Tento commit je obsažen v:
Giovanni Lenzi Baraldi
2025-11-13 13:46:57 +01:00
odevzdal GitHub
rodič 720a5bcf9a
revize cf164dd025
Změněny 4 soubory: 117 přidání a 47 odebrání
@@ -325,19 +325,14 @@ loadMetrics(bool reload, const std::optional<ArchMetric> add_metric)
}
std::unordered_map<uint64_t, int>
getPerfCountersIdMap()
getPerfCountersIdMap(const rocprofiler_agent_t* agent)
{
std::unordered_map<uint64_t, int> map;
auto mets = loadMetrics();
for(const auto& [agent, list] : mets->arch_to_metric)
auto map = std::unordered_map<uint64_t, int>{};
for(const auto& metric : getMetricsForAgent(agent))
{
if(agent.find("gfx9") == std::string::npos) continue;
for(const auto& metric : list)
{
if(metric.name().find("SQ_") == 0 && !metric.event().empty())
map.emplace(metric.id(), std::stoi(metric.event()));
}
// Only add basic SQ counters
if(metric.name().find("SQ_") == 0 && !metric.event().empty())
map.emplace(metric.id(), std::stoi(metric.event()));
}
return map;
@@ -117,7 +117,7 @@ getMetricsForAgent(const rocprofiler_agent_t* agent);
* applicable only for GFX9 agents and SQ block counters
*/
std::unordered_map<uint64_t, int>
getPerfCountersIdMap();
getPerfCountersIdMap(const rocprofiler_agent_t* agent);
/**
* Checks if a metric is valid for a given agent
@@ -41,9 +41,13 @@ using parameter_pack = rocprofiler::thread_trace::thread_trace_parameter_pack;
rocprofiler_status_t
build_pack_from_array(parameter_pack& pack,
const rocprofiler_thread_trace_parameter_t* params,
size_t num_parameters)
size_t num_parameters,
rocprofiler_agent_id_t agent_id)
{
auto id_map = rocprofiler::counters::getPerfCountersIdMap();
const auto* agent = rocprofiler::agent::get_agent(agent_id);
if(agent == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
auto id_map = rocprofiler::counters::getPerfCountersIdMap(agent);
for(size_t p = 0; p < num_parameters; p++)
{
@@ -68,6 +72,8 @@ build_pack_from_array(parameter_pack& pack,
auto event_it = id_map.find(param.counter_id.handle);
if(event_it != id_map.end())
pack.perfcounters.push_back({event_it->second, param.simd_mask});
else
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
}
break;
case ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL:
@@ -125,7 +131,7 @@ rocprofiler_configure_dispatch_thread_trace_service(
if(pack.dispatch_cb_fn == nullptr) return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
{
auto status = build_pack_from_array(pack, parameters, num_parameters);
auto status = build_pack_from_array(pack, parameters, num_parameters, agent_id);
if(status != ROCPROFILER_STATUS_SUCCESS) return status;
}
@@ -160,7 +166,7 @@ rocprofiler_configure_device_thread_trace_service(
pack.callback_userdata = userdata;
{
auto status = build_pack_from_array(pack, parameters, num_parameters);
auto status = build_pack_from_array(pack, parameters, num_parameters, agent_id);
if(status != ROCPROFILER_STATUS_SUCCESS) return status;
}
@@ -75,6 +75,27 @@ test_init()
[[maybe_unused]] static bool run_once = init();
}
/**
 * Counter-iteration callback (handed to rocprofiler_iterate_agent_supported_counters
 * by the tests) that picks the first counter whose name begins with "SQ_WAVES".
 *
 * @param counters      array of counter ids to inspect
 * @param num_counters  number of entries in @p counters
 * @param userdata      must point to a rocprofiler_thread_trace_parameter_t; its
 *                      counter_id field is overwritten with the matching counter
 * @return ROCPROFILER_STATUS_SUCCESS when a matching counter was stored,
 *         ROCPROFILER_STATUS_ERROR when no counter in the array matched
 */
rocprofiler_status_t
get_sq_waves_counter(rocprofiler_agent_id_t /* id */,
                     rocprofiler_counter_id_t* counters,
                     size_t                    num_counters,
                     void*                     userdata)
{
    constexpr auto wanted_prefix = std::string_view{"SQ_WAVES"};
    auto* out_param = static_cast<rocprofiler_thread_trace_parameter_t*>(userdata);

    for(size_t idx = 0; idx < num_counters; ++idx)
    {
        auto counter_info = rocprofiler_counter_info_v1_t{};
        // NOTE(review): ROCPROFILER_CALL is defined elsewhere; presumably it reports or
        // aborts on a failing status -- confirm against the macro definition.
        ROCPROFILER_CALL(rocprofiler_query_counter_info(
                             counters[idx], ROCPROFILER_COUNTER_INFO_VERSION_1, &counter_info),
                         "query counter");

        // Counters without a name can never match.
        if(!counter_info.name) continue;

        // Prefix comparison: names shorter than the prefix simply compare unequal.
        const auto counter_name = std::string_view(counter_info.name);
        if(counter_name.substr(0, wanted_prefix.size()) != wanted_prefix) continue;

        out_param->counter_id = counters[idx];
        return ROCPROFILER_STATUS_SUCCESS;
    }

    return ROCPROFILER_STATUS_ERROR;
}
} // namespace rocprofiler
using namespace rocprofiler;
@@ -172,6 +193,7 @@ TEST(thread_trace, configure_test)
TEST(thread_trace, perfcounters_configure_test)
{
constexpr int NUM_COUNTERS = 3;
ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
test_init();
@@ -181,32 +203,89 @@ TEST(thread_trace, perfcounters_configure_test)
rocprofiler_context_id_t ctx{0};
ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed");
// Only GFX9 SQ Block counters are supported
std::vector<std::pair<std::string, uint64_t>> perf_counters = {
{"SQ_WAVES", 0x1}, {"SQ_WAVES", 0x2}, {"SQ_WAVES", 0x2}, {"GRBM_COUNT", 0x3}};
std::set<std::pair<uint32_t, uint32_t>> expected;
std::vector<rocprofiler_thread_trace_parameter_t> params;
params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {1}});
auto metrics = rocprofiler::counters::getMetricsForAgent("gfx90a");
auto agents = hsa::get_queue_controller()->get_supported_agents();
ASSERT_GT(agents.size(), 0);
for(auto& [counter_name, simd_mask] : perf_counters)
for(auto& metric : metrics)
if(metric.name() == counter_name)
{
rocprofiler_thread_trace_parameter_t att_param;
att_param.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER;
att_param.counter_id = rocprofiler_counter_id_t{.handle = metric.id()};
att_param.simd_mask = simd_mask;
params.push_back(att_param);
expected.insert({std::atoi(metric.event().c_str()), simd_mask});
}
for(auto& [_, agent] : agents)
{
auto params = std::vector<rocprofiler_thread_trace_parameter_t>{};
params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {1}});
auto sq_waves = rocprofiler_thread_trace_parameter_t{};
sq_waves.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER;
ROCPROFILER_CALL(rocprofiler_iterate_agent_supported_counters(
agent.get_rocp_agent()->id, get_sq_waves_counter, &sq_waves),
"iterate counters");
for(int i = 0; i < NUM_COUNTERS; i++)
{
sq_waves.simd_mask = 1 << i;
params.emplace_back(sq_waves);
}
ROCPROFILER_CALL(
rocprofiler_configure_dispatch_thread_trace_service(
ctx,
agent.get_rocp_agent()->id,
params.data(),
params.size(),
[](rocprofiler_agent_id_t,
rocprofiler_queue_id_t,
rocprofiler_async_correlation_id_t,
rocprofiler_kernel_id_t,
rocprofiler_dispatch_id_t,
void*,
rocprofiler_user_data_t*) { return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; },
[](rocprofiler_agent_id_t, int64_t, void*, size_t, rocprofiler_user_data_t) {},
nullptr),
"configure");
}
auto* context = rocprofiler::context::get_mutable_registered_context(ctx);
auto* tracer = context->dispatch_thread_trace.get();
ASSERT_NE(tracer, nullptr);
for(auto& [id, agent] : tracer->get_agents())
{
// We expect perfcounters.size() to match the number of counters we added
ASSERT_EQ(agent->params.perfcounter_ctrl, 1);
ASSERT_EQ(agent->params.perfcounters.size(), NUM_COUNTERS);
for(const auto& param : agent->params.perfcounters)
{
// We expect a nonzero event id (.first) and nonzero simd mask (.second)
EXPECT_TRUE(param.first != 0);
EXPECT_TRUE(param.second != 0)
<< "valid AQLprofile mask not generated for perfcounters";
}
}
context::pop_client(1);
}
TEST(thread_trace, perfcounters_configure_fail_test)
{
ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS);
test_init();
registration::init_logging();
registration::set_init_status(-1);
context::push_client(1);
rocprofiler_context_id_t ctx{0};
ROCPROFILER_CALL(rocprofiler_create_context(&ctx), "context creation failed");
auto agents = hsa::get_queue_controller()->get_supported_agents();
ASSERT_GT(agents.size(), 0);
for(auto& [_, agent] : agents)
{
rocprofiler_configure_dispatch_thread_trace_service(
auto params = std::vector<rocprofiler_thread_trace_parameter_t>{};
params.push_back({ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTERS_CTRL, {1}});
auto sq_waves = rocprofiler_thread_trace_parameter_t{};
sq_waves.type = ROCPROFILER_THREAD_TRACE_PARAMETER_PERFCOUNTER;
// We are not initializing the counter, so we expect the configuration to fail
params.emplace_back(sq_waves);
auto status = rocprofiler_configure_dispatch_thread_trace_service(
ctx,
agent.get_rocp_agent()->id,
params.data(),
@@ -220,20 +299,10 @@ TEST(thread_trace, perfcounters_configure_test)
rocprofiler_user_data_t*) { return ROCPROFILER_THREAD_TRACE_CONTROL_NONE; },
[](rocprofiler_agent_id_t, int64_t, void*, size_t, rocprofiler_user_data_t) {},
nullptr);
EXPECT_NE(status, ROCPROFILER_STATUS_SUCCESS);
}
auto* context = rocprofiler::context::get_mutable_registered_context(ctx);
auto* tracer = context->dispatch_thread_trace.get();
ASSERT_NE(tracer, nullptr);
for(auto& [id, agent] : tracer->get_agents())
{
ASSERT_EQ(agent->params.perfcounter_ctrl, 1);
ASSERT_EQ(agent->params.perfcounters.size(), 3);
for(const auto& param : agent->params.perfcounters)
EXPECT_TRUE(expected.find(param) != expected.end())
<< "valid AQLprofile mask not generated for perfcounters";
}
context::pop_client(1);
}