Fix for ATT codeobj table initialization (#290)

* Fix for codeobj HSA table order

* Fix tests

* Format

---------

Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
This commit is contained in:
Baraldi, Giovanni
2025-03-20 22:27:46 +01:00
committed by GitHub
parent c9ca876b79
commit b21452ec11
4 changed files with 17 additions and 15 deletions
@@ -903,6 +903,7 @@ rocprofiler_set_api_table(const char* name,
if(runtime_pc_sampling_table)
rocprofiler::pc_sampling::code_object::initialize(hsa_api_table);
#endif
rocprofiler::thread_trace::code_object::initialize(hsa_api_table);
// install rocprofiler API wrappers
rocprofiler::hsa::update_table(hsa_api_table->core_, lib_instance);
@@ -507,8 +507,6 @@ initialize(HsaApiTable* table)
{
ROCP_FATAL_IF(!table->core_ || !table->amd_ext_);
code_object::initialize(table);
for(auto& ctx : context::get_registered_contexts())
{
if(ctx->device_thread_trace) ctx->device_thread_trace->resource_init();
@@ -94,14 +94,8 @@ public:
std::unique_ptr<hsa::TraceControlAQLPacket> get_control(bool bStart);
void iterate_data(aqlprofile_handle_t handle, rocprofiler_user_data_t data);
hsa_queue_t* queue{nullptr};
std::mutex trace_resources_mut;
thread_trace_parameter_pack params;
std::atomic<int> active_traces{0};
std::unique_ptr<hsa::TraceControlAQLPacket> control_packet;
std::unique_ptr<aql::ThreadTraceAQLPacketFactory> factory;
thread_trace_parameter_pack params;
const rocprofiler_agent_id_t agent_id;
[[nodiscard]] std::unique_ptr<class Signal> Submit(hsa_ext_amd_aql_pm4_packet_t* packet,
bool bWait) const;
@@ -116,11 +110,15 @@ public:
}
return nullptr;
}
std::unique_ptr<aql::ThreadTraceAQLPacketFactory> factory{nullptr};
private:
std::unique_ptr<code_object::CodeobjCallbackRegistry> codeobj_reg{nullptr};
hsa_queue_t* queue{nullptr};
std::atomic<int> active_traces{0};
std::mutex trace_resources_mut{};
rocprofiler_agent_id_t agent_id;
std::unique_ptr<hsa::TraceControlAQLPacket> control_packet{nullptr};
std::unique_ptr<code_object::CodeobjCallbackRegistry> codeobj_reg{nullptr};
};
class DispatchThreadTracer
@@ -150,8 +148,10 @@ public:
rocprofiler_user_data_t* user_data,
const context::correlation_id* corr_id);
void post_kernel_call(inst_pkt_t& aql, const hsa::queue_info_session& session);
void post_kernel_call(inst_pkt_t& aql, const hsa::queue_info_session& session);
const auto& get_agents() const { return agents; }
private:
std::unordered_map<hsa_agent_t, std::unique_ptr<ThreadTracerQueue>> agents{};
std::unordered_map<rocprofiler_agent_id_t, thread_trace_parameter_pack> params{};
@@ -181,6 +181,9 @@ public:
return params.find(id) != params.end();
}
const auto& get_agents() const { return agents; }
private:
std::map<rocprofiler_agent_id_t, std::unique_ptr<ThreadTracerQueue>> agents{};
std::map<rocprofiler_agent_id_t, thread_trace_parameter_pack> params{};
@@ -112,7 +112,7 @@ TEST(thread_trace, resource_creation)
tracer.resource_init();
for(auto& [_, agenttracer] : tracer.agents)
for(auto& [_, agenttracer] : tracer.get_agents())
{
agenttracer->load_codeobj(1, 0x1000, 0x1000);
agenttracer->load_codeobj(2, 0x3000, 0x1000);
@@ -222,7 +222,7 @@ TEST(thread_trace, perfcounters_configure_test)
auto* tracer = context->dispatch_thread_trace.get();
ASSERT_NE(tracer, nullptr);
for(auto& [id, agent] : tracer->agents)
for(auto& [id, agent] : tracer->get_agents())
{
ASSERT_EQ(agent->params.perfcounter_ctrl, 1);
ASSERT_EQ(agent->params.perfcounters.size(), 3);