PC sampling services provides dispatch id (#1209)

Αυτή η υποβολή περιλαμβάνεται σε:
Vladimir Indic
2024-11-21 18:10:31 +01:00
υποβλήθηκε από GitHub
γονέας 6ae441f785
υποβολή 8d2ce4b475
8 αρχεία άλλαξαν με 36 προσθήκες και 21 διαγραφές
@@ -322,9 +322,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
<< "wave_in_group: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
<< "chiplet: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet)
<< ", "
// << "cu_id: " << pc_sample->hw_id << ", "
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
<< "correlation: {internal=" << std::setw(7)
<< pc_sample->correlation_id.internal << ", "
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value << "}"
@@ -395,7 +395,7 @@ WriteInterceptor(const void* packets,
if(pc_sampling::is_pc_sample_service_configured(queue.get_agent().get_rocp_agent()->id))
{
transformed_packets.emplace_back(pc_sampling::hsa::generate_marker_packet_for_kernel(
corr_id, tracing_data_v.external_correlation_ids));
corr_id, tracing_data_v.external_correlation_ids, dispatch_id));
}
#endif
@@ -105,6 +105,7 @@ amd_intercept_marker_handler_callback(const struct amd_aql_intercept_marker_s* p
dispatch_pkt.write_index = packet_id;
dispatch_pkt.correlation_id = {.internal = internal_correlation,
.external = external_correlation};
dispatch_pkt.dispatch_id = packet->user_data[2];
auto* parser = pcs_session->parser.get();
if(parser->shouldFlipRocrBuffer(dispatch_pkt))
@@ -187,7 +188,8 @@ data_ready_callback(void* client_callback_data,
rocprofiler::hsa::rocprofiler_packet
generate_marker_packet_for_kernel(
context::correlation_id* correlation_id,
const tracing::external_correlation_id_map_t& external_correlation_ids)
const tracing::external_correlation_id_map_t& external_correlation_ids,
const rocprofiler_dispatch_id_t dispatch_id)
{
// This function executes for each kernel dispatched to the agent on which
// the PC sampling service is configured.
@@ -231,6 +233,9 @@ generate_marker_packet_for_kernel(
marker_pkt.user_data[1] = 0;
}
// dispatch_id should always be present
marker_pkt.user_data[2] = dispatch_id;
return rocprofiler::hsa::rocprofiler_packet(marker_pkt);
}
@@ -42,7 +42,8 @@ namespace hsa
rocprofiler::hsa::rocprofiler_packet
generate_marker_packet_for_kernel(
context::correlation_id* correlation_id,
const tracing::external_correlation_id_map_t& external_correlation_ids);
const tracing::external_correlation_id_map_t& external_correlation_ids,
const rocprofiler_dispatch_id_t dispatch_id);
void
pc_sampling_service_start(context::pc_sampling_service* service);
@@ -47,6 +47,12 @@ operator==(device_handle a, device_handle b)
namespace Parser
{
/**
 * @brief Pairs the dispatch id of a dispatch packet with that packet's correlation id,
 * so both can be stored and looked up together as a single value.
 */
struct dispatch_correlation_ids_t
{
// Dispatch id taken from the dispatch packet.
rocprofiler_dispatch_id_t dispatch_id;
// Correlation id (internal and external parts) taken from the same dispatch packet.
rocprofiler_correlation_id_t correlation_id;
};
/**
* @brief Struct imitating the correlation_id returned by the trap handler in raw PC samples.
*/
@@ -70,11 +76,11 @@ struct DispatchPkt
struct cache_type_t
{
trap_correlation_id_t id_in{.raw = ~0ul};
rocprofiler_correlation_id_t id_out{};
uint64_t dev_id = ~0ul;
size_t increment = 0;
size_t object_id = 0;
trap_correlation_id_t id_in{.raw = ~0ul};
dispatch_correlation_ids_t id_out{};
uint64_t dev_id = ~0ul;
size_t increment = 0;
size_t object_id = 0;
};
inline bool
@@ -131,7 +137,7 @@ public:
{
std::unique_lock<std::mutex> lk(mut);
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id;
dispatch_to_correlation[{trap_id, pkt.device}] = {pkt.dispatch_id, pkt.correlation_id};
cache_reset_count.fetch_add(1);
}
@@ -150,7 +156,7 @@ public:
* Given a device dev, doorbell and wrapped dispatch_id,
* @returns the correlation_id set by dispatch_pkt_id_t
*/
rocprofiler_correlation_id_t get(device_handle dev, trap_correlation_id_t correlation_in)
dispatch_correlation_ids_t get(device_handle dev, trap_correlation_id_t correlation_in)
{
#ifndef _PARSER_CORRELATION_DISABLE_CACHE
static thread_local cache_type_t cache{};
@@ -195,9 +201,9 @@ public:
}
private:
std::unordered_map<DispatchPkt, rocprofiler_correlation_id_t> dispatch_to_correlation{};
std::atomic<size_t> cache_reset_count{1};
size_t object_id = 0;
std::unordered_map<DispatchPkt, dispatch_correlation_ids_t> dispatch_to_correlation{};
std::atomic<size_t> cache_reset_count{1};
size_t object_id = 0;
std::mutex mut;
};
@@ -238,9 +244,13 @@ add_upcoming_samples(const device_handle device,
try
{
Parser::trap_correlation_id_t trap{.raw = snap->correlation_id};
pc_sample.correlation_id = corr_map->get(device, trap);
auto dispatch_correlation_ids = corr_map->get(device, trap);
pc_sample.dispatch_id = dispatch_correlation_ids.dispatch_id;
pc_sample.correlation_id = dispatch_correlation_ids.correlation_id;
} catch(std::exception& e)
{
// TODO: introduce ROCPROFILER_DISPATCH_ID_INTERNAL_NONE
pc_sample.dispatch_id = 0;
pc_sample.correlation_id = {.internal = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE,
.external = rocprofiler_user_data_t{
.value = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE}};
@@ -73,7 +73,7 @@ typedef struct
uint64_t read_index;
/// both internal and external correlation ID.
rocprofiler_correlation_id_t correlation_id;
reserved_type _[2];
rocprofiler_dispatch_id_t dispatch_id;
} dispatch_pkt_id_t;
typedef struct
@@ -94,6 +94,7 @@ Benchmark(bool bWarmup)
TEST(pcs_parser, benchmark_test)
{
// Tests for host trap v0 records
std::cout << "Parsing rocprofiler_pc_sampling_record_host_trap_v0_t records!" << std::endl;
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(true), true);
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(false), true);
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(false), true);
@@ -351,9 +351,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
<< "wave_in_group: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
<< "chiplet: " << std::setw(2)
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet)
<< ", "
// << "cu_id: " << pc_sample->hw_id << ", "
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
<< "correlation: {internal=" << std::setw(7)
<< pc_sample->correlation_id.internal << ", "
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value