PC sampling services provides dispatch id (#1209)
Αυτή η υποβολή περιλαμβάνεται σε:
υποβλήθηκε από
GitHub
γονέας
6ae441f785
υποβολή
8d2ce4b475
@@ -322,9 +322,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
<< "wave_in_group: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
|
||||
<< "chiplet: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet)
|
||||
<< ", "
|
||||
// << "cu_id: " << pc_sample->hw_id << ", "
|
||||
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
|
||||
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
|
||||
<< "correlation: {internal=" << std::setw(7)
|
||||
<< pc_sample->correlation_id.internal << ", "
|
||||
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value << "}"
|
||||
|
||||
@@ -395,7 +395,7 @@ WriteInterceptor(const void* packets,
|
||||
if(pc_sampling::is_pc_sample_service_configured(queue.get_agent().get_rocp_agent()->id))
|
||||
{
|
||||
transformed_packets.emplace_back(pc_sampling::hsa::generate_marker_packet_for_kernel(
|
||||
corr_id, tracing_data_v.external_correlation_ids));
|
||||
corr_id, tracing_data_v.external_correlation_ids, dispatch_id));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -105,6 +105,7 @@ amd_intercept_marker_handler_callback(const struct amd_aql_intercept_marker_s* p
|
||||
dispatch_pkt.write_index = packet_id;
|
||||
dispatch_pkt.correlation_id = {.internal = internal_correlation,
|
||||
.external = external_correlation};
|
||||
dispatch_pkt.dispatch_id = packet->user_data[2];
|
||||
|
||||
auto* parser = pcs_session->parser.get();
|
||||
if(parser->shouldFlipRocrBuffer(dispatch_pkt))
|
||||
@@ -187,7 +188,8 @@ data_ready_callback(void* client_callback_data,
|
||||
rocprofiler::hsa::rocprofiler_packet
|
||||
generate_marker_packet_for_kernel(
|
||||
context::correlation_id* correlation_id,
|
||||
const tracing::external_correlation_id_map_t& external_correlation_ids)
|
||||
const tracing::external_correlation_id_map_t& external_correlation_ids,
|
||||
const rocprofiler_dispatch_id_t dispatch_id)
|
||||
{
|
||||
// This function executes for each kernel dispatched to the agent on which
|
||||
// the PC sampling service is configured.
|
||||
@@ -231,6 +233,9 @@ generate_marker_packet_for_kernel(
|
||||
marker_pkt.user_data[1] = 0;
|
||||
}
|
||||
|
||||
// dispatch_id should always be present
|
||||
marker_pkt.user_data[2] = dispatch_id;
|
||||
|
||||
return rocprofiler::hsa::rocprofiler_packet(marker_pkt);
|
||||
}
|
||||
|
||||
|
||||
@@ -42,7 +42,8 @@ namespace hsa
|
||||
rocprofiler::hsa::rocprofiler_packet
|
||||
generate_marker_packet_for_kernel(
|
||||
context::correlation_id* correlation_id,
|
||||
const tracing::external_correlation_id_map_t& external_correlation_ids);
|
||||
const tracing::external_correlation_id_map_t& external_correlation_ids,
|
||||
const rocprofiler_dispatch_id_t dispatch_id);
|
||||
|
||||
void
|
||||
pc_sampling_service_start(context::pc_sampling_service* service);
|
||||
|
||||
@@ -47,6 +47,12 @@ operator==(device_handle a, device_handle b)
|
||||
|
||||
namespace Parser
|
||||
{
|
||||
struct dispatch_correlation_ids_t
|
||||
{
|
||||
rocprofiler_dispatch_id_t dispatch_id;
|
||||
rocprofiler_correlation_id_t correlation_id;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Struct imitating the correlation_id returned by the trap handler in raw PC samples.
|
||||
*/
|
||||
@@ -70,11 +76,11 @@ struct DispatchPkt
|
||||
|
||||
struct cache_type_t
|
||||
{
|
||||
trap_correlation_id_t id_in{.raw = ~0ul};
|
||||
rocprofiler_correlation_id_t id_out{};
|
||||
uint64_t dev_id = ~0ul;
|
||||
size_t increment = 0;
|
||||
size_t object_id = 0;
|
||||
trap_correlation_id_t id_in{.raw = ~0ul};
|
||||
dispatch_correlation_ids_t id_out{};
|
||||
uint64_t dev_id = ~0ul;
|
||||
size_t increment = 0;
|
||||
size_t object_id = 0;
|
||||
};
|
||||
|
||||
inline bool
|
||||
@@ -131,7 +137,7 @@ public:
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(mut);
|
||||
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id;
|
||||
dispatch_to_correlation[{trap_id, pkt.device}] = {pkt.dispatch_id, pkt.correlation_id};
|
||||
cache_reset_count.fetch_add(1);
|
||||
}
|
||||
|
||||
@@ -150,7 +156,7 @@ public:
|
||||
* Given a device dev, doorbell and wrapped dispatch_id,
|
||||
* @returns the correlation_id set by dispatch_pkt_id_t
|
||||
*/
|
||||
rocprofiler_correlation_id_t get(device_handle dev, trap_correlation_id_t correlation_in)
|
||||
dispatch_correlation_ids_t get(device_handle dev, trap_correlation_id_t correlation_in)
|
||||
{
|
||||
#ifndef _PARSER_CORRELATION_DISABLE_CACHE
|
||||
static thread_local cache_type_t cache{};
|
||||
@@ -195,9 +201,9 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<DispatchPkt, rocprofiler_correlation_id_t> dispatch_to_correlation{};
|
||||
std::atomic<size_t> cache_reset_count{1};
|
||||
size_t object_id = 0;
|
||||
std::unordered_map<DispatchPkt, dispatch_correlation_ids_t> dispatch_to_correlation{};
|
||||
std::atomic<size_t> cache_reset_count{1};
|
||||
size_t object_id = 0;
|
||||
|
||||
std::mutex mut;
|
||||
};
|
||||
@@ -238,9 +244,13 @@ add_upcoming_samples(const device_handle device,
|
||||
try
|
||||
{
|
||||
Parser::trap_correlation_id_t trap{.raw = snap->correlation_id};
|
||||
pc_sample.correlation_id = corr_map->get(device, trap);
|
||||
auto dispatch_correlation_ids = corr_map->get(device, trap);
|
||||
pc_sample.dispatch_id = dispatch_correlation_ids.dispatch_id;
|
||||
pc_sample.correlation_id = dispatch_correlation_ids.correlation_id;
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
// TODO: introduce ROCPROFILER_DISPATCH_ID_INTERNAL_NONE
|
||||
pc_sample.dispatch_id = 0;
|
||||
pc_sample.correlation_id = {.internal = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE,
|
||||
.external = rocprofiler_user_data_t{
|
||||
.value = ROCPROFILER_CORRELATION_ID_INTERNAL_NONE}};
|
||||
|
||||
@@ -73,7 +73,7 @@ typedef struct
|
||||
uint64_t read_index;
|
||||
/// both internal and external correlation ID.
|
||||
rocprofiler_correlation_id_t correlation_id;
|
||||
reserved_type _[2];
|
||||
rocprofiler_dispatch_id_t dispatch_id;
|
||||
} dispatch_pkt_id_t;
|
||||
|
||||
typedef struct
|
||||
|
||||
@@ -94,6 +94,7 @@ Benchmark(bool bWarmup)
|
||||
TEST(pcs_parser, benchmark_test)
|
||||
{
|
||||
// Tests for host trap v0 records
|
||||
std::cout << "Parsing rocprofiler_pc_sampling_record_host_trap_v0_t records!" << std::endl;
|
||||
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(true), true);
|
||||
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(false), true);
|
||||
EXPECT_EQ(Benchmark<rocprofiler_pc_sampling_record_host_trap_v0_t>(false), true);
|
||||
|
||||
@@ -351,9 +351,8 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
<< "wave_in_group: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
|
||||
<< "chiplet: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet)
|
||||
<< ", "
|
||||
// << "cu_id: " << pc_sample->hw_id << ", "
|
||||
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
|
||||
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
|
||||
<< "correlation: {internal=" << std::setw(7)
|
||||
<< pc_sample->correlation_id.internal << ", "
|
||||
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value
|
||||
|
||||
Αναφορά σε νέο ζήτημα
Block a user