Fixed pcs doorbell_id so it matches the trap handler (#309)

* Fixed pcs doorbell_id so it matches the trap handler

* source formatting (clang-format v11) (#310)

Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>

* Changed trap_handler correlation_id to a struct and added comments

* Fixed one of the comments

* source formatting (clang-format v11) (#312)

Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>

* Changing correlation_in to please the linter

* source formatting (clang-format v11) (#313)

Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>

[ROCm/rocprofiler-sdk commit: 5761b5fb60]
Dieser Commit ist enthalten in:
Giovanni Lenzi Baraldi
2023-12-18 11:50:06 -03:00
committet von GitHub
Ursprung 309a8e069b
Commit 946407623f
3 geänderte Dateien mit 68 neuen und 36 gelöschten Zeilen
@@ -43,19 +43,33 @@ operator==(device_handle a, device_handle b)
namespace Parser
{
/*
struct DispatchPkt
/**
* @brief Union imitating the correlation_id returned by the trap handler in raw PC samples.
*/
union trap_correlation_id_t
{
uint64_t write_id; //! The location where this dispatch is written to
uint64_t doorbell_id; //! The doorbell non-unique ID
device_handle dev; //! Which device this is run
}; */
struct DispatchPkt
{
uint64_t correlation_id_in; //! Correlation ID seen by the trap handler
device_handle dev; //! Which device this is run
uint64_t raw;
struct
{
uint64_t dispatch_index : 25;
uint64_t _reserved0 : 7;
uint64_t doorbell_id : 10;
uint64_t _reserved1 : 22;
} wrapped;
};
struct DispatchPkt
{
trap_correlation_id_t correlation_id_in; //! Correlation ID seen by the trap handler
device_handle dev; //! Which device this is run
};
/**
* @brief Equality for trap correlation IDs: two IDs are equal when their 64-bit raw values match.
*/
inline bool
operator==(const trap_correlation_id_t& a, const trap_correlation_id_t& b)
{
return a.raw == b.raw;
}
inline bool
operator==(const DispatchPkt& a, const DispatchPkt& b)
{
@@ -68,7 +82,7 @@ struct std::hash<Parser::DispatchPkt>
{
size_t operator()(const Parser::DispatchPkt& d) const
{
return (d.correlation_id_in << 8) ^ d.dev.handle;
return (d.correlation_id_in.raw << 8) ^ d.dev.handle;
}
};
@@ -85,36 +99,39 @@ public:
/**
* Checks whether a dispatch pkt will generate a collision.
* Returns true on collision and false when slot is available.
* @returns true on collision and false when slot is available.
*/
bool checkDispatch(const dispatch_pkt_id_t& pkt) const
{
uint64_t trap = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
auto trap = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
return dispatch_to_correlation.find({trap, pkt.device}) != dispatch_to_correlation.end();
}
/**
* Updates the mapping of dispatch_id to correlation_id
* @brief Updates the mapping of dispatch_id to correlation_id
*/
void newDispatch(const dispatch_pkt_id_t& pkt)
{
cache_dev_id = ~0ul;
uint64_t trap_id = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
cache_dev_id = ~0ul;
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id;
}
/**
* @brief Allows the parser to forget a correlation_id, to save memory.
*/
void forget(const dispatch_pkt_id_t& pkt)
{
cache_dev_id = ~0ul;
uint64_t trap_id = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
cache_dev_id = ~0ul;
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
dispatch_to_correlation.erase({trap_id, pkt.device});
}
/**
* Given a device dev, doorbell and wrapped dispatch_id, returns the
* correlation_id set by dispatch_pkt_id_t
* Given a device dev, doorbell and wrapped dispatch_id,
* @returns the correlation_id set by dispatch_pkt_id_t
*/
uint64_t get(device_handle dev, uint64_t correlation_in)
uint64_t get(device_handle dev, trap_correlation_id_t correlation_in)
{
#ifndef _PARSER_CORRELATION_DISABLE_CACHE
if(dev.handle == cache_dev_id && correlation_in == cache_correlation_id_in)
@@ -126,19 +143,33 @@ public:
return cache_correlation_id_out;
}
static uint64_t wrap_correlation_id(uint64_t doorbell, uint64_t write_idx, uint64_t queue_size)
/**
* Returns the correlation_id as seen by the trap handler, consisting of a
* - wrapped dispatch_pkt
* - doorbell_id divided by 8 Bytes
* @param[in] doorbell The doorbell handler returned by HSA
* @param[in] write_idx The dispatch packet write index, [optional] not wrapped
* @param[in] queue_size The queue size. [optional] If write_index is already wrapped,
* then this value can just be a large integer > queue_size.
* @returns The correlation_id imitating the ones returned by the trap handler.
*/
static trap_correlation_id_t trap_correlation_id(uint64_t doorbell,
uint64_t write_idx,
uint64_t queue_size)
{
static constexpr uint64_t WRITE_WRAP = (1 << 25) - 1;
return ((write_idx % queue_size) & WRITE_WRAP) | (uint64_t(doorbell) << 32);
trap_correlation_id_t trap{.raw = 0};
trap.wrapped.dispatch_index = write_idx % queue_size;
trap.wrapped.doorbell_id = doorbell >> 3;
return trap;
}
private:
std::unordered_map<DispatchPkt, uint64_t> dispatch_to_correlation{};
// Making get() const and these cache variables mutable causes performance to be unstable
uint64_t cache_correlation_id_in = ~0ul; // Invalid value in cache
uint64_t cache_correlation_id_out = ~0ul;
uint64_t cache_dev_id = ~0ul; // Invalid device Id in cache
trap_correlation_id_t cache_correlation_id_in{.raw = ~0ul}; // Invalid value in cache
uint64_t cache_correlation_id_out = ~0ul;
uint64_t cache_dev_id = ~0ul; // Invalid device Id in cache
};
} // namespace Parser
@@ -157,7 +188,8 @@ add_upcoming_samples(const device_handle device,
samples[p] = copySample<bHostTrap, GFXIP>((const void*) (buffer + p));
try
{
samples[p].correlation_id = corr_map->get(device, snap->correlation_id);
Parser::trap_correlation_id_t trap{.raw = snap->correlation_id};
samples[p].correlation_id = corr_map->get(device, trap);
} catch(std::exception& e)
{
status = PCSAMPLE_STATUS_PARSER_ERROR;
@@ -127,7 +127,7 @@ public:
perf_sample_snapshot_v1 snap;
::memset(&snap, 0, sizeof(snap));
snap.pc = dispatch->unique_id;
snap.correlation_id = dispatch->getMockId();
snap.correlation_id = dispatch->getMockId().raw;
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
@@ -297,7 +297,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
pcsample_v1_t sample;
::memset(&sample, 0, sizeof(sample));
sample.pc = dispatch->unique_id;
sample.correlation_id = dispatch->getMockId();
sample.correlation_id = dispatch->getMockId().raw;
sample.flags.valid = valid && !error;
sample.wave_issued = issued;
@@ -315,7 +315,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
perf_sample_snapshot_v1 pss;
pss.perf_snapshot_data = snap.raw;
pss.correlation_id = dispatch->getMockId();
pss.correlation_id = dispatch->getMockId().raw;
dispatch->submit(std::move(pss));
};
@@ -383,7 +383,7 @@ class WaveOtherFieldsTest : public WaveSnapTest
snap.workgroup_id_z = blkz;
snap.chiplet_and_wave_id = (chip << 8) | (wave & 0x3F);
snap.hw_id = hwid;
snap.correlation_id = dispatch->getMockId();
snap.correlation_id = dispatch->getMockId().raw;
assert(dispatch.get());
dispatch->submit(snap);
@@ -117,7 +117,7 @@ private:
{
std::unordered_set<size_t> set;
for(size_t i = 0; i < num_unique_bells; i++)
set.insert(i);
set.insert(i << 3);
return set;
};
static std::unordered_set<size_t> available_ids;
@@ -208,9 +208,9 @@ public:
}
//! Returns the "correlation_id" seen by the trap handler.
uint64_t getMockId()
Parser::trap_correlation_id_t getMockId()
{
return Parser::CorrelationMap::wrap_correlation_id(doorbell_id, dispatch_id, queue->size);
return Parser::CorrelationMap::trap_correlation_id(doorbell_id, dispatch_id, queue->size);
};
//! Submits a packet to the buffer
@@ -254,7 +254,7 @@ public:
packet_union_t uni;
::memset(&uni, 0, sizeof(uni));
uni.snap.pc = dispatch->unique_id;
uni.snap.correlation_id = dispatch->getMockId();
uni.snap.correlation_id = dispatch->getMockId().raw;
dispatch->submit(uni);
};
void print()