Fixed pcs doorbell_id so it matches the trap handler (#309)
* Fixed pcs doorbell_id so it matches the trap handler
* source formatting (clang-format v11) (#310)
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>
* Changed trap_handler correlation_id to a struct and added comments
* Fixed one of the comments
* source formatting (clang-format v11) (#312)
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>
* Changing correlation_in to please the linter
* source formatting (clang-format v11) (#313)
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>
---------
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: ApoKalipse-V <ApoKalipse-V@users.noreply.github.com>
[ROCm/rocprofiler-sdk commit: 5761b5fb60]
Dieser Commit ist enthalten in:
committet von
GitHub
Ursprung
309a8e069b
Commit
946407623f
+60
-28
@@ -43,19 +43,33 @@ operator==(device_handle a, device_handle b)
|
||||
|
||||
namespace Parser
|
||||
{
|
||||
/*
|
||||
struct DispatchPkt
|
||||
/**
|
||||
* @brief Struct imitating the correlation_id returned by the trap handler in raw PC samples.
|
||||
*/
|
||||
union trap_correlation_id_t
|
||||
{
|
||||
uint64_t write_id; //! The location where this dispatch is written to
|
||||
uint64_t doorbell_id; //! The doorbell non-unique ID
|
||||
device_handle dev; //! Which device this is run
|
||||
}; */
|
||||
struct DispatchPkt
|
||||
{
|
||||
uint64_t correlation_id_in; //! Correlation ID seen by the trap handler
|
||||
device_handle dev; //! Which device this is run
|
||||
uint64_t raw;
|
||||
struct
|
||||
{
|
||||
uint64_t dispatch_index : 25;
|
||||
uint64_t _reserved0 : 7;
|
||||
uint64_t doorbell_id : 10;
|
||||
uint64_t _reserved1 : 22;
|
||||
} wrapped;
|
||||
};
|
||||
|
||||
struct DispatchPkt
|
||||
{
|
||||
trap_correlation_id_t correlation_id_in; //! Correlation ID seen by the trap handler
|
||||
device_handle dev; //! Which device this is run
|
||||
};
|
||||
|
||||
inline bool
|
||||
operator==(const trap_correlation_id_t& a, const trap_correlation_id_t& b)
|
||||
{
|
||||
return a.raw == b.raw;
|
||||
}
|
||||
|
||||
inline bool
|
||||
operator==(const DispatchPkt& a, const DispatchPkt& b)
|
||||
{
|
||||
@@ -68,7 +82,7 @@ struct std::hash<Parser::DispatchPkt>
|
||||
{
|
||||
size_t operator()(const Parser::DispatchPkt& d) const
|
||||
{
|
||||
return (d.correlation_id_in << 8) ^ d.dev.handle;
|
||||
return (d.correlation_id_in.raw << 8) ^ d.dev.handle;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -85,36 +99,39 @@ public:
|
||||
|
||||
/**
|
||||
* Checks whether a dispatch pkt will generate a collision.
|
||||
* Returns true on collision and false when slot is available.
|
||||
* @returns true on collision and false when slot is available.
|
||||
*/
|
||||
bool checkDispatch(const dispatch_pkt_id_t& pkt) const
|
||||
{
|
||||
uint64_t trap = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
auto trap = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
return dispatch_to_correlation.find({trap, pkt.device}) != dispatch_to_correlation.end();
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates the mapping of dispatch_id to correlation_id
|
||||
* @brief Updates the mapping of dispatch_id to correlation_id
|
||||
*/
|
||||
void newDispatch(const dispatch_pkt_id_t& pkt)
|
||||
{
|
||||
cache_dev_id = ~0ul;
|
||||
uint64_t trap_id = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
cache_dev_id = ~0ul;
|
||||
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
dispatch_to_correlation[{trap_id, pkt.device}] = pkt.correlation_id;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Allows the parser to forget a correlation_id, to save memory.
|
||||
*/
|
||||
void forget(const dispatch_pkt_id_t& pkt)
|
||||
{
|
||||
cache_dev_id = ~0ul;
|
||||
uint64_t trap_id = wrap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
cache_dev_id = ~0ul;
|
||||
auto trap_id = trap_correlation_id(pkt.doorbell_id, pkt.write_index, pkt.queue_size);
|
||||
dispatch_to_correlation.erase({trap_id, pkt.device});
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a device dev, doorbell and and wrapped dispatch_id, returns the
|
||||
* correlation_id set by dispatch_pkt_id_t
|
||||
* Given a device dev, doorbell and wrapped dispatch_id,
|
||||
* @returns the correlation_id set by dispatch_pkt_id_t
|
||||
*/
|
||||
uint64_t get(device_handle dev, uint64_t correlation_in)
|
||||
uint64_t get(device_handle dev, trap_correlation_id_t correlation_in)
|
||||
{
|
||||
#ifndef _PARSER_CORRELATION_DISABLE_CACHE
|
||||
if(dev.handle == cache_dev_id && correlation_in == cache_correlation_id_in)
|
||||
@@ -126,19 +143,33 @@ public:
|
||||
return cache_correlation_id_out;
|
||||
}
|
||||
|
||||
static uint64_t wrap_correlation_id(uint64_t doorbell, uint64_t write_idx, uint64_t queue_size)
|
||||
/**
|
||||
* Returns the correlation_id as seen by the trap handler, consisting of a
|
||||
* - wrapped dispatch_pkt
|
||||
* - doorbell_id divided by 8 Bytes
|
||||
* @param[in] doorbell The doorbell handler returned by HSA
|
||||
* @param[in] write_idx The dispatch packet write index, [optional] not wrapped
|
||||
* @param[in] queue_size The queue size. [optional] If write_index is already wrapped,
|
||||
* then this value can just be a large integer > queue_size.
|
||||
* @returns The correlation_id imitating the ones returned by the trap handler.
|
||||
*/
|
||||
static trap_correlation_id_t trap_correlation_id(uint64_t doorbell,
|
||||
uint64_t write_idx,
|
||||
uint64_t queue_size)
|
||||
{
|
||||
static constexpr uint64_t WRITE_WRAP = (1 << 25) - 1;
|
||||
return ((write_idx % queue_size) & WRITE_WRAP) | (uint64_t(doorbell) << 32);
|
||||
trap_correlation_id_t trap{.raw = 0};
|
||||
trap.wrapped.dispatch_index = write_idx % queue_size;
|
||||
trap.wrapped.doorbell_id = doorbell >> 3;
|
||||
return trap;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<DispatchPkt, uint64_t> dispatch_to_correlation{};
|
||||
|
||||
// Making get() const and these cache variables mutable causes performance to be unstable
|
||||
uint64_t cache_correlation_id_in = ~0ul; // Invalid value in cache
|
||||
uint64_t cache_correlation_id_out = ~0ul;
|
||||
uint64_t cache_dev_id = ~0ul; // Invalid device Id in cache
|
||||
trap_correlation_id_t cache_correlation_id_in{.raw = ~0ul}; // Invalid value in cache
|
||||
uint64_t cache_correlation_id_out = ~0ul;
|
||||
uint64_t cache_dev_id = ~0ul; // Invalid device Id in cache
|
||||
};
|
||||
} // namespace Parser
|
||||
|
||||
@@ -157,7 +188,8 @@ add_upcoming_samples(const device_handle device,
|
||||
samples[p] = copySample<bHostTrap, GFXIP>((const void*) (buffer + p));
|
||||
try
|
||||
{
|
||||
samples[p].correlation_id = corr_map->get(device, snap->correlation_id);
|
||||
Parser::trap_correlation_id_t trap{.raw = snap->correlation_id};
|
||||
samples[p].correlation_id = corr_map->get(device, trap);
|
||||
} catch(std::exception& e)
|
||||
{
|
||||
status = PCSAMPLE_STATUS_PARSER_ERROR;
|
||||
|
||||
+4
-4
@@ -127,7 +127,7 @@ public:
|
||||
perf_sample_snapshot_v1 snap;
|
||||
::memset(&snap, 0, sizeof(snap));
|
||||
snap.pc = dispatch->unique_id;
|
||||
snap.correlation_id = dispatch->getMockId();
|
||||
snap.correlation_id = dispatch->getMockId().raw;
|
||||
|
||||
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
|
||||
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
|
||||
@@ -297,7 +297,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
pcsample_v1_t sample;
|
||||
::memset(&sample, 0, sizeof(sample));
|
||||
sample.pc = dispatch->unique_id;
|
||||
sample.correlation_id = dispatch->getMockId();
|
||||
sample.correlation_id = dispatch->getMockId().raw;
|
||||
|
||||
sample.flags.valid = valid && !error;
|
||||
sample.wave_issued = issued;
|
||||
@@ -315,7 +315,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
|
||||
perf_sample_snapshot_v1 pss;
|
||||
pss.perf_snapshot_data = snap.raw;
|
||||
pss.correlation_id = dispatch->getMockId();
|
||||
pss.correlation_id = dispatch->getMockId().raw;
|
||||
dispatch->submit(std::move(pss));
|
||||
};
|
||||
|
||||
@@ -383,7 +383,7 @@ class WaveOtherFieldsTest : public WaveSnapTest
|
||||
snap.workgroup_id_z = blkz;
|
||||
snap.chiplet_and_wave_id = (chip << 8) | (wave & 0x3F);
|
||||
snap.hw_id = hwid;
|
||||
snap.correlation_id = dispatch->getMockId();
|
||||
snap.correlation_id = dispatch->getMockId().raw;
|
||||
|
||||
assert(dispatch.get());
|
||||
dispatch->submit(snap);
|
||||
|
||||
+4
-4
@@ -117,7 +117,7 @@ private:
|
||||
{
|
||||
std::unordered_set<size_t> set;
|
||||
for(size_t i = 0; i < num_unique_bells; i++)
|
||||
set.insert(i);
|
||||
set.insert(i << 3);
|
||||
return set;
|
||||
};
|
||||
static std::unordered_set<size_t> available_ids;
|
||||
@@ -208,9 +208,9 @@ public:
|
||||
}
|
||||
|
||||
//! Returns the "correlation_id" seen by the trap handler.
|
||||
uint64_t getMockId()
|
||||
Parser::trap_correlation_id_t getMockId()
|
||||
{
|
||||
return Parser::CorrelationMap::wrap_correlation_id(doorbell_id, dispatch_id, queue->size);
|
||||
return Parser::CorrelationMap::trap_correlation_id(doorbell_id, dispatch_id, queue->size);
|
||||
};
|
||||
|
||||
//! Submits a packet to the buffer
|
||||
@@ -254,7 +254,7 @@ public:
|
||||
packet_union_t uni;
|
||||
::memset(&uni, 0, sizeof(uni));
|
||||
uni.snap.pc = dispatch->unique_id;
|
||||
uni.snap.correlation_id = dispatch->getMockId();
|
||||
uni.snap.correlation_id = dispatch->getMockId().raw;
|
||||
dispatch->submit(uni);
|
||||
};
|
||||
void print()
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren