Removed duplicated code (#784)
* Removed duplicated code
* Clang format
* Removing union from pc_sampling API
* Clang format
This commit is contained in:
committed by
GitHub
parent
b570ff5273
commit
b6c0b50d3e
@@ -181,11 +181,11 @@ private:
|
||||
|
||||
template <bool bHostTrap, typename GFXIP>
|
||||
inline pcsample_status_t
|
||||
add_upcoming_samples(const device_handle device,
|
||||
const generic_sample_t* buffer,
|
||||
const size_t available_samples,
|
||||
Parser::CorrelationMap* corr_map,
|
||||
pcsample_v1_t* samples)
|
||||
add_upcoming_samples(const device_handle device,
|
||||
const generic_sample_t* buffer,
|
||||
const size_t available_samples,
|
||||
Parser::CorrelationMap* corr_map,
|
||||
rocprofiler_pc_sampling_record_s* samples)
|
||||
{
|
||||
pcsample_status_t status = PCSAMPLE_STATUS_SUCCESS;
|
||||
for(uint64_t p = 0; p < available_samples; p++)
|
||||
@@ -241,8 +241,8 @@ _parse_buffer(generic_sample_t* buffer,
|
||||
|
||||
while(pkt_counter > 0)
|
||||
{
|
||||
pcsample_v1_t* samples = nullptr;
|
||||
uint64_t available_samples = callback(&samples, pkt_counter, userdata);
|
||||
rocprofiler_pc_sampling_record_s* samples = nullptr;
|
||||
uint64_t available_samples = callback(&samples, pkt_counter, userdata);
|
||||
|
||||
if(available_samples == 0 || available_samples > pkt_counter)
|
||||
return PCSAMPLE_STATUS_CALLBACK_ERROR;
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/pc_sampling.h>
|
||||
|
||||
/**
|
||||
* ######## Parser Definitions ########
|
||||
@@ -80,68 +81,13 @@ enum pcsample_arb_issue_state
|
||||
};
|
||||
}; // namespace PCSAMPLE
|
||||
|
||||
typedef union
|
||||
union pcsample_header_v1_t
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint8_t valid : 1;
|
||||
uint8_t type : 4; // 0=reserved, 1=hosttrap, 2=stochastic, 3=perfcounter, >=4 possible v2?
|
||||
uint8_t has_stall_reason : 1;
|
||||
uint8_t has_wave_cnt : 1;
|
||||
uint8_t reserved : 1;
|
||||
};
|
||||
uint8_t raw;
|
||||
} pcsample_header_v1_t;
|
||||
rocprofiler_pc_sampling_header_v1_t flags;
|
||||
uint8_t raw;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t dual_issue_valu : 1;
|
||||
uint32_t inst_type : 4;
|
||||
|
||||
uint32_t reason_not_issued : 7;
|
||||
uint32_t arb_state_issue : 10;
|
||||
uint32_t arb_state_stall : 10;
|
||||
} pcsample_snapshot_v1_t;
|
||||
|
||||
typedef union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t load_cnt : 6;
|
||||
uint32_t store_cnt : 6;
|
||||
uint32_t bvh_cnt : 3;
|
||||
uint32_t sample_cnt : 6;
|
||||
uint32_t ds_cnt : 6;
|
||||
uint32_t km_cnt : 5;
|
||||
};
|
||||
uint32_t raw;
|
||||
} pcsample_memorycounters_v1_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
pcsample_header_v1_t flags;
|
||||
uint8_t chiplet;
|
||||
uint8_t wave_id;
|
||||
uint8_t wave_issued : 1;
|
||||
uint8_t reserved : 7;
|
||||
uint32_t hw_id;
|
||||
|
||||
uint64_t pc;
|
||||
uint64_t exec_mask;
|
||||
uint32_t workgroup_id_x;
|
||||
uint32_t workgroup_id_y;
|
||||
uint32_t workgroup_id_z;
|
||||
|
||||
uint32_t wave_count;
|
||||
uint64_t timestamp;
|
||||
rocprofiler_correlation_id_t correlation_id;
|
||||
|
||||
pcsample_snapshot_v1_t snapshot;
|
||||
|
||||
pcsample_memorycounters_v1_t memory_counters;
|
||||
} pcsample_v1_t;
|
||||
|
||||
typedef uint64_t (*user_callback_t)(pcsample_v1_t**, uint64_t, void*);
|
||||
typedef uint64_t (*user_callback_t)(rocprofiler_pc_sampling_record_s**, uint64_t, void*);
|
||||
|
||||
/**
|
||||
* The types of errors to be returned by parse_buffer.
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp"
|
||||
|
||||
uint64_t
|
||||
PCSamplingParserContext::alloc(pcsample_v1_t** buffer, uint64_t size)
|
||||
PCSamplingParserContext::alloc(rocprofiler_pc_sampling_record_s** buffer, uint64_t size)
|
||||
{
|
||||
std::unique_lock<std::shared_mutex> lock(mut);
|
||||
assert(buffer != nullptr);
|
||||
|
||||
@@ -38,7 +38,7 @@ struct PCSamplingData
|
||||
: samples(size){};
|
||||
PCSamplingData& operator=(PCSamplingData&) = delete;
|
||||
|
||||
std::vector<pcsample_v1_t> samples;
|
||||
std::vector<rocprofiler_pc_sampling_record_s> samples;
|
||||
};
|
||||
|
||||
class PCSamplingParserContext
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
* @param[in] size Number of samples requested.
|
||||
* @returns Number of samples actually allocated on *buffer.
|
||||
*/
|
||||
uint64_t alloc(pcsample_v1_t** buffer, uint64_t size);
|
||||
uint64_t alloc(rocprofiler_pc_sampling_record_s** buffer, uint64_t size);
|
||||
|
||||
/**
|
||||
* @brief Parses a chunk of samples.
|
||||
@@ -112,8 +112,8 @@ protected:
|
||||
|
||||
while(pkt_counter > 0)
|
||||
{
|
||||
pcsample_v1_t* samples = nullptr;
|
||||
uint64_t memsize = alloc(&samples, pkt_counter);
|
||||
rocprofiler_pc_sampling_record_s* samples = nullptr;
|
||||
uint64_t memsize = alloc(&samples, pkt_counter);
|
||||
|
||||
if(memsize == 0 || memsize > pkt_counter) return PCSAMPLE_STATUS_CALLBACK_ERROR;
|
||||
|
||||
@@ -137,7 +137,8 @@ protected:
|
||||
*/
|
||||
pcsample_status_t flushForgetList();
|
||||
static void generate_id_completion_record(const dispatch_pkt_id_t& pkt) { (void) pkt; };
|
||||
static void generate_upcoming_pc_record(const pcsample_v1_t* samples, size_t num_samples)
|
||||
static void generate_upcoming_pc_record(const rocprofiler_pc_sampling_record_s* samples,
|
||||
size_t num_samples)
|
||||
{
|
||||
(void) samples;
|
||||
(void) num_samples;
|
||||
|
||||
@@ -56,22 +56,23 @@ Benchmark(bool bWarmup)
|
||||
for(size_t i = 0; i < SAMPLE_PER_DISPATCH; i++)
|
||||
MockWave(dispatch).genPCSample();
|
||||
|
||||
std::pair<pcsample_v1_t*, size_t> userdata;
|
||||
userdata.first = new pcsample_v1_t[TOTAL_NUM_SAMPLES];
|
||||
std::pair<rocprofiler_pc_sampling_record_s*, size_t> userdata;
|
||||
userdata.first = new rocprofiler_pc_sampling_record_s[TOTAL_NUM_SAMPLES];
|
||||
userdata.second = TOTAL_NUM_SAMPLES;
|
||||
|
||||
auto t0 = std::chrono::system_clock::now();
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
[](pcsample_v1_t** sample, uint64_t size, void* userdata_) {
|
||||
auto* pair = reinterpret_cast<std::pair<pcsample_v1_t*, size_t>*>(
|
||||
userdata_);
|
||||
assert(TOTAL_NUM_SAMPLES == pair->second);
|
||||
*sample = pair->first;
|
||||
return size;
|
||||
},
|
||||
&userdata));
|
||||
CHECK_PARSER(parse_buffer(
|
||||
(generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
[](rocprofiler_pc_sampling_record_s** sample, uint64_t size, void* userdata_) {
|
||||
auto* pair =
|
||||
reinterpret_cast<std::pair<rocprofiler_pc_sampling_record_s*, size_t>*>(userdata_);
|
||||
assert(TOTAL_NUM_SAMPLES == pair->second);
|
||||
*sample = pair->first;
|
||||
return size;
|
||||
},
|
||||
&userdata));
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
float samples_per_us = float(TOTAL_NUM_SAMPLES) / (t1 - t0).count() * 1E3f;
|
||||
|
||||
@@ -79,7 +80,8 @@ Benchmark(bool bWarmup)
|
||||
{
|
||||
std::cout << "Benchmark: Parsed " << int(samples_per_us * 1E3f + 0.5f) * 1E-3f
|
||||
<< " Msample/s (";
|
||||
std::cout << int(sizeof(pcsample_v1_t) * samples_per_us) << " MB/s)" << std::endl;
|
||||
std::cout << int(sizeof(rocprofiler_pc_sampling_record_s) * samples_per_us) << " MB/s)"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
delete[] userdata.first;
|
||||
|
||||
@@ -33,13 +33,15 @@ std::mt19937 rdgen(1);
|
||||
/**
|
||||
* Sample user memory allocation callback.
|
||||
* It expects userdata to be cast-able to a pointer to
|
||||
* std::vector<std::pair<pcsample_v1_t*, uint64_t>>
|
||||
* std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>>
|
||||
*/
|
||||
static uint64_t
|
||||
alloc_callback(pcsample_v1_t** buffer, uint64_t size, void* userdata)
|
||||
alloc_callback(rocprofiler_pc_sampling_record_s** buffer, uint64_t size, void* userdata)
|
||||
{
|
||||
*buffer = new pcsample_v1_t[size];
|
||||
auto& vector = *reinterpret_cast<std::vector<std::pair<pcsample_v1_t*, uint64_t>>*>(userdata);
|
||||
*buffer = new rocprofiler_pc_sampling_record_s[size];
|
||||
auto& vector =
|
||||
*reinterpret_cast<std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>>*>(
|
||||
userdata);
|
||||
vector.push_back({*buffer, size});
|
||||
return size;
|
||||
}
|
||||
@@ -49,7 +51,7 @@ alloc_callback(pcsample_v1_t** buffer, uint64_t size, void* userdata)
|
||||
* the reconstructed correlation_id.
|
||||
*/
|
||||
static bool
|
||||
check_samples(pcsample_v1_t* samples, uint64_t size)
|
||||
check_samples(rocprofiler_pc_sampling_record_s* samples, uint64_t size)
|
||||
{
|
||||
for(size_t i = 0; i < size; i++)
|
||||
if(samples[i].correlation_id.internal != samples[i].pc) return false;
|
||||
@@ -69,7 +71,7 @@ TEST(pcs_parser, hello_world)
|
||||
MockWave(dispatch).genPCSample();
|
||||
MockWave(dispatch).genPCSample();
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
@@ -112,7 +114,7 @@ TEST(pcs_parser, reverse_wave_order)
|
||||
for(auto it = dispatches.begin(); it != dispatches.end(); it++)
|
||||
MockWave(*it).genPCSample();
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
@@ -148,7 +150,7 @@ TEST(pcs_parser, dispatch_wrapping)
|
||||
MockWave(dispatch).genPCSample();
|
||||
}
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
@@ -195,7 +197,7 @@ TEST(pcs_parser, random_samples)
|
||||
for(int i = 0; i < num_samples; i++)
|
||||
MockWave(dispatches[rdgen() % dispatches.size()]).genPCSample();
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
@@ -288,7 +290,7 @@ TEST(pcs_parser, queue_hammer)
|
||||
<< std::endl;
|
||||
std::cout << "Max queue occupancy: " << max_q_occupancy << "\n\n" << std::endl;
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
@@ -300,8 +302,8 @@ TEST(pcs_parser, queue_hammer)
|
||||
NUM_ACTIONS); // QueueHammer test: Incorrect number of callbacks
|
||||
for(auto sb = 0ul; sb < all_allocations.size(); sb++)
|
||||
{
|
||||
pcsample_v1_t* samples = all_allocations[sb].first;
|
||||
size_t num_samples = all_allocations[sb].second;
|
||||
rocprofiler_pc_sampling_record_s* samples = all_allocations[sb].first;
|
||||
size_t num_samples = all_allocations[sb].second;
|
||||
|
||||
EXPECT_EQ(num_samples, NUM_QUEUES); // QueueHammer: Incorrect number of samples
|
||||
EXPECT_EQ(check_samples(samples, num_samples),
|
||||
@@ -327,7 +329,7 @@ TEST(pcs_parser, multi_buffer)
|
||||
const auto& packets = firstBuffer->packets;
|
||||
secondBuffer->packets = std::vector<packet_union_t>(packets.begin() + 2, packets.end());
|
||||
|
||||
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
|
||||
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) firstBuffer->packets.data(),
|
||||
firstBuffer->packets.size(),
|
||||
|
||||
@@ -34,11 +34,11 @@
|
||||
#define GFXIP_MAJOR 9
|
||||
|
||||
#define TYPECHECK(x) \
|
||||
snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = ::PCSAMPLE::x, \
|
||||
.reason_not_issued = 0, \
|
||||
.arb_state_issue = 0, \
|
||||
.arb_state_stall = 0});
|
||||
snapshots.push_back(rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = ::PCSAMPLE::x, \
|
||||
.reason_not_issued = 0, \
|
||||
.arb_state_issue = 0, \
|
||||
.arb_state_stall = 0});
|
||||
#define UNROLL_TYPECHECK() \
|
||||
TYPECHECK(TYPE_VALU); \
|
||||
TYPECHECK(TYPE_MATRIX); \
|
||||
@@ -56,11 +56,11 @@
|
||||
TYPECHECK(TYPE_NO_INST);
|
||||
|
||||
#define REASONCHECK(x) \
|
||||
snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = 0, \
|
||||
.reason_not_issued = ::PCSAMPLE::x, \
|
||||
.arb_state_issue = 0, \
|
||||
.arb_state_stall = 0});
|
||||
snapshots.push_back(rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = 0, \
|
||||
.reason_not_issued = ::PCSAMPLE::x, \
|
||||
.arb_state_issue = 0, \
|
||||
.arb_state_stall = 0});
|
||||
#define UNROLL_REASONCHECK(x) \
|
||||
REASONCHECK(REASON_NOT_AVAILABLE); \
|
||||
REASONCHECK(REASON_ALU); \
|
||||
@@ -72,11 +72,12 @@
|
||||
REASONCHECK(REASON_OTHER_WAIT);
|
||||
|
||||
#define ARBCHECK1(x, y) \
|
||||
snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = 0, \
|
||||
.reason_not_issued = 0, \
|
||||
.arb_state_issue = 1 << ::PCSAMPLE::x, \
|
||||
.arb_state_stall = 1 << ::PCSAMPLE::y});
|
||||
snapshots.push_back( \
|
||||
rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = 0, \
|
||||
.reason_not_issued = 0, \
|
||||
.arb_state_issue = 1 << ::PCSAMPLE::x, \
|
||||
.arb_state_stall = 1 << ::PCSAMPLE::y});
|
||||
#define ARBCHECK2(x) \
|
||||
ARBCHECK1(x, ISSUE_VALU); \
|
||||
ARBCHECK1(x, ISSUE_MATRIX); \
|
||||
@@ -163,8 +164,8 @@ public:
|
||||
assert(parsed[0][i].wave_count == i);
|
||||
}
|
||||
|
||||
const size_t max_wave_number = 64;
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
const size_t max_wave_number = 64;
|
||||
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class InstTypeTest : public WaveSnapTest
|
||||
@@ -190,7 +191,7 @@ public:
|
||||
assert(snapshots[i].inst_type == parsed[0][i].snapshot.inst_type);
|
||||
}
|
||||
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class StallReasonTest : public WaveSnapTest
|
||||
@@ -216,7 +217,7 @@ public:
|
||||
assert(snapshots[i].reason_not_issued == parsed[0][i].snapshot.reason_not_issued);
|
||||
}
|
||||
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class ArbStateTest : public WaveSnapTest
|
||||
@@ -247,7 +248,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<pcsample_snapshot_v1_t> snapshots;
|
||||
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
};
|
||||
|
||||
class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
@@ -294,7 +295,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
|
||||
void genPCSample(bool valid, bool issued, bool dual, bool error)
|
||||
{
|
||||
pcsample_v1_t sample;
|
||||
rocprofiler_pc_sampling_record_s sample;
|
||||
::memset(&sample, 0, sizeof(sample));
|
||||
sample.pc = dispatch->unique_id;
|
||||
sample.correlation_id.internal = dispatch->getMockId().raw;
|
||||
@@ -319,7 +320,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
dispatch->submit(std::move(pss));
|
||||
};
|
||||
|
||||
std::vector<pcsample_v1_t> compare;
|
||||
std::vector<rocprofiler_pc_sampling_record_s> compare;
|
||||
};
|
||||
|
||||
class WaveOtherFieldsTest : public WaveSnapTest
|
||||
@@ -359,7 +360,7 @@ class WaveOtherFieldsTest : public WaveSnapTest
|
||||
|
||||
void genPCSample(int pc, int exec, int blkx, int blky, int blkz, int chip, int wave, int hwid)
|
||||
{
|
||||
pcsample_v1_t sample;
|
||||
rocprofiler_pc_sampling_record_s sample;
|
||||
::memset(&sample, 0, sizeof(sample));
|
||||
|
||||
sample.exec_mask = exec;
|
||||
@@ -391,7 +392,7 @@ class WaveOtherFieldsTest : public WaveSnapTest
|
||||
(void) pc;
|
||||
};
|
||||
|
||||
std::vector<pcsample_v1_t> compare;
|
||||
std::vector<rocprofiler_pc_sampling_record_s> compare;
|
||||
};
|
||||
|
||||
TEST(pcs_parser, gfx9_test)
|
||||
|
||||
@@ -65,7 +65,7 @@ public:
|
||||
submit(uni);
|
||||
}
|
||||
|
||||
std::vector<std::vector<pcsample_v1_t>> get_parsed_buffer(int GFXIP_MAJOR)
|
||||
std::vector<std::vector<rocprofiler_pc_sampling_record_s>> get_parsed_buffer(int GFXIP_MAJOR)
|
||||
{
|
||||
parsed_data = {};
|
||||
|
||||
@@ -78,16 +78,18 @@ public:
|
||||
return parsed_data;
|
||||
}
|
||||
|
||||
static uint64_t alloc_parse_memory(pcsample_v1_t** sample, uint64_t req_size, void* userdata)
|
||||
static uint64_t alloc_parse_memory(rocprofiler_pc_sampling_record_s** sample,
|
||||
uint64_t req_size,
|
||||
void* userdata)
|
||||
{
|
||||
auto* buffer = reinterpret_cast<MockRuntimeBuffer*>(userdata);
|
||||
buffer->parsed_data.push_back(std::vector<pcsample_v1_t>(req_size));
|
||||
buffer->parsed_data.push_back(std::vector<rocprofiler_pc_sampling_record_s>(req_size));
|
||||
*sample = buffer->parsed_data.back().data();
|
||||
return req_size;
|
||||
}
|
||||
|
||||
std::vector<packet_union_t> packets;
|
||||
std::vector<std::vector<pcsample_v1_t>> parsed_data;
|
||||
std::vector<packet_union_t> packets;
|
||||
std::vector<std::vector<rocprofiler_pc_sampling_record_s>> parsed_data;
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -32,11 +32,11 @@
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h"
|
||||
|
||||
template <typename SType>
|
||||
inline pcsample_v1_t
|
||||
inline rocprofiler_pc_sampling_record_s
|
||||
copySampleHeader(const SType& sample)
|
||||
{
|
||||
pcsample_v1_t ret;
|
||||
ret.flags.raw = 0;
|
||||
rocprofiler_pc_sampling_record_s ret;
|
||||
ret.flags = pcsample_header_v1_t{.raw = 0}.flags;
|
||||
ret.flags.type = AMD_SNAPSHOT_V1;
|
||||
|
||||
ret.pc = sample.pc;
|
||||
@@ -52,24 +52,24 @@ copySampleHeader(const SType& sample)
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline pcsample_v1_t
|
||||
inline rocprofiler_pc_sampling_record_s
|
||||
copyHostTrapSample(const perf_sample_host_trap_v1& sample)
|
||||
{
|
||||
pcsample_v1_t ret = copySampleHeader<perf_sample_host_trap_v1>(sample);
|
||||
ret.flags.type = AMD_HOST_TRAP_V1;
|
||||
rocprofiler_pc_sampling_record_s ret = copySampleHeader<perf_sample_host_trap_v1>(sample);
|
||||
ret.flags.type = AMD_HOST_TRAP_V1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename gfx>
|
||||
inline pcsample_v1_t
|
||||
inline rocprofiler_pc_sampling_record_s
|
||||
copyStochasticSample(const perf_sample_snapshot_v1& sample);
|
||||
|
||||
template <>
|
||||
inline pcsample_v1_t
|
||||
inline rocprofiler_pc_sampling_record_s
|
||||
copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
|
||||
{
|
||||
pcsample_v1_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
|
||||
rocprofiler_pc_sampling_record_s ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
|
||||
// Check wave_id matches snapshot_wave_id
|
||||
|
||||
ret.flags.has_wave_cnt = true;
|
||||
@@ -83,16 +83,16 @@ copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
|
||||
ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
|
||||
ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF;
|
||||
ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF;
|
||||
ret.memory_counters.raw = 0;
|
||||
ret.reserved = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline pcsample_v1_t
|
||||
inline rocprofiler_pc_sampling_record_s
|
||||
copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
|
||||
{
|
||||
pcsample_v1_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
|
||||
rocprofiler_pc_sampling_record_s ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
|
||||
// Check wave_id matches snapshot_wave_id
|
||||
|
||||
ret.flags.has_stall_reason = true;
|
||||
@@ -103,7 +103,7 @@ copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
|
||||
ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 9) & 0x7F;
|
||||
ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 16) & 0x7F;
|
||||
ret.snapshot.dual_issue_valu = false;
|
||||
ret.memory_counters.raw = 0;
|
||||
ret.reserved = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -195,12 +195,13 @@ translate_inst(int in)
|
||||
#undef LUTOVERLOAD
|
||||
|
||||
template <bool HostTrap, typename GFX>
|
||||
inline pcsample_v1_t
|
||||
inline rocprofiler_pc_sampling_record_s
|
||||
copySample(const void* sample)
|
||||
{
|
||||
if(HostTrap) return copyHostTrapSample(*(const perf_sample_host_trap_v1*) sample);
|
||||
|
||||
pcsample_v1_t ret = copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
|
||||
rocprofiler_pc_sampling_record_s ret =
|
||||
copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
|
||||
|
||||
ret.snapshot.inst_type = translate_inst<GFX>(ret.snapshot.inst_type);
|
||||
ret.snapshot.arb_state_issue = translate_arb<GFX>(ret.snapshot.arb_state_issue);
|
||||
|
||||
Reference in New Issue
Block a user