Removed duplicated code (#784)

* Removed duplicated code

* Clang format

* Removing union from pc_sampling API

* Clang format
This commit is contained in:
Giovanni Lenzi Baraldi
2024-04-18 07:42:26 -03:00
committed by GitHub
parent b570ff5273
commit b6c0b50d3e
9 changed files with 101 additions and 146 deletions
@@ -181,11 +181,11 @@ private:
template <bool bHostTrap, typename GFXIP>
inline pcsample_status_t
add_upcoming_samples(const device_handle device,
const generic_sample_t* buffer,
const size_t available_samples,
Parser::CorrelationMap* corr_map,
pcsample_v1_t* samples)
add_upcoming_samples(const device_handle device,
const generic_sample_t* buffer,
const size_t available_samples,
Parser::CorrelationMap* corr_map,
rocprofiler_pc_sampling_record_s* samples)
{
pcsample_status_t status = PCSAMPLE_STATUS_SUCCESS;
for(uint64_t p = 0; p < available_samples; p++)
@@ -241,8 +241,8 @@ _parse_buffer(generic_sample_t* buffer,
while(pkt_counter > 0)
{
pcsample_v1_t* samples = nullptr;
uint64_t available_samples = callback(&samples, pkt_counter, userdata);
rocprofiler_pc_sampling_record_s* samples = nullptr;
uint64_t available_samples = callback(&samples, pkt_counter, userdata);
if(available_samples == 0 || available_samples > pkt_counter)
return PCSAMPLE_STATUS_CALLBACK_ERROR;
@@ -23,6 +23,7 @@
#pragma once
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/pc_sampling.h>
/**
* ######## Parser Definitions ########
@@ -80,68 +81,13 @@ enum pcsample_arb_issue_state
};
}; // namespace PCSAMPLE
// Sample header: a set of flag bit-fields that shares storage with `raw`, so
// the whole header can be read or reset through one uint8_t. In the inline
// form the bit-fields add up to 8 bits (1 + 4 + 1 + 1 + 1).
// NOTE(review): this span appears to interleave the removed inline definition
// with the added one that reuses rocprofiler_pc_sampling_header_v1_t for
// `flags` — confirm against the raw diff before treating either as current.
typedef union
union pcsample_header_v1_t
{
struct
{
uint8_t valid : 1;
uint8_t type : 4; // 0=reserved, 1=hosttrap, 2=stochastic, 3=perfcounter, >=4 possible v2?
uint8_t has_stall_reason : 1;
uint8_t has_wave_cnt : 1;
uint8_t reserved : 1;
};
uint8_t raw;
} pcsample_header_v1_t;
rocprofiler_pc_sampling_header_v1_t flags;
uint8_t raw;
};
// Snapshot fields packed as bit-fields of uint32_t; the declared widths total
// 32 bits (1 + 4 + 7 + 10 + 10).
// NOTE(review): elsewhere in this diff the users of this type switch to
// rocprofiler_pc_sampling_snapshot_v1_t, so this local definition is
// presumably the one being removed — confirm.
typedef struct
{
uint32_t dual_issue_valu : 1;
uint32_t inst_type : 4;
uint32_t reason_not_issued : 7;
uint32_t arb_state_issue : 10;
uint32_t arb_state_stall : 10;
} pcsample_snapshot_v1_t;
// Counter values packed as bit-fields of uint32_t (declared widths total 32
// bits: 6 + 6 + 3 + 6 + 6 + 5), overlaid with `raw` so all counters can be set
// at once through a single 32-bit value.
// NOTE(review): presumably removed by this commit in favour of the public
// header's definition — confirm.
typedef union
{
struct
{
uint32_t load_cnt : 6;
uint32_t store_cnt : 6;
uint32_t bvh_cnt : 3;
uint32_t sample_cnt : 6;
uint32_t ds_cnt : 6;
uint32_t km_cnt : 5;
};
uint32_t raw;
} pcsample_memorycounters_v1_t;
// Parser-side PC sample record: header flags, wave/hardware identifiers, the
// sampled pc and exec mask, workgroup ids, wave count, timestamp, correlation
// id, and the packed snapshot and memory-counter fields.
// NOTE(review): every use of pcsample_v1_t in this diff is paired with a line
// using rocprofiler_pc_sampling_record_s, so this struct is presumably the
// duplicated definition the commit removes — confirm.
typedef struct
{
pcsample_header_v1_t flags;
uint8_t chiplet;
uint8_t wave_id;
// wave_issued and reserved together occupy 8 bits of bit-field storage.
uint8_t wave_issued : 1;
uint8_t reserved : 7;
uint32_t hw_id;
uint64_t pc;
uint64_t exec_mask;
uint32_t workgroup_id_x;
uint32_t workgroup_id_y;
uint32_t workgroup_id_z;
uint32_t wave_count;
uint64_t timestamp;
rocprofiler_correlation_id_t correlation_id;
pcsample_snapshot_v1_t snapshot;
pcsample_memorycounters_v1_t memory_counters;
} pcsample_v1_t;
// Allocation callback handed to the parser. It receives the address of a
// record pointer to fill in, a record count, and an opaque user pointer, and
// returns a uint64_t count. The parse loops in this diff treat a return of 0,
// or a value larger than the requested count, as PCSAMPLE_STATUS_CALLBACK_ERROR.
// NOTE(review): the two lines below look like the before/after forms of the
// same typedef (record type renamed) — confirm against the raw diff.
typedef uint64_t (*user_callback_t)(pcsample_v1_t**, uint64_t, void*);
typedef uint64_t (*user_callback_t)(rocprofiler_pc_sampling_record_s**, uint64_t, void*);
/**
* The types of errors to be returned by parse_buffer.
@@ -23,7 +23,7 @@
#include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp"
uint64_t
PCSamplingParserContext::alloc(pcsample_v1_t** buffer, uint64_t size)
PCSamplingParserContext::alloc(rocprofiler_pc_sampling_record_s** buffer, uint64_t size)
{
std::unique_lock<std::shared_mutex> lock(mut);
assert(buffer != nullptr);
@@ -38,7 +38,7 @@ struct PCSamplingData
: samples(size){};
PCSamplingData& operator=(PCSamplingData&) = delete;
std::vector<pcsample_v1_t> samples;
std::vector<rocprofiler_pc_sampling_record_s> samples;
};
class PCSamplingParserContext
@@ -52,7 +52,7 @@ public:
* @param[in] size Number of samples requested.
* @returns Number of samples actually allocated on *buffer.
*/
uint64_t alloc(pcsample_v1_t** buffer, uint64_t size);
uint64_t alloc(rocprofiler_pc_sampling_record_s** buffer, uint64_t size);
/**
* @brief Parses a chunk of samples.
@@ -112,8 +112,8 @@ protected:
while(pkt_counter > 0)
{
pcsample_v1_t* samples = nullptr;
uint64_t memsize = alloc(&samples, pkt_counter);
rocprofiler_pc_sampling_record_s* samples = nullptr;
uint64_t memsize = alloc(&samples, pkt_counter);
if(memsize == 0 || memsize > pkt_counter) return PCSAMPLE_STATUS_CALLBACK_ERROR;
@@ -137,7 +137,8 @@ protected:
*/
pcsample_status_t flushForgetList();
// No-op hook: the packet argument is deliberately discarded via the (void) cast.
static void generate_id_completion_record(const dispatch_pkt_id_t& pkt) { (void) pkt; };
static void generate_upcoming_pc_record(const pcsample_v1_t* samples, size_t num_samples)
static void generate_upcoming_pc_record(const rocprofiler_pc_sampling_record_s* samples,
size_t num_samples)
{
(void) samples;
(void) num_samples;
@@ -56,22 +56,23 @@ Benchmark(bool bWarmup)
for(size_t i = 0; i < SAMPLE_PER_DISPATCH; i++)
MockWave(dispatch).genPCSample();
std::pair<pcsample_v1_t*, size_t> userdata;
userdata.first = new pcsample_v1_t[TOTAL_NUM_SAMPLES];
std::pair<rocprofiler_pc_sampling_record_s*, size_t> userdata;
userdata.first = new rocprofiler_pc_sampling_record_s[TOTAL_NUM_SAMPLES];
userdata.second = TOTAL_NUM_SAMPLES;
auto t0 = std::chrono::system_clock::now();
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
[](pcsample_v1_t** sample, uint64_t size, void* userdata_) {
auto* pair = reinterpret_cast<std::pair<pcsample_v1_t*, size_t>*>(
userdata_);
assert(TOTAL_NUM_SAMPLES == pair->second);
*sample = pair->first;
return size;
},
&userdata));
CHECK_PARSER(parse_buffer(
(generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
[](rocprofiler_pc_sampling_record_s** sample, uint64_t size, void* userdata_) {
auto* pair =
reinterpret_cast<std::pair<rocprofiler_pc_sampling_record_s*, size_t>*>(userdata_);
assert(TOTAL_NUM_SAMPLES == pair->second);
*sample = pair->first;
return size;
},
&userdata));
auto t1 = std::chrono::system_clock::now();
float samples_per_us = float(TOTAL_NUM_SAMPLES) / (t1 - t0).count() * 1E3f;
@@ -79,7 +80,8 @@ Benchmark(bool bWarmup)
{
std::cout << "Benchmark: Parsed " << int(samples_per_us * 1E3f + 0.5f) * 1E-3f
<< " Msample/s (";
std::cout << int(sizeof(pcsample_v1_t) * samples_per_us) << " MB/s)" << std::endl;
std::cout << int(sizeof(rocprofiler_pc_sampling_record_s) * samples_per_us) << " MB/s)"
<< std::endl;
}
delete[] userdata.first;
@@ -33,13 +33,15 @@ std::mt19937 rdgen(1);
/**
* Sample user memory allocation callback.
* It expects userdata to be cast-able to a pointer to
* std::vector<std::pair<pcsample_v1_t*, uint64_t>>
* std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>>
*/
// NOTE(review): the signature and the first two statements appear twice below,
// once with pcsample_v1_t and once with rocprofiler_pc_sampling_record_s —
// presumably the before/after sides of the diff. Confirm before compiling.
static uint64_t
alloc_callback(pcsample_v1_t** buffer, uint64_t size, void* userdata)
alloc_callback(rocprofiler_pc_sampling_record_s** buffer, uint64_t size, void* userdata)
{
// Allocate an array of `size` records with new[] and hand it back through
// `buffer`. Nothing in this function frees it.
// NOTE(review): confirm the tests release every array they collect.
*buffer = new pcsample_v1_t[size];
auto& vector = *reinterpret_cast<std::vector<std::pair<pcsample_v1_t*, uint64_t>>*>(userdata);
*buffer = new rocprofiler_pc_sampling_record_s[size];
auto& vector =
*reinterpret_cast<std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>>*>(
userdata);
// Record the array and its length in the caller-provided vector.
vector.push_back({*buffer, size});
// Report that the full request was satisfied.
return size;
}
@@ -49,7 +51,7 @@ alloc_callback(pcsample_v1_t** buffer, uint64_t size, void* userdata)
* the reconstructed correlation_id.
*/
static bool
check_samples(pcsample_v1_t* samples, uint64_t size)
check_samples(rocprofiler_pc_sampling_record_s* samples, uint64_t size)
{
for(size_t i = 0; i < size; i++)
if(samples[i].correlation_id.internal != samples[i].pc) return false;
@@ -69,7 +71,7 @@ TEST(pcs_parser, hello_world)
MockWave(dispatch).genPCSample();
MockWave(dispatch).genPCSample();
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
@@ -112,7 +114,7 @@ TEST(pcs_parser, reverse_wave_order)
for(auto it = dispatches.begin(); it != dispatches.end(); it++)
MockWave(*it).genPCSample();
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
@@ -148,7 +150,7 @@ TEST(pcs_parser, dispatch_wrapping)
MockWave(dispatch).genPCSample();
}
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
@@ -195,7 +197,7 @@ TEST(pcs_parser, random_samples)
for(int i = 0; i < num_samples; i++)
MockWave(dispatches[rdgen() % dispatches.size()]).genPCSample();
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
@@ -288,7 +290,7 @@ TEST(pcs_parser, queue_hammer)
<< std::endl;
std::cout << "Max queue occupancy: " << max_q_occupancy << "\n\n" << std::endl;
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
@@ -300,8 +302,8 @@ TEST(pcs_parser, queue_hammer)
NUM_ACTIONS); // QueueHammer test: Incorrect number of callbacks
for(auto sb = 0ul; sb < all_allocations.size(); sb++)
{
pcsample_v1_t* samples = all_allocations[sb].first;
size_t num_samples = all_allocations[sb].second;
rocprofiler_pc_sampling_record_s* samples = all_allocations[sb].first;
size_t num_samples = all_allocations[sb].second;
EXPECT_EQ(num_samples, NUM_QUEUES); // QueueHammer: Incorrect number of samples
EXPECT_EQ(check_samples(samples, num_samples),
@@ -327,7 +329,7 @@ TEST(pcs_parser, multi_buffer)
const auto& packets = firstBuffer->packets;
secondBuffer->packets = std::vector<packet_union_t>(packets.begin() + 2, packets.end());
std::vector<std::pair<pcsample_v1_t*, uint64_t>> all_allocations;
std::vector<std::pair<rocprofiler_pc_sampling_record_s*, uint64_t>> all_allocations;
CHECK_PARSER(parse_buffer((generic_sample_t*) firstBuffer->packets.data(),
firstBuffer->packets.size(),
@@ -34,11 +34,11 @@
#define GFXIP_MAJOR 9
// TYPECHECK(x): appends to `snapshots` a snapshot whose inst_type is
// ::PCSAMPLE::x and whose other fields are all zero.
// NOTE(review): two push_back bodies follow (old and new snapshot type); they
// look like the before/after sides of the diff rather than one macro body.
#define TYPECHECK(x) \
snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu = 0, \
.inst_type = ::PCSAMPLE::x, \
.reason_not_issued = 0, \
.arb_state_issue = 0, \
.arb_state_stall = 0});
snapshots.push_back(rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
.inst_type = ::PCSAMPLE::x, \
.reason_not_issued = 0, \
.arb_state_issue = 0, \
.arb_state_stall = 0});
#define UNROLL_TYPECHECK() \
TYPECHECK(TYPE_VALU); \
TYPECHECK(TYPE_MATRIX); \
@@ -56,11 +56,11 @@
TYPECHECK(TYPE_NO_INST);
// REASONCHECK(x): appends to `snapshots` a snapshot whose reason_not_issued is
// ::PCSAMPLE::x and whose other fields are all zero.
// NOTE(review): two push_back bodies follow (old and new snapshot type); they
// look like the before/after sides of the diff rather than one macro body.
#define REASONCHECK(x) \
snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu = 0, \
.inst_type = 0, \
.reason_not_issued = ::PCSAMPLE::x, \
.arb_state_issue = 0, \
.arb_state_stall = 0});
snapshots.push_back(rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
.inst_type = 0, \
.reason_not_issued = ::PCSAMPLE::x, \
.arb_state_issue = 0, \
.arb_state_stall = 0});
#define UNROLL_REASONCHECK(x) \
REASONCHECK(REASON_NOT_AVAILABLE); \
REASONCHECK(REASON_ALU); \
@@ -72,11 +72,12 @@
REASONCHECK(REASON_OTHER_WAIT);
// ARBCHECK1(x, y): appends to `snapshots` a snapshot with a single bit set in
// each arbiter field — bit ::PCSAMPLE::x in arb_state_issue and bit
// ::PCSAMPLE::y in arb_state_stall — and every other field zero.
// NOTE(review): two push_back bodies follow (old and new snapshot type); they
// look like the before/after sides of the diff rather than one macro body.
#define ARBCHECK1(x, y) \
snapshots.push_back(pcsample_snapshot_v1_t{.dual_issue_valu = 0, \
.inst_type = 0, \
.reason_not_issued = 0, \
.arb_state_issue = 1 << ::PCSAMPLE::x, \
.arb_state_stall = 1 << ::PCSAMPLE::y});
snapshots.push_back( \
rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
.inst_type = 0, \
.reason_not_issued = 0, \
.arb_state_issue = 1 << ::PCSAMPLE::x, \
.arb_state_stall = 1 << ::PCSAMPLE::y});
#define ARBCHECK2(x) \
ARBCHECK1(x, ISSUE_VALU); \
ARBCHECK1(x, ISSUE_MATRIX); \
@@ -163,8 +164,8 @@ public:
assert(parsed[0][i].wave_count == i);
}
const size_t max_wave_number = 64;
std::vector<pcsample_snapshot_v1_t> snapshots;
const size_t max_wave_number = 64;
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
};
class InstTypeTest : public WaveSnapTest
@@ -190,7 +191,7 @@ public:
assert(snapshots[i].inst_type == parsed[0][i].snapshot.inst_type);
}
std::vector<pcsample_snapshot_v1_t> snapshots;
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
};
class StallReasonTest : public WaveSnapTest
@@ -216,7 +217,7 @@ public:
assert(snapshots[i].reason_not_issued == parsed[0][i].snapshot.reason_not_issued);
}
std::vector<pcsample_snapshot_v1_t> snapshots;
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
};
class ArbStateTest : public WaveSnapTest
@@ -247,7 +248,7 @@ public:
}
}
std::vector<pcsample_snapshot_v1_t> snapshots;
std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
};
class WaveIssueAndErrorTest : public WaveSnapTest
@@ -294,7 +295,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
void genPCSample(bool valid, bool issued, bool dual, bool error)
{
pcsample_v1_t sample;
rocprofiler_pc_sampling_record_s sample;
::memset(&sample, 0, sizeof(sample));
sample.pc = dispatch->unique_id;
sample.correlation_id.internal = dispatch->getMockId().raw;
@@ -319,7 +320,7 @@ class WaveIssueAndErrorTest : public WaveSnapTest
dispatch->submit(std::move(pss));
};
std::vector<pcsample_v1_t> compare;
std::vector<rocprofiler_pc_sampling_record_s> compare;
};
class WaveOtherFieldsTest : public WaveSnapTest
@@ -359,7 +360,7 @@ class WaveOtherFieldsTest : public WaveSnapTest
void genPCSample(int pc, int exec, int blkx, int blky, int blkz, int chip, int wave, int hwid)
{
pcsample_v1_t sample;
rocprofiler_pc_sampling_record_s sample;
::memset(&sample, 0, sizeof(sample));
sample.exec_mask = exec;
@@ -391,7 +392,7 @@ class WaveOtherFieldsTest : public WaveSnapTest
(void) pc;
};
std::vector<pcsample_v1_t> compare;
std::vector<rocprofiler_pc_sampling_record_s> compare;
};
TEST(pcs_parser, gfx9_test)
@@ -65,7 +65,7 @@ public:
submit(uni);
}
std::vector<std::vector<pcsample_v1_t>> get_parsed_buffer(int GFXIP_MAJOR)
std::vector<std::vector<rocprofiler_pc_sampling_record_s>> get_parsed_buffer(int GFXIP_MAJOR)
{
parsed_data = {};
@@ -78,16 +78,18 @@ public:
return parsed_data;
}
// Allocation callback backed by a MockRuntimeBuffer: `userdata` is cast to the
// buffer, a fresh vector of `req_size` records is appended to its parsed_data,
// and `*sample` is pointed at that vector's storage. Always returns req_size.
// NOTE(review): the signature and the push_back appear twice (pcsample_v1_t
// vs rocprofiler_pc_sampling_record_s) — presumably the before/after sides of
// the diff; confirm.
static uint64_t alloc_parse_memory(pcsample_v1_t** sample, uint64_t req_size, void* userdata)
static uint64_t alloc_parse_memory(rocprofiler_pc_sampling_record_s** sample,
uint64_t req_size,
void* userdata)
{
auto* buffer = reinterpret_cast<MockRuntimeBuffer*>(userdata);
buffer->parsed_data.push_back(std::vector<pcsample_v1_t>(req_size));
buffer->parsed_data.push_back(std::vector<rocprofiler_pc_sampling_record_s>(req_size));
// Hand out the storage of the vector that was just appended.
*sample = buffer->parsed_data.back().data();
return req_size;
}
std::vector<packet_union_t> packets;
std::vector<std::vector<pcsample_v1_t>> parsed_data;
std::vector<packet_union_t> packets;
std::vector<std::vector<rocprofiler_pc_sampling_record_s>> parsed_data;
};
/**
@@ -32,11 +32,11 @@
#include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h"
template <typename SType>
inline pcsample_v1_t
inline rocprofiler_pc_sampling_record_s
copySampleHeader(const SType& sample)
{
pcsample_v1_t ret;
ret.flags.raw = 0;
rocprofiler_pc_sampling_record_s ret;
ret.flags = pcsample_header_v1_t{.raw = 0}.flags;
ret.flags.type = AMD_SNAPSHOT_V1;
ret.pc = sample.pc;
@@ -52,24 +52,24 @@ copySampleHeader(const SType& sample)
return ret;
}
// Builds a record from a host-trap sample: the common fields come from
// copySampleHeader, then flags.type is overwritten with AMD_HOST_TRAP_V1
// (copySampleHeader itself sets AMD_SNAPSHOT_V1).
// NOTE(review): the return type and the two body statements appear twice
// (pcsample_v1_t vs rocprofiler_pc_sampling_record_s) — presumably the
// before/after sides of the diff; confirm.
inline pcsample_v1_t
inline rocprofiler_pc_sampling_record_s
copyHostTrapSample(const perf_sample_host_trap_v1& sample)
{
pcsample_v1_t ret = copySampleHeader<perf_sample_host_trap_v1>(sample);
ret.flags.type = AMD_HOST_TRAP_V1;
rocprofiler_pc_sampling_record_s ret = copySampleHeader<perf_sample_host_trap_v1>(sample);
ret.flags.type = AMD_HOST_TRAP_V1;
return ret;
}
// Primary template, declared but not defined here; explicit specializations
// for GFX9 and GFX11 follow in this file.
// NOTE(review): two return-type lines are shown (old and new record type) —
// presumably the before/after sides of the diff; confirm.
template <typename gfx>
inline pcsample_v1_t
inline rocprofiler_pc_sampling_record_s
copyStochasticSample(const perf_sample_snapshot_v1& sample);
template <>
inline pcsample_v1_t
inline rocprofiler_pc_sampling_record_s
copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
{
pcsample_v1_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
rocprofiler_pc_sampling_record_s ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
// Check wave_id matches snapshot_wave_id
ret.flags.has_wave_cnt = true;
@@ -83,16 +83,16 @@ copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF;
ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF;
ret.memory_counters.raw = 0;
ret.reserved = 0;
return ret;
}
template <>
inline pcsample_v1_t
inline rocprofiler_pc_sampling_record_s
copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
{
pcsample_v1_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
rocprofiler_pc_sampling_record_s ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
// Check wave_id matches snapshot_wave_id
ret.flags.has_stall_reason = true;
@@ -103,7 +103,7 @@ copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 9) & 0x7F;
ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 16) & 0x7F;
ret.snapshot.dual_issue_valu = false;
ret.memory_counters.raw = 0;
ret.reserved = 0;
return ret;
}
@@ -195,12 +195,13 @@ translate_inst(int in)
#undef LUTOVERLOAD
template <bool HostTrap, typename GFX>
inline pcsample_v1_t
inline rocprofiler_pc_sampling_record_s
copySample(const void* sample)
{
if(HostTrap) return copyHostTrapSample(*(const perf_sample_host_trap_v1*) sample);
pcsample_v1_t ret = copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
rocprofiler_pc_sampling_record_s ret =
copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
ret.snapshot.inst_type = translate_inst<GFX>(ret.snapshot.inst_type);
ret.snapshot.arb_state_issue = translate_arb<GFX>(ret.snapshot.arb_state_issue);