* GFX950 Stochastic PC sampling

* Use actual type instead of void *

* error reporting if the pcs method is inappropriate

[ROCm/rocprofiler-sdk commit: 2bb64e9c9a]
Этот коммит содержится в:
Indic, Vladimir
2025-05-29 14:29:52 +02:00
коммит произвёл GitHub
родитель e55c31db27
Коммит ec2fe441e6
17 изменённых файлов: 493 добавлений и 170 удалений
+6 -3
Просмотреть файл
@@ -175,9 +175,12 @@ data_ready_callback(void* client_callback_data,
// TODO: how about using std::future
std::condition_variable cv;
auto gfx_major = ((agent_session->agent->gfx_target_version / 10000) % 100);
auto pcs_parser_status = agent_session->parser->parse(
upc, reinterpret_cast<const generic_sample_t*>(buff.get()), gfx_major, cv, false);
auto pcs_parser_status =
agent_session->parser->parse(upc,
reinterpret_cast<const generic_sample_t*>(buff.get()),
agent_session->agent->gfx_target_version,
cv,
false);
if(pcs_parser_status != PCSAMPLE_STATUS_SUCCESS)
{
+1 -1
Просмотреть файл
@@ -1,7 +1,7 @@
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES pc_record_interface.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_HEADERS
correlation.hpp gfx9.hpp gfx11.hpp parser_types.hpp pc_record_interface.hpp rocr.h
translation.hpp)
translation.hpp gfx950.hpp)
target_sources(
rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES}
+18 -5
Просмотреть файл
@@ -240,16 +240,19 @@ add_upcoming_samples(const device_handle device,
const auto* snap = reinterpret_cast<const perf_sample_snapshot_v1*>(buffer + p);
auto& pc_sample = samples[p];
pc_sample = copySample<GFXIP, PcSamplingRecordT>((const void*) (buffer + p));
pc_sample = copySample<GFXIP, PcSamplingRecordT>(static_cast<const void*>(snap));
// skip invalid samples
if(pc_sample.size == 0) continue;
// Correct PC address of the original sample (if needed) prior to decoding it.
auto pc_address = correct_pc_address<GFXIP, PcSamplingRecordT>(snap);
// Convert PC -> (loaded code object id containing PC, offset within code object)
if(!cache_addr_range.inrange(snap->pc))
cache_addr_range = table->find_codeobj_in_range(snap->pc);
if(!cache_addr_range.inrange(pc_address.value))
cache_addr_range = table->find_codeobj_in_range(pc_address.value);
pc_sample.pc.code_object_id = cache_addr_range.id;
pc_sample.pc.code_object_offset = snap->pc - cache_addr_range.addr;
pc_sample.pc.code_object_offset = pc_address.value - cache_addr_range.addr;
try
{
@@ -357,6 +360,7 @@ template <typename PcSamplingRecordT>
pcsample_status_t inline parse_buffer(generic_sample_t* buffer,
uint64_t buffer_size,
int gfxip_major,
int gfxip_minor,
user_callback_t<PcSamplingRecordT> callback,
void* userdata)
{
@@ -364,11 +368,20 @@ pcsample_status_t inline parse_buffer(generic_sample_t* buffer,
auto parseSample_func = _parse_buffer<GFX9, PcSamplingRecordT>;
if(gfxip_major == 9)
parseSample_func = _parse_buffer<GFX9, PcSamplingRecordT>;
{
if(gfxip_minor == 5)
{
parseSample_func = _parse_buffer<GFX950, PcSamplingRecordT>;
}
}
else if(gfxip_major == 11)
{
parseSample_func = _parse_buffer<GFX11, PcSamplingRecordT>;
}
else
{
return PCSAMPLE_STATUS_INVALID_GFXIP;
}
return parseSample_func(buffer, buffer_size, callback, userdata, corr_map.get());
};
+26
Просмотреть файл
@@ -0,0 +1,26 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
/**
 * @brief Architecture tag type for GFX950.
 *
 * Used as the GFXIP template argument to select GFX950-specific parser
 * specializations. It derives from GFX9 and adds or overrides nothing itself.
 */
class GFX950 : public GFX9
{};
+4
Просмотреть файл
@@ -120,6 +120,10 @@ enum PCSAMPLE_STATUS
* Invalid GFXIP string was passed to the parser.
*/
PCSAMPLE_STATUS_INVALID_GFXIP,
/**
* Invalid PC sampling method was passed to the parser.
*/
PCSAMPLE_STATUS_INVALID_METHOD,
/**
* Last error type
*/
+55 -15
Просмотреть файл
@@ -52,30 +52,70 @@ PCSamplingParserContext::alloc<rocprofiler_pc_sampling_record_stochastic_v0_t>(
return size;
}
/**
 * @brief Select the `_parse` instantiation matching the GFXIP and the PC sampling method.
 *
 * @tparam GFXIP Architecture tag forwarded to `_parse`.
 * @param pcs_method PC sampling method of the samples to be parsed.
 * @return Pointer to the matching `_parse` member function, or nullptr when
 *         @p pcs_method is neither host-trap nor stochastic.
 */
template <typename GFXIP>
PCSamplingParserContext::parse_funct_ptr_t
PCSamplingParserContext::_get_parse_func_for_method(rocprofiler_pc_sampling_method_t pcs_method)
{
    using host_trap_record_t  = rocprofiler_pc_sampling_record_host_trap_v0_t;
    using stochastic_record_t = rocprofiler_pc_sampling_record_stochastic_v0_t;

    if(pcs_method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
        return &PCSamplingParserContext::_parse<GFXIP, host_trap_record_t>;

    if(pcs_method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
        return &PCSamplingParserContext::_parse<GFXIP, stochastic_record_t>;

    // Inappropriate sampling method: the caller reports the error.
    return nullptr;
}
pcsample_status_t
PCSamplingParserContext::parse(const upcoming_samples_t& upcoming,
const generic_sample_t* data_,
int gfxip_major,
uint32_t gfx_target_version,
std::condition_variable& midway_signal,
bool bRocrBufferFlip)
{
bool bIsHostTrap = upcoming.which_sample_type == AMD_HOST_TRAP_V1;
auto gfxip_major = (gfx_target_version / 10000) % 100;
auto gfxip_minor = (gfx_target_version / 100) % 100;
auto pcs_method = (upcoming.which_sample_type == AMD_HOST_TRAP_V1)
? ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP
: ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC;
// Template instantiation is faster!
auto parseSample_func =
bIsHostTrap
? &PCSamplingParserContext::_parse<GFX9, rocprofiler_pc_sampling_record_host_trap_v0_t>
: &PCSamplingParserContext::_parse<GFX9,
rocprofiler_pc_sampling_record_stochastic_v0_t>;
if(gfxip_major == 11)
parseSample_func =
bIsHostTrap
? &PCSamplingParserContext::_parse<GFX11,
rocprofiler_pc_sampling_record_host_trap_v0_t>
: &PCSamplingParserContext::_parse<GFX11,
rocprofiler_pc_sampling_record_stochastic_v0_t>;
else if(gfxip_major != 9)
parse_funct_ptr_t parseSample_func = nullptr;
if(gfxip_major == 9)
{
if(gfxip_minor == 5)
{
parseSample_func = _get_parse_func_for_method<GFX950>(pcs_method);
}
else
{
parseSample_func = _get_parse_func_for_method<GFX9>(pcs_method);
}
}
else if(gfxip_major == 11)
{
parseSample_func = _get_parse_func_for_method<GFX11>(pcs_method);
}
else
{
return PCSAMPLE_STATUS_INVALID_GFXIP;
}
if(parseSample_func == nullptr)
{
return PCSAMPLE_STATUS_INVALID_METHOD;
}
auto status = (this->*parseSample_func)(upcoming, data_);
midway_signal.notify_all();
+7 -1
Просмотреть файл
@@ -85,7 +85,7 @@ public:
*/
pcsample_status_t parse(const upcoming_samples_t& upcoming,
const generic_sample_t* data,
int gfxip_major,
uint32_t gfx_target_version,
std::condition_variable& midway_signal,
bool bFlushCorrelationIds);
@@ -199,5 +199,11 @@ protected:
mutable std::shared_mutex mut;
private:
using parse_funct_ptr_t = pcsample_status_t (
PCSamplingParserContext::*)(const upcoming_samples_t&, const generic_sample_t*);
template <typename GFXIP>
parse_funct_ptr_t _get_parse_func_for_method(rocprofiler_pc_sampling_method_t pcs_method);
std::unordered_map<rocprofiler_agent_id_t, rocprofiler_buffer_id_t> _agent_buffers;
};
+14 -5
Просмотреть файл
@@ -5,10 +5,18 @@ include(GoogleTest)
set(PCTEST_INCLUDE_DIR
${PROJECT_SOURCE_DIR}/source/lib/rocprofiler-sdk/pc_sampling/parser/)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_ID_TEST_SOURCES correlation_id_test.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_BENCH_TEST_SOURCES benchmark_test.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_GFX9_TEST_SOURCES gfx9test.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_HEADERS mocks.hpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES mocks.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_ID_TEST_SOURCES
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} correlation_id_test.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_BENCH_TEST_SOURCES
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} benchmark_test.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_GFX9_TEST_SOURCES
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} gfx9test.hpp gfx9test.cpp
gfx950test.cpp)
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} multigpu.cpp)
add_executable(pcs_gfx9_test)
@@ -70,7 +78,8 @@ target_link_libraries(
add_executable(pcs_thread_test)
target_compile_options(pcs_thread_test PRIVATE "-Ofast")
target_sources(pcs_thread_test PRIVATE multigpu.cpp)
target_sources(pcs_thread_test
PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES})
target_include_directories(pcs_thread_test PRIVATE ${PCTEST_INCLUDE_DIR})
target_link_libraries(
@@ -81,7 +90,7 @@ target_link_libraries(
gtest_add_tests(
TARGET pcs_thread_test
SOURCES multigpu.cpp
SOURCES ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES}
TEST_LIST pcs_thread_test_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
@@ -26,6 +26,7 @@
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
#define GFXIP_MAJOR 9
#define GFXIP_MINOR 4
/**
* Benchmarks how fast the parser can process samples on a single threaded case
@@ -75,6 +76,7 @@ Benchmark(bool bWarmup)
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
user_cb,
&userdata));
auto t1 = std::chrono::system_clock::now();
@@ -27,6 +27,7 @@
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
#define GFXIP_MAJOR 9
#define GFXIP_MINOR 4
std::mt19937 rdgen(1);
@@ -77,6 +78,7 @@ pcs_parser_hello_world()
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
@@ -131,6 +133,7 @@ pcs_parser_reverse_wave_order()
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
@@ -171,6 +174,7 @@ pcs_parser_dispatch_wrapping()
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
@@ -226,6 +230,7 @@ pcs_parser_random_samples()
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
@@ -330,6 +335,7 @@ pcs_parser_queue_hammer()
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
buffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
@@ -382,11 +388,13 @@ pcs_parser_multi_buffer()
CHECK_PARSER(parse_buffer((generic_sample_t*) firstBuffer->packets.data(),
firstBuffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
CHECK_PARSER(parse_buffer((generic_sample_t*) secondBuffer->packets.data(),
secondBuffer->packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
alloc_callback<PcSamplingRecordT>,
(void*) &all_allocations));
+72
Просмотреть файл
@@ -0,0 +1,72 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifdef NDEBUG
# undef NDEBUG
#endif
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp"
#include <gtest/gtest.h>
#include <cstddef>
/**
 * @brief Verifies that the PC address is corrected on GFX950 when the mid_macro bit is set.
 *
 * Reuses the generic mid_macro fixture, but parses the mock buffer as GFX950
 * (major 9, minor 5) and expects a rewound PC whenever mid_macro is in effect.
 */
template <typename PcSamplingRecordT>
class MidMacroPCCorrectionGFX950 : public MidMacroPCCorrection<PcSamplingRecordT>
{
    using base_type = MidMacroPCCorrection<PcSamplingRecordT>;

    // mid_macro exists only for stochastic PC sampling on GFX950
    static constexpr bool supports_mid_macro =
        std::is_same<PcSamplingRecordT, rocprofiler_pc_sampling_record_stochastic_v0_t>::value;

public:
    /// Generates a sample, dropping the mid_macro flag for record types that do not carry it.
    void genPCSample(uint64_t pc, bool mid_macro) override
    {
        // Explicitly invoke the parent class implementation with the effective flag.
        base_type::genPCSample(pc, supports_mid_macro && mid_macro);
    }

    /// Expected PC after correction.
    uint64_t calcaulteExpectedPC(uint64_t pc, bool mid_macro) override
    {
        if(!mid_macro) return pc;

        // According to the regspec, if mid_macro is true, we need to subtract 2 dwords from the
        // PC address.
        return pc - 2 * sizeof(uint32_t);
    }

    /// Parses the mock buffer as GFX950.
    std::vector<std::vector<PcSamplingRecordT>> get_parsed_data() override
    {
        constexpr int gfxip_major = 9;
        constexpr int gfxip_minor = 5;
        return this->buffer->get_parsed_buffer(gfxip_major, gfxip_minor);
    }
};
// Runs the GFX950 mid_macro PC-correction fixture for both record types.
// Each fixture is a temporary, so its mock objects are destroyed before the next one is created.
TEST(pcs_parser, gfx950_test)
{
// Host-trap records: the fixture clears mid_macro, so the PC is expected to stay unchanged.
MidMacroPCCorrectionGFX950<rocprofiler_pc_sampling_record_host_trap_v0_t>{}.Test();
// Stochastic records: the PC is expected to be rewound when mid_macro is set.
MidMacroPCCorrectionGFX950<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
std::cout << "GFX950 Test Done." << std::endl;
}
+79 -47
Просмотреть файл
@@ -24,6 +24,7 @@
# undef NDEBUG
#endif
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
@@ -138,53 +139,6 @@
\
NON_GFX9_ARBSTATE_IS_ZERO(x, y)
template <typename PcSamplingRecordT>
class WaveSnapTest
{
public:
WaveSnapTest()
{
buffer = std::make_shared<MockRuntimeBuffer<PcSamplingRecordT>>();
queue = std::make_shared<MockQueue<PcSamplingRecordT>>(16, buffer);
dispatch = std::make_shared<MockDispatch<PcSamplingRecordT>>(queue);
}
void Test()
{
FillBuffers();
CheckBuffers();
}
virtual void FillBuffers() = 0;
virtual void CheckBuffers() = 0;
void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall)
{
wave_cnt &= 0x3F;
inst_type &= 0xF;
reason &= 0x7;
arb_issue &= 0xFF;
arb_stall &= 0xFF;
perf_sample_snapshot_v1 snap;
::memset(&snap, 0, sizeof(snap));
snap.pc = dispatch->unique_id;
snap.correlation_id = dispatch->getMockId().raw;
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
snap.perf_snapshot_data |= 0x1; // sample is valid
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
snap.perf_snapshot_data1 = wave_cnt;
EXPECT_NE(dispatch.get(), nullptr);
dispatch->submit(packet_union_t{.snap = snap});
};
std::shared_ptr<MockRuntimeBuffer<PcSamplingRecordT>> buffer;
std::shared_ptr<MockQueue<PcSamplingRecordT>> queue;
std::shared_ptr<MockDispatch<PcSamplingRecordT>> dispatch;
};
template <typename PcSamplingRecordT>
class WaveCntTest : public WaveSnapTest<PcSamplingRecordT>
{
@@ -611,6 +565,81 @@ class WaveOtherFieldsTest : public WaveSnapTest<PcSamplingRecordT>
std::vector<PcSamplingRecordT> compare;
};
/**
 * @brief This test verifies that the PC address remains unchanged for GFX9.
 *
 * FillBuffers announces three upcoming samples and then generates them,
 * two of which have the mid_macro flag set.
 */
template <typename PcSamplingRecordT>
void
MidMacroPCCorrection<PcSamplingRecordT>::FillBuffers()
{
    struct sample_spec_t
    {
        uint64_t pc;
        bool     mid_macro;
    };

    // NOTE: mid_macro is relevant only on GFX950
    const sample_spec_t specs[] = {{0x800, true}, {0x900, false}, {0x1000, true}};

    this->buffer->genUpcomingSamples(static_cast<int>(sizeof(specs) / sizeof(specs[0])));

    for(const auto& spec : specs)
    {
        genPCSample(spec.pc, spec.mid_macro);
    }
}
/**
 * @brief Parses the mock buffer as a GFX9 device and returns the parsed records.
 *
 * Only the major version is passed; the mock buffer's default minor version applies.
 */
template <typename PcSamplingRecordT>
std::vector<std::vector<PcSamplingRecordT>>
MidMacroPCCorrection<PcSamplingRecordT>::get_parsed_data()
{
    constexpr int gfxip_major = 9;  // GFXIP==9
    return this->buffer->get_parsed_buffer(gfxip_major);
}
/**
 * @brief Parses the generated samples and compares their PC offsets with the expected ones.
 *
 * Expects a single parsed batch holding three records whose
 * `pc.code_object_offset` values match the entries stored in `compare`.
 */
template <typename PcSamplingRecordT>
void
MidMacroPCCorrection<PcSamplingRecordT>::CheckBuffers()
{
    const auto parsed = get_parsed_data();

    // Fatal assertions on purpose: continuing after a size mismatch would index
    // `parsed` / `compare` out of bounds in the loop below.
    ASSERT_EQ(parsed.size(), 1u);
    ASSERT_EQ(compare.size(), 3u);
    ASSERT_EQ(parsed[0].size(), compare.size());

    for(size_t i = 0; i < compare.size(); i++)
    {
        // verifying PC address
        EXPECT_EQ(parsed[0][i].pc.code_object_offset, compare[i].pc.code_object_offset);
    }
}
/**
 * @brief Returns the PC value the parser is expected to report for a generated sample.
 *
 * By default, the PC address remains unchanged: this implementation returns @p pc as-is
 * and ignores the mid_macro flag.
 *
 * NOTE: the spelling of the function name is kept because it is the name of the
 * virtual function declared on the class.
 */
template <typename PcSamplingRecordT>
uint64_t
MidMacroPCCorrection<PcSamplingRecordT>::calcaulteExpectedPC(uint64_t pc, bool /*mid_macro*/)
{
return pc;
}
/**
 * @brief Generates one PC sample and records the PC value expected after parsing.
 *
 * @param pc        Raw PC address written into the generated snapshot.
 * @param mid_macro Value of the mid_macro bit (bit 31 of perf_snapshot_data1).
 */
template <typename PcSamplingRecordT>
void
MidMacroPCCorrection<PcSamplingRecordT>::genPCSample(uint64_t pc, bool mid_macro)
{
    // Check the dispatch before it is dereferenced below; nothing can be submitted without it.
    ASSERT_NE(this->dispatch.get(), nullptr);

    PcSamplingRecordT sample;
    ::memset(&sample, 0, sizeof(sample));
    // Calculate the expected PC address
    sample.pc.code_object_offset = calcaulteExpectedPC(pc, mid_macro);
    compare.push_back(sample);

    // This test considers only PC address.
    perf_sample_snapshot_v1 snap;
    ::memset(&snap, 0, sizeof(snap));
    snap.pc = pc;
    // Mandatory for correlation mapping. Otherwise, parsing error occurs.
    snap.correlation_id = this->dispatch->getMockId().raw;
    // To ensure all stochastic samples are generated properly, mark them as valid.
    snap.perf_snapshot_data |= 0x1;  // set the bit indicating the sample is valid
    // mid_macro is the bit at position 31. Shift an unsigned value: a bool promotes to a
    // signed int, and shifting that into the sign bit is not portable before C++20.
    snap.perf_snapshot_data1 = static_cast<uint32_t>(mid_macro) << 31;

    this->dispatch->submit(snap);
}
TEST(pcs_parser, gfx9_test)
{
// Tests specific to stochastic sampling only
@@ -628,5 +657,8 @@ TEST(pcs_parser, gfx9_test)
WaveOtherFieldsTest<rocprofiler_pc_sampling_record_host_trap_v0_t>{}.Test();
WaveOtherFieldsTest<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
MidMacroPCCorrection<rocprofiler_pc_sampling_record_host_trap_v0_t>{}.Test();
MidMacroPCCorrection<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
std::cout << "GFX9 Test Done." << std::endl;
}
+105
Просмотреть файл
@@ -0,0 +1,105 @@
// MIT License
//
// Copyright (c) 2025 ROCm Developer Tools
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
#include <gtest/gtest.h>
template <typename PcSamplingRecordT>
class WaveSnapTest
{
public:
WaveSnapTest()
{
buffer = std::make_shared<MockRuntimeBuffer<PcSamplingRecordT>>();
queue = std::make_shared<MockQueue<PcSamplingRecordT>>(16, buffer);
dispatch = std::make_shared<MockDispatch<PcSamplingRecordT>>(queue);
}
void Test()
{
FillBuffers();
CheckBuffers();
}
virtual void FillBuffers() = 0;
virtual void CheckBuffers() = 0;
void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall)
{
wave_cnt &= 0x3F;
inst_type &= 0xF;
reason &= 0x7;
arb_issue &= 0xFF;
arb_stall &= 0xFF;
perf_sample_snapshot_v1 snap;
::memset(&snap, 0, sizeof(snap));
snap.pc = dispatch->unique_id;
snap.correlation_id = dispatch->getMockId().raw;
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
snap.perf_snapshot_data |= 0x1; // sample is valid
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
snap.perf_snapshot_data1 = wave_cnt;
EXPECT_NE(dispatch.get(), nullptr);
dispatch->submit(packet_union_t{.snap = snap});
};
std::shared_ptr<MockRuntimeBuffer<PcSamplingRecordT>> buffer;
std::shared_ptr<MockQueue<PcSamplingRecordT>> queue;
std::shared_ptr<MockDispatch<PcSamplingRecordT>> dispatch;
};
/**
 * @brief Testing how mid_macro bit affects the PC address.
 *
 * On GFX950, this bit triggers correction of the PC address.
 * On other GFX9 architectures, the PC address remains unchanged.
 */
template <typename PcSamplingRecordT>
class MidMacroPCCorrection : public WaveSnapTest<PcSamplingRecordT>
{
public:
/// Generates the PC samples used by this test.
void FillBuffers() override;
/// Parses the generated samples and compares their PC offsets with the expected ones.
void CheckBuffers() override;
/**
 * @brief Generate PC sample with mid_macro flag.
 * The @p mid_macro is relevant only for GFX950, so it is false by default.
 */
virtual void genPCSample(uint64_t pc, bool mid_macro = false);
/**
 * @brief Calculate the expected PC address for comparison.
 */
virtual uint64_t calcaulteExpectedPC(uint64_t pc, bool mid_macro = false);
/// Runs the parser over the mock buffer and returns the parsed records.
virtual std::vector<std::vector<PcSamplingRecordT>> get_parsed_data();
protected:
/// Expected records, appended in the order the samples are generated.
std::vector<PcSamplingRecordT> compare;
};
+39
Просмотреть файл
@@ -0,0 +1,39 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
/**
 * @brief Host-trap specialization: announces @p num_samples upcoming samples
 * tagged as AMD_HOST_TRAP_V1 by forwarding to the two-argument overload.
 */
template <>
void
MockRuntimeBuffer<rocprofiler_pc_sampling_record_host_trap_v0_t>::genUpcomingSamples(
int num_samples)
{
genUpcomingSamples(num_samples, AMD_HOST_TRAP_V1);
}
/**
 * @brief Stochastic specialization: announces @p num_samples upcoming samples
 * tagged as AMD_SNAPSHOT_V1 by forwarding to the two-argument overload.
 */
template <>
void
MockRuntimeBuffer<rocprofiler_pc_sampling_record_stochastic_v0_t>::genUpcomingSamples(
int num_samples)
{
this->genUpcomingSamples(num_samples, AMD_SNAPSHOT_V1);
}
+6 -17
Просмотреть файл
@@ -74,13 +74,18 @@ public:
//! Submits a "upcoming_samples_t" packet signaling the next num_samples packets are PC samples
void genUpcomingSamples(int num_samples);
std::vector<std::vector<PcSamplingRecordT>> get_parsed_buffer(int GFXIP_MAJOR)
/**
* @brief By default, we assume the gfx94X.
*/
std::vector<std::vector<PcSamplingRecordT>> get_parsed_buffer(int GFXIP_MAJOR,
int GFXIP_MINOR = 4)
{
parsed_data = {};
CHECK_PARSER(parse_buffer((generic_sample_t*) packets.data(),
packets.size(),
GFXIP_MAJOR,
GFXIP_MINOR,
&alloc_parse_memory,
this));
@@ -103,22 +108,6 @@ public:
const uint32_t device;
};
template <>
void
MockRuntimeBuffer<rocprofiler_pc_sampling_record_host_trap_v0_t>::genUpcomingSamples(
int num_samples)
{
genUpcomingSamples(num_samples, AMD_HOST_TRAP_V1);
}
template <>
void
MockRuntimeBuffer<rocprofiler_pc_sampling_record_stochastic_v0_t>::genUpcomingSamples(
int num_samples)
{
this->genUpcomingSamples(num_samples, AMD_SNAPSHOT_V1);
}
/**
* Mimics a HSA doorbell. Every live instance of this class has an unique ID (handler).
* The handler itself may be not unique considering dead instances.
+3 -3
Просмотреть файл
@@ -20,12 +20,12 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
#include <gtest/gtest.h>
#include <cstddef>
#include <future>
#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp"
#include "mocks.hpp"
#define GFXIP_MAJOR 9
constexpr size_t NUM_THREADS = 8;
+48 -73
Просмотреть файл
@@ -22,86 +22,17 @@
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx11.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx9.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h"
#include <rocprofiler-sdk/pc_sampling.h>
// TODO: refactor the commented code for stochastic sampling
// template <typename gfx>
// inline rocprofiler_pc_sampling_record_t
// copyStochasticSample(const perf_sample_snapshot_v1& sample);
// template <>
// inline rocprofiler_pc_sampling_record_t
// copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
// {
// rocprofiler_pc_sampling_record_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
// // Check wave_id matches snapshot_wave_id
// ret.flags.has_wave_cnt = true;
// ret.flags.has_stall_reason = true;
// ret.wave_count = sample.perf_snapshot_data1 & 0x3F;
// ret.snapshot.dual_issue_valu = sample.perf_snapshot_data >> 2;
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 3;
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF;
// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF;
// ret.reserved = 0;
// return ret;
// }
// template <>
// inline rocprofiler_pc_sampling_record_t
// copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
// {
// rocprofiler_pc_sampling_record_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
// // Check wave_id matches snapshot_wave_id
// ret.flags.has_stall_reason = true;
// ret.wave_issued = sample.perf_snapshot_data >> 1;
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 2;
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 6) & 0x7;
// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 9) & 0x7F;
// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 16) & 0x7F;
// ret.snapshot.dual_issue_valu = false;
// ret.reserved = 0;
// return ret;
// }
// #define BITSHIFT(sname) out |= ((in >> GFX::sname) & 1) << PCSAMPLE::sname
// template <typename GFX>
// inline int
// translate_arb(int in)
// {
// size_t out = 0;
// BITSHIFT(ISSUE_VALU);
// BITSHIFT(ISSUE_MATRIX);
// BITSHIFT(ISSUE_LDS);
// BITSHIFT(ISSUE_LDS_DIRECT);
// BITSHIFT(ISSUE_SCALAR);
// BITSHIFT(ISSUE_VMEM_TEX);
// BITSHIFT(ISSUE_FLAT);
// BITSHIFT(ISSUE_EXP);
// BITSHIFT(ISSUE_MISC);
// BITSHIFT(ISSUE_BRMSG);
// return out & 0x3FF;
// }
// #undef BITSHIFT
#include <array>
#include <cstdint>
#include <cstring>
#define LUTOVERLOAD(sname, rocp_prefix) this->operator[](GFX::sname) = rocp_prefix##_##sname
#define LUTOVERLOAD_INST(sname) LUTOVERLOAD(sname, ROCPROFILER_PC_SAMPLING_INSTRUCTION)
@@ -322,6 +253,20 @@ copySample<GFX9, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sam
return ret;
}
/**
 * @brief Host trap V0 sample for GFX950.
 *
 * Forwards to the GFX9 host-trap specialization without any additional processing.
 */
template <>
inline rocprofiler_pc_sampling_record_host_trap_v0_t
copySample<GFX950, rocprofiler_pc_sampling_record_host_trap_v0_t>(const void* sample)
{
return copySample<GFX9, rocprofiler_pc_sampling_record_host_trap_v0_t>(sample);
}
/**
 * @brief Stochastic V0 sample for GFX950.
 *
 * Forwards to the GFX9 stochastic specialization without any additional processing.
 */
template <>
inline rocprofiler_pc_sampling_record_stochastic_v0_t
copySample<GFX950, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sample)
{
return copySample<GFX9, rocprofiler_pc_sampling_record_stochastic_v0_t>(sample);
}
/**
* @brief Host trap V0 sample for GFX11
*/
@@ -348,4 +293,34 @@ copySample<GFX11, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sa
return ret;
}
/**
 * @brief Returns the PC address to use when decoding @p sample.
 *
 * The default implementation assumes no correction is needed and returns the
 * sample's PC as-is.
 */
template <typename GFX, typename PcSamplingRecordT>
inline rocprofiler_address_t
correct_pc_address(const perf_sample_snapshot_v1* sample)
{
    rocprofiler_address_t address{};
    address.value = sample->pc;
    return address;
}
/**
 * @brief GFX950 specific implementation of the PC address correction for stochastic samples.
 *
 * When the mid_macro bit (bit 31 of perf_snapshot_data1) is set, the returned
 * address is the sample's PC moved back by two dwords; otherwise the PC is
 * returned unchanged.
 */
template <>
inline rocprofiler_address_t
correct_pc_address<GFX950, rocprofiler_pc_sampling_record_stochastic_v0_t>(
    const perf_sample_snapshot_v1* sample)
{
    // If mid_macro bit is 1, then reg spec says we need to subtract 2 dwords from the PC address.
    constexpr uint64_t mid_macro_rewind = 2 * sizeof(uint32_t);

    const bool mid_macro = static_cast<bool>(EXTRACT_BITS(sample->perf_snapshot_data1, 31, 31));
    const uint64_t rewind = mid_macro ? mid_macro_rewind : 0;

    return rocprofiler_address_t{.value = sample->pc - rewind};
}
#undef EXTRACT_BITS