From 2bb64e9c9a56b7ce614b80eebfb79967b87bb66f Mon Sep 17 00:00:00 2001 From: "Indic, Vladimir" Date: Thu, 29 May 2025 14:29:52 +0200 Subject: [PATCH] GFX950 Stochastic PC sampling (#344) * GFX950 Stochastic PC sampling * Use actual type instead void * * error reporting if the pcs method is inappropriate --- .../pc_sampling/hsa_adapter.cpp | 9 +- .../pc_sampling/parser/CMakeLists.txt | 2 +- .../pc_sampling/parser/correlation.hpp | 23 +++- .../pc_sampling/parser/gfx950.hpp | 26 ++++ .../pc_sampling/parser/parser_types.hpp | 4 + .../parser/pc_record_interface.cpp | 70 +++++++--- .../parser/pc_record_interface.hpp | 8 +- .../pc_sampling/parser/tests/CMakeLists.txt | 19 ++- .../parser/tests/benchmark_test.cpp | 2 + .../parser/tests/correlation_id_test.cpp | 8 ++ .../pc_sampling/parser/tests/gfx950test.cpp | 72 ++++++++++ .../pc_sampling/parser/tests/gfx9test.cpp | 126 +++++++++++------- .../pc_sampling/parser/tests/gfx9test.hpp | 105 +++++++++++++++ .../pc_sampling/parser/tests/mocks.cpp | 39 ++++++ .../pc_sampling/parser/tests/mocks.hpp | 23 +--- .../pc_sampling/parser/tests/multigpu.cpp | 6 +- .../pc_sampling/parser/translation.hpp | 121 +++++++---------- 17 files changed, 493 insertions(+), 170 deletions(-) create mode 100644 source/lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp create mode 100644 source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx950test.cpp create mode 100644 source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp create mode 100644 source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.cpp diff --git a/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp b/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp index ab943b4347..f9da46d0cb 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/hsa_adapter.cpp @@ -175,9 +175,12 @@ data_ready_callback(void* client_callback_data, // TODO: how about using std::future std::condition_variable cv; - auto gfx_major = 
((agent_session->agent->gfx_target_version / 10000) % 100); - auto pcs_parser_status = agent_session->parser->parse( - upc, reinterpret_cast(buff.get()), gfx_major, cv, false); + auto pcs_parser_status = + agent_session->parser->parse(upc, + reinterpret_cast(buff.get()), + agent_session->agent->gfx_target_version, + cv, + false); if(pcs_parser_status != PCSAMPLE_STATUS_SUCCESS) { diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/CMakeLists.txt b/source/lib/rocprofiler-sdk/pc_sampling/parser/CMakeLists.txt index 6976064bb7..5e0ea79f79 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/CMakeLists.txt @@ -1,7 +1,7 @@ set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES pc_record_interface.cpp) set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_HEADERS correlation.hpp gfx9.hpp gfx11.hpp parser_types.hpp pc_record_interface.hpp rocr.h - translation.hpp) + translation.hpp gfx950.hpp) target_sources( rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES} diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp index 3d23d0d10b..d62aeeae49 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp @@ -240,16 +240,19 @@ add_upcoming_samples(const device_handle device, const auto* snap = reinterpret_cast(buffer + p); auto& pc_sample = samples[p]; - pc_sample = copySample((const void*) (buffer + p)); + pc_sample = copySample(static_cast(snap)); // skip invalid samples if(pc_sample.size == 0) continue; + // Correct PC address of the original sample (if needed) prior to decoding it. 
+ auto pc_address = correct_pc_address(snap); + // Convert PC -> (loaded code object id containing PC, offset within code object) - if(!cache_addr_range.inrange(snap->pc)) - cache_addr_range = table->find_codeobj_in_range(snap->pc); + if(!cache_addr_range.inrange(pc_address.value)) + cache_addr_range = table->find_codeobj_in_range(pc_address.value); pc_sample.pc.code_object_id = cache_addr_range.id; - pc_sample.pc.code_object_offset = snap->pc - cache_addr_range.addr; + pc_sample.pc.code_object_offset = pc_address.value - cache_addr_range.addr; try { @@ -357,6 +360,7 @@ template pcsample_status_t inline parse_buffer(generic_sample_t* buffer, uint64_t buffer_size, int gfxip_major, + int gfxip_minor, user_callback_t callback, void* userdata) { @@ -364,11 +368,20 @@ pcsample_status_t inline parse_buffer(generic_sample_t* buffer, auto parseSample_func = _parse_buffer; if(gfxip_major == 9) - parseSample_func = _parse_buffer; + { + if(gfxip_minor == 5) + { + parseSample_func = _parse_buffer; + } + } else if(gfxip_major == 11) + { parseSample_func = _parse_buffer; + } else + { return PCSAMPLE_STATUS_INVALID_GFXIP; + } return parseSample_func(buffer, buffer_size, callback, userdata, corr_map.get()); }; diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp new file mode 100644 index 0000000000..586c8bdaf4 --- /dev/null +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp @@ -0,0 +1,26 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +class GFX950 : public GFX9 +{}; diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp index 17f775a64d..bd786e75f8 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp @@ -120,6 +120,10 @@ enum PCSAMPLE_STATUS * Invalid GFXIP string was passed to the parser. */ PCSAMPLE_STATUS_INVALID_GFXIP, + /** + * Invalid PC sampling method was passed to the parser. 
+ */ + PCSAMPLE_STATUS_INVALID_METHOD, /** * Last error type */ diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.cpp index 28efc28189..2acd41a068 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.cpp @@ -52,30 +52,70 @@ PCSamplingParserContext::alloc( return size; } +/** + * @brief Get the appropriate parse function based on the GFXIP and sampling method. + * + * If an inappropriate sampling method is provided, it returns nullptr. + */ +template +PCSamplingParserContext::parse_funct_ptr_t +PCSamplingParserContext::_get_parse_func_for_method(rocprofiler_pc_sampling_method_t pcs_method) +{ + if(pcs_method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP) + { + return &PCSamplingParserContext::_parse; + } + else if(pcs_method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC) + { + return &PCSamplingParserContext::_parse; + } + else + { + return nullptr; + } +} + pcsample_status_t PCSamplingParserContext::parse(const upcoming_samples_t& upcoming, const generic_sample_t* data_, - int gfxip_major, + uint32_t gfx_target_version, std::condition_variable& midway_signal, bool bRocrBufferFlip) { - bool bIsHostTrap = upcoming.which_sample_type == AMD_HOST_TRAP_V1; + auto gfxip_major = (gfx_target_version / 10000) % 100; + auto gfxip_minor = (gfx_target_version / 100) % 100; + auto pcs_method = (upcoming.which_sample_type == AMD_HOST_TRAP_V1) + ? ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP + : ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC; // Template instantiation is faster! - auto parseSample_func = - bIsHostTrap - ? &PCSamplingParserContext::_parse - : &PCSamplingParserContext::_parse; - if(gfxip_major == 11) - parseSample_func = - bIsHostTrap - ? 
&PCSamplingParserContext::_parse - : &PCSamplingParserContext::_parse; - else if(gfxip_major != 9) + parse_funct_ptr_t parseSample_func = nullptr; + if(gfxip_major == 9) + { + if(gfxip_minor == 5) + { + parseSample_func = _get_parse_func_for_method(pcs_method); + } + else + { + parseSample_func = _get_parse_func_for_method(pcs_method); + } + } + else if(gfxip_major == 11) + { + parseSample_func = _get_parse_func_for_method(pcs_method); + } + else + { return PCSAMPLE_STATUS_INVALID_GFXIP; + } + + if(parseSample_func == nullptr) + { + return PCSAMPLE_STATUS_INVALID_METHOD; + } auto status = (this->*parseSample_func)(upcoming, data_); midway_signal.notify_all(); diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp index a124d8fc15..9652c8426c 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp @@ -85,7 +85,7 @@ public: */ pcsample_status_t parse(const upcoming_samples_t& upcoming, const generic_sample_t* data, - int gfxip_major, + uint32_t gfx_target_version, std::condition_variable& midway_signal, bool bFlushCorrelationIds); @@ -199,5 +199,11 @@ protected: mutable std::shared_mutex mut; private: + using parse_funct_ptr_t = pcsample_status_t ( + PCSamplingParserContext::*)(const upcoming_samples_t&, const generic_sample_t*); + + template + parse_funct_ptr_t _get_parse_func_for_method(rocprofiler_pc_sampling_method_t pcs_method); + std::unordered_map _agent_buffers; }; diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/CMakeLists.txt b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/CMakeLists.txt index b5283556ca..9d23f4c0a4 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/CMakeLists.txt @@ -5,10 +5,18 @@ include(GoogleTest) set(PCTEST_INCLUDE_DIR 
${PROJECT_SOURCE_DIR}/source/lib/rocprofiler-sdk/pc_sampling/parser/) -set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_ID_TEST_SOURCES correlation_id_test.cpp) -set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_BENCH_TEST_SOURCES benchmark_test.cpp) -set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_GFX9_TEST_SOURCES gfx9test.cpp) set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_HEADERS mocks.hpp) +set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES mocks.cpp) + +set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_ID_TEST_SOURCES + ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} correlation_id_test.cpp) +set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_BENCH_TEST_SOURCES + ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} benchmark_test.cpp) +set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_GFX9_TEST_SOURCES + ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} gfx9test.hpp gfx9test.cpp + gfx950test.cpp) +set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES + ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} multigpu.cpp) add_executable(pcs_gfx9_test) @@ -70,7 +78,8 @@ target_link_libraries( add_executable(pcs_thread_test) target_compile_options(pcs_thread_test PRIVATE "-Ofast") -target_sources(pcs_thread_test PRIVATE multigpu.cpp) +target_sources(pcs_thread_test + PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES}) target_include_directories(pcs_thread_test PRIVATE ${PCTEST_INCLUDE_DIR}) target_link_libraries( @@ -81,7 +90,7 @@ target_link_libraries( gtest_add_tests( TARGET pcs_thread_test - SOURCES multigpu.cpp + SOURCES ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES} TEST_LIST pcs_thread_test_TESTS WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp index 8a0d2c4786..e2c74765a0 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/benchmark_test.cpp @@ 
-26,6 +26,7 @@ #include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp" #define GFXIP_MAJOR 9 +#define GFXIP_MINOR 4 /** * Benchmarks how fast the parser can process samples on a single threaded case @@ -75,6 +76,7 @@ Benchmark(bool bWarmup) CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(), buffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, user_cb, &userdata)); auto t1 = std::chrono::system_clock::now(); diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/correlation_id_test.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/correlation_id_test.cpp index 081c29bfc9..cf29b1e256 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/correlation_id_test.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/correlation_id_test.cpp @@ -27,6 +27,7 @@ #include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp" #define GFXIP_MAJOR 9 +#define GFXIP_MINOR 4 std::mt19937 rdgen(1); @@ -77,6 +78,7 @@ pcs_parser_hello_world() CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(), buffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, (void*) &all_allocations)); @@ -131,6 +133,7 @@ pcs_parser_reverse_wave_order() CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(), buffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, (void*) &all_allocations)); @@ -171,6 +174,7 @@ pcs_parser_dispatch_wrapping() CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(), buffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, (void*) &all_allocations)); @@ -226,6 +230,7 @@ pcs_parser_random_samples() CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(), buffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, (void*) &all_allocations)); @@ -330,6 +335,7 @@ pcs_parser_queue_hammer() CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(), buffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, 
(void*) &all_allocations)); @@ -382,11 +388,13 @@ pcs_parser_multi_buffer() CHECK_PARSER(parse_buffer((generic_sample_t*) firstBuffer->packets.data(), firstBuffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, (void*) &all_allocations)); CHECK_PARSER(parse_buffer((generic_sample_t*) secondBuffer->packets.data(), secondBuffer->packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, alloc_callback, (void*) &all_allocations)); diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx950test.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx950test.cpp new file mode 100644 index 0000000000..9383694f0c --- /dev/null +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx950test.cpp @@ -0,0 +1,72 @@ +// MIT License +// +// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifdef NDEBUG +# undef NDEBUG +#endif + +#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp" + +#include +#include + +/** + * @brief This test verifies if the PC address is corrected properly on GFX950 when required. + */ +template +class MidMacroPCCorrectionGFX950 : public MidMacroPCCorrection +{ +public: + void genPCSample(uint64_t pc, bool mid_macro) override + { + // mid_macro exists only for stochastic PC sampling on GFX950 + if constexpr(!std::is_same::value) + { + // Invalidate mid_macro + mid_macro = false; + } + + // invoking parent class + MidMacroPCCorrection::genPCSample(pc, mid_macro); + }; + + uint64_t calcaulteExpectedPC(uint64_t pc, bool mid_macro) override + { + // According to the regspec, if mid_macro is true, we need to subtract 2 dwords from the PC + // address. + return mid_macro ? (pc - 2 * sizeof(uint32_t)) : pc; + } + + std::vector> get_parsed_data() override + { + return this->buffer->get_parsed_buffer(9, 5); // GFX950 + } +}; + +TEST(pcs_parser, gfx950_test) +{ + MidMacroPCCorrectionGFX950{}.Test(); + MidMacroPCCorrectionGFX950{}.Test(); + + std::cout << "GFX950 Test Done." 
<< std::endl; +} diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.cpp index 2751013d6a..c96591a5c5 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.cpp @@ -24,6 +24,7 @@ # undef NDEBUG #endif +#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp" #include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp" #include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp" @@ -138,53 +139,6 @@ \ NON_GFX9_ARBSTATE_IS_ZERO(x, y) -template -class WaveSnapTest -{ -public: - WaveSnapTest() - { - buffer = std::make_shared>(); - queue = std::make_shared>(16, buffer); - dispatch = std::make_shared>(queue); - } - - void Test() - { - FillBuffers(); - CheckBuffers(); - } - - virtual void FillBuffers() = 0; - virtual void CheckBuffers() = 0; - - void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall) - { - wave_cnt &= 0x3F; - inst_type &= 0xF; - reason &= 0x7; - arb_issue &= 0xFF; - arb_stall &= 0xFF; - - perf_sample_snapshot_v1 snap; - ::memset(&snap, 0, sizeof(snap)); - snap.pc = dispatch->unique_id; - snap.correlation_id = dispatch->getMockId().raw; - - snap.perf_snapshot_data = (inst_type << 3) | (reason << 7); - snap.perf_snapshot_data |= 0x1; // sample is valid - snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18); - snap.perf_snapshot_data1 = wave_cnt; - - EXPECT_NE(dispatch.get(), nullptr); - dispatch->submit(packet_union_t{.snap = snap}); - }; - - std::shared_ptr> buffer; - std::shared_ptr> queue; - std::shared_ptr> dispatch; -}; - template class WaveCntTest : public WaveSnapTest { @@ -611,6 +565,81 @@ class WaveOtherFieldsTest : public WaveSnapTest std::vector compare; }; +/** + * @brief This test verifies that the PC address remains unchanged for GFX9. 
+ */ +template +void +MidMacroPCCorrection::FillBuffers() +{ + this->buffer->genUpcomingSamples(3); + // NOTE: mid_macro is relevant only on GFX950 + genPCSample(0x800, true); + genPCSample(0x900, false); + genPCSample(0x1000, true); +} + +template +std::vector> +MidMacroPCCorrection::get_parsed_data() +{ + return this->buffer->get_parsed_buffer(9); // GFXIP==9 +} + +template +void +MidMacroPCCorrection::CheckBuffers() +{ + auto parsed = get_parsed_data(); + EXPECT_EQ(parsed.size(), 1); + EXPECT_EQ(parsed[0].size(), 3); + EXPECT_EQ(compare.size(), 3); + + for(size_t i = 0; i < 3; i++) + { + // verifying PC address + EXPECT_EQ(parsed[0][i].pc.code_object_offset, compare[i].pc.code_object_offset); + } +} + +/** + * @brief By default, PC address remains unchanged. + */ +template +uint64_t +MidMacroPCCorrection::calcaulteExpectedPC(uint64_t pc, bool /*mid_macro*/) +{ + return pc; +} + +template +void +MidMacroPCCorrection::genPCSample(uint64_t pc, bool mid_macro) +{ + PcSamplingRecordT sample; + ::memset(&sample, 0, sizeof(sample)); + // Calculate the expected PC address + sample.pc.code_object_offset = calcaulteExpectedPC(pc, mid_macro); + compare.push_back(sample); + + // This test considers only PC address. + perf_sample_snapshot_v1 snap; + ::memset(&snap, 0, sizeof(snap)); + snap.pc = pc; + // Mandatory for correlation mapping. Otherwise, parsing error occurs. 
+ snap.correlation_id = this->dispatch->getMockId().raw; + + // to ensure all stochastic samples are generated properly, + // mark them as valid + snap.perf_snapshot_data |= 0x1; // set the bit indicating the sample is valid + + // mid_macro is the bit at position 31 + snap.perf_snapshot_data1 = (mid_macro << 31); + + EXPECT_NE(this->dispatch.get(), nullptr); + this->dispatch->submit(snap); +} + TEST(pcs_parser, gfx9_test) { // Tests specific to stochastic sampling only @@ -628,5 +657,8 @@ TEST(pcs_parser, gfx9_test) WaveOtherFieldsTest{}.Test(); WaveOtherFieldsTest{}.Test(); + MidMacroPCCorrection{}.Test(); + MidMacroPCCorrection{}.Test(); + std::cout << "GFX9 Test Done." << std::endl; } diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp new file mode 100644 index 0000000000..c015ad4976 --- /dev/null +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp @@ -0,0 +1,105 @@ +// MIT License +// +// Copyright (c) 2025 ROCm Developer Tools +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp" + +#include + +template +class WaveSnapTest +{ +public: + WaveSnapTest() + { + buffer = std::make_shared>(); + queue = std::make_shared>(16, buffer); + dispatch = std::make_shared>(queue); + } + + void Test() + { + FillBuffers(); + CheckBuffers(); + } + + virtual void FillBuffers() = 0; + virtual void CheckBuffers() = 0; + + void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall) + { + wave_cnt &= 0x3F; + inst_type &= 0xF; + reason &= 0x7; + arb_issue &= 0xFF; + arb_stall &= 0xFF; + + perf_sample_snapshot_v1 snap; + ::memset(&snap, 0, sizeof(snap)); + snap.pc = dispatch->unique_id; + snap.correlation_id = dispatch->getMockId().raw; + + snap.perf_snapshot_data = (inst_type << 3) | (reason << 7); + snap.perf_snapshot_data |= 0x1; // sample is valid + snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18); + snap.perf_snapshot_data1 = wave_cnt; + + EXPECT_NE(dispatch.get(), nullptr); + dispatch->submit(packet_union_t{.snap = snap}); + }; + + std::shared_ptr> buffer; + std::shared_ptr> queue; + std::shared_ptr> dispatch; +}; + +/** + * @brief Testing how mid_macro bit affects the PC address. + * + * On GFX950, this bit triggers correction of the PC address. + * On other GFX9 architectures, the PC address remains unchanged. + */ +template +class MidMacroPCCorrection : public WaveSnapTest +{ +public: + void FillBuffers() override; // Explicitly mark as override + void CheckBuffers() override; // Explicitly mark as override + + /** + * @brief Generate PC sample with mid_macro flag. 
+ * The @p mid_macro is relevant for the GFX950, so it's false by default + */ + virtual void genPCSample(uint64_t pc, bool mid_macro = false); + + /** + * @brief Calculate the expected PC address for comparison. + */ + virtual uint64_t calcaulteExpectedPC(uint64_t pc, bool mid_macro = false); + + virtual std::vector> get_parsed_data(); + +protected: + ///< testing data + std::vector compare; +}; diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.cpp new file mode 100644 index 0000000000..075e41d93f --- /dev/null +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.cpp @@ -0,0 +1,39 @@ +// MIT License +// +// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp" + +template <> +void +MockRuntimeBuffer::genUpcomingSamples( + int num_samples) +{ + genUpcomingSamples(num_samples, AMD_HOST_TRAP_V1); +} + +template <> +void +MockRuntimeBuffer::genUpcomingSamples( + int num_samples) +{ + this->genUpcomingSamples(num_samples, AMD_SNAPSHOT_V1); +} diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp index ed8d10a085..f6ed028c31 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp @@ -74,13 +74,18 @@ public: //! Submits a "upcoming_samples_t" packet signaling the next num_samples packets are PC samples void genUpcomingSamples(int num_samples); - std::vector> get_parsed_buffer(int GFXIP_MAJOR) + /** + * @brief By default, we assume the gfx94X. + */ + std::vector> get_parsed_buffer(int GFXIP_MAJOR, + int GFXIP_MINOR = 4) { parsed_data = {}; CHECK_PARSER(parse_buffer((generic_sample_t*) packets.data(), packets.size(), GFXIP_MAJOR, + GFXIP_MINOR, &alloc_parse_memory, this)); @@ -103,22 +108,6 @@ public: const uint32_t device; }; -template <> -void -MockRuntimeBuffer::genUpcomingSamples( - int num_samples) -{ - genUpcomingSamples(num_samples, AMD_HOST_TRAP_V1); -} - -template <> -void -MockRuntimeBuffer::genUpcomingSamples( - int num_samples) -{ - this->genUpcomingSamples(num_samples, AMD_SNAPSHOT_V1); -} - /** * Mimics a HSA doorbell. Every live instance of this class has an unique ID (handler). * The handler itself may be not unique considering dead instances. 
diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/multigpu.cpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/multigpu.cpp index d839d68fed..bec0678ad1 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/multigpu.cpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/tests/multigpu.cpp @@ -20,12 +20,12 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp" +#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp" + #include #include - #include -#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp" -#include "mocks.hpp" #define GFXIP_MAJOR 9 constexpr size_t NUM_THREADS = 8; diff --git a/source/lib/rocprofiler-sdk/pc_sampling/parser/translation.hpp b/source/lib/rocprofiler-sdk/pc_sampling/parser/translation.hpp index c5bfd162ae..79a3a6083e 100644 --- a/source/lib/rocprofiler-sdk/pc_sampling/parser/translation.hpp +++ b/source/lib/rocprofiler-sdk/pc_sampling/parser/translation.hpp @@ -22,86 +22,17 @@ #pragma once -#include -#include -#include - #include "lib/rocprofiler-sdk/pc_sampling/parser/gfx11.hpp" #include "lib/rocprofiler-sdk/pc_sampling/parser/gfx9.hpp" +#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp" #include "lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp" #include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h" #include -// TODO: refactor the commented code for stochastic sampling - -// template -// inline rocprofiler_pc_sampling_record_t -// copyStochasticSample(const perf_sample_snapshot_v1& sample); - -// template <> -// inline rocprofiler_pc_sampling_record_t -// copyStochasticSample(const perf_sample_snapshot_v1& sample) -// { -// rocprofiler_pc_sampling_record_t ret = copySampleHeader(sample); -// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1; -// // Check wave_id matches snapshot_wave_id - -// ret.flags.has_wave_cnt = true; -// 
ret.flags.has_stall_reason = true; - -// ret.wave_count = sample.perf_snapshot_data1 & 0x3F; - -// ret.snapshot.dual_issue_valu = sample.perf_snapshot_data >> 2; -// ret.snapshot.inst_type = sample.perf_snapshot_data >> 3; -// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7; -// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF; -// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF; -// ret.reserved = 0; -// return ret; -// } - -// template <> -// inline rocprofiler_pc_sampling_record_t -// copyStochasticSample(const perf_sample_snapshot_v1& sample) -// { -// rocprofiler_pc_sampling_record_t ret = copySampleHeader(sample); -// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1; -// // Check wave_id matches snapshot_wave_id - -// ret.flags.has_stall_reason = true; - -// ret.wave_issued = sample.perf_snapshot_data >> 1; -// ret.snapshot.inst_type = sample.perf_snapshot_data >> 2; -// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 6) & 0x7; -// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 9) & 0x7F; -// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 16) & 0x7F; -// ret.snapshot.dual_issue_valu = false; -// ret.reserved = 0; -// return ret; -// } - -// #define BITSHIFT(sname) out |= ((in >> GFX::sname) & 1) << PCSAMPLE::sname - -// template -// inline int -// translate_arb(int in) -// { -// size_t out = 0; -// BITSHIFT(ISSUE_VALU); -// BITSHIFT(ISSUE_MATRIX); -// BITSHIFT(ISSUE_LDS); -// BITSHIFT(ISSUE_LDS_DIRECT); -// BITSHIFT(ISSUE_SCALAR); -// BITSHIFT(ISSUE_VMEM_TEX); -// BITSHIFT(ISSUE_FLAT); -// BITSHIFT(ISSUE_EXP); -// BITSHIFT(ISSUE_MISC); -// BITSHIFT(ISSUE_BRMSG); -// return out & 0x3FF; -// } - -// #undef BITSHIFT +#include +#include +#include #define LUTOVERLOAD(sname, rocp_prefix) this->operator[](GFX::sname) = rocp_prefix##_##sname #define LUTOVERLOAD_INST(sname) LUTOVERLOAD(sname, 
ROCPROFILER_PC_SAMPLING_INSTRUCTION) @@ -322,6 +253,20 @@ copySample(const void* sam return ret; } +template <> +inline rocprofiler_pc_sampling_record_host_trap_v0_t +copySample(const void* sample) +{ + return copySample(sample); +} + +template <> +inline rocprofiler_pc_sampling_record_stochastic_v0_t +copySample(const void* sample) +{ + return copySample(sample); +} + /** * @brief Host trap V0 sample for GFX11 */ @@ -348,4 +293,34 @@ copySample(const void* sa return ret; } +/** + * @brief The default implementation assumes no correction is needed. + */ +template +inline rocprofiler_address_t +correct_pc_address(const perf_sample_snapshot_v1* sample) +{ + return rocprofiler_address_t{.value = sample->pc}; +} + +/** + * @brief GFX950 specific implementation of the PC address correction. + */ +template <> +inline rocprofiler_address_t +correct_pc_address( + const perf_sample_snapshot_v1* sample) +{ + // If mid_macro bit is 1, then reg spec says we need to subtract 2 dwords from the PC address. + auto mid_macro = static_cast(EXTRACT_BITS(sample->perf_snapshot_data1, 31, 31)); + if(mid_macro) + { + return rocprofiler_address_t{.value = sample->pc - 2 * sizeof(uint32_t)}; + } + else + { + return rocprofiler_address_t{.value = sample->pc}; + } +} + #undef EXTRACT_BITS