GFX950 Stochastic PC sampling (#344)
* GFX950 Stochastic PC sampling
* Use actual type instead of void *
* error reporting if the pcs method is inappropriate
[ROCm/rocprofiler-sdk commit: 2bb64e9c9a]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
e55c31db27
Коммит
ec2fe441e6
+6
-3
@@ -175,9 +175,12 @@ data_ready_callback(void* client_callback_data,
|
||||
// TODO: how about using std::future
|
||||
std::condition_variable cv;
|
||||
|
||||
auto gfx_major = ((agent_session->agent->gfx_target_version / 10000) % 100);
|
||||
auto pcs_parser_status = agent_session->parser->parse(
|
||||
upc, reinterpret_cast<const generic_sample_t*>(buff.get()), gfx_major, cv, false);
|
||||
auto pcs_parser_status =
|
||||
agent_session->parser->parse(upc,
|
||||
reinterpret_cast<const generic_sample_t*>(buff.get()),
|
||||
agent_session->agent->gfx_target_version,
|
||||
cv,
|
||||
false);
|
||||
|
||||
if(pcs_parser_status != PCSAMPLE_STATUS_SUCCESS)
|
||||
{
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES pc_record_interface.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_HEADERS
|
||||
correlation.hpp gfx9.hpp gfx11.hpp parser_types.hpp pc_record_interface.hpp rocr.h
|
||||
translation.hpp)
|
||||
translation.hpp gfx950.hpp)
|
||||
|
||||
target_sources(
|
||||
rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES}
|
||||
|
||||
+18
-5
@@ -240,16 +240,19 @@ add_upcoming_samples(const device_handle device,
|
||||
const auto* snap = reinterpret_cast<const perf_sample_snapshot_v1*>(buffer + p);
|
||||
|
||||
auto& pc_sample = samples[p];
|
||||
pc_sample = copySample<GFXIP, PcSamplingRecordT>((const void*) (buffer + p));
|
||||
pc_sample = copySample<GFXIP, PcSamplingRecordT>(static_cast<const void*>(snap));
|
||||
// skip invalid samples
|
||||
if(pc_sample.size == 0) continue;
|
||||
|
||||
// Correct PC address of the original sample (if needed) prior to decoding it.
|
||||
auto pc_address = correct_pc_address<GFXIP, PcSamplingRecordT>(snap);
|
||||
|
||||
// Convert PC -> (loaded code object id containing PC, offset within code object)
|
||||
if(!cache_addr_range.inrange(snap->pc))
|
||||
cache_addr_range = table->find_codeobj_in_range(snap->pc);
|
||||
if(!cache_addr_range.inrange(pc_address.value))
|
||||
cache_addr_range = table->find_codeobj_in_range(pc_address.value);
|
||||
|
||||
pc_sample.pc.code_object_id = cache_addr_range.id;
|
||||
pc_sample.pc.code_object_offset = snap->pc - cache_addr_range.addr;
|
||||
pc_sample.pc.code_object_offset = pc_address.value - cache_addr_range.addr;
|
||||
|
||||
try
|
||||
{
|
||||
@@ -357,6 +360,7 @@ template <typename PcSamplingRecordT>
|
||||
pcsample_status_t inline parse_buffer(generic_sample_t* buffer,
|
||||
uint64_t buffer_size,
|
||||
int gfxip_major,
|
||||
int gfxip_minor,
|
||||
user_callback_t<PcSamplingRecordT> callback,
|
||||
void* userdata)
|
||||
{
|
||||
@@ -364,11 +368,20 @@ pcsample_status_t inline parse_buffer(generic_sample_t* buffer,
|
||||
|
||||
auto parseSample_func = _parse_buffer<GFX9, PcSamplingRecordT>;
|
||||
if(gfxip_major == 9)
|
||||
parseSample_func = _parse_buffer<GFX9, PcSamplingRecordT>;
|
||||
{
|
||||
if(gfxip_minor == 5)
|
||||
{
|
||||
parseSample_func = _parse_buffer<GFX950, PcSamplingRecordT>;
|
||||
}
|
||||
}
|
||||
else if(gfxip_major == 11)
|
||||
{
|
||||
parseSample_func = _parse_buffer<GFX11, PcSamplingRecordT>;
|
||||
}
|
||||
else
|
||||
{
|
||||
return PCSAMPLE_STATUS_INVALID_GFXIP;
|
||||
}
|
||||
|
||||
return parseSample_func(buffer, buffer_size, callback, userdata, corr_map.get());
|
||||
};
|
||||
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
class GFX950 : public GFX9
|
||||
{};
|
||||
@@ -120,6 +120,10 @@ enum PCSAMPLE_STATUS
|
||||
* Invalid GFXIP string was passed to the parser.
|
||||
*/
|
||||
PCSAMPLE_STATUS_INVALID_GFXIP,
|
||||
/**
|
||||
* Invalid PC sampling method was passed to the parser.
|
||||
*/
|
||||
PCSAMPLE_STATUS_INVALID_METHOD,
|
||||
/**
|
||||
* Last error type
|
||||
*/
|
||||
|
||||
+55
-15
@@ -52,30 +52,70 @@ PCSamplingParserContext::alloc<rocprofiler_pc_sampling_record_stochastic_v0_t>(
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Get the appropriate parse function based on the GFXIP and sampling method.
|
||||
*
|
||||
* If the inappropriate sampling method is provided, it returns nullptr.
|
||||
*/
|
||||
template <typename GFXIP>
|
||||
PCSamplingParserContext::parse_funct_ptr_t
|
||||
PCSamplingParserContext::_get_parse_func_for_method(rocprofiler_pc_sampling_method_t pcs_method)
|
||||
{
|
||||
if(pcs_method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
{
|
||||
return &PCSamplingParserContext::_parse<GFXIP,
|
||||
rocprofiler_pc_sampling_record_host_trap_v0_t>;
|
||||
}
|
||||
else if(pcs_method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
{
|
||||
return &PCSamplingParserContext::_parse<GFXIP,
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t>;
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
pcsample_status_t
|
||||
PCSamplingParserContext::parse(const upcoming_samples_t& upcoming,
|
||||
const generic_sample_t* data_,
|
||||
int gfxip_major,
|
||||
uint32_t gfx_target_version,
|
||||
std::condition_variable& midway_signal,
|
||||
bool bRocrBufferFlip)
|
||||
{
|
||||
bool bIsHostTrap = upcoming.which_sample_type == AMD_HOST_TRAP_V1;
|
||||
auto gfxip_major = (gfx_target_version / 10000) % 100;
|
||||
auto gfxip_minor = (gfx_target_version / 100) % 100;
|
||||
auto pcs_method = (upcoming.which_sample_type == AMD_HOST_TRAP_V1)
|
||||
? ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP
|
||||
: ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC;
|
||||
|
||||
// Template instantiation is faster!
|
||||
auto parseSample_func =
|
||||
bIsHostTrap
|
||||
? &PCSamplingParserContext::_parse<GFX9, rocprofiler_pc_sampling_record_host_trap_v0_t>
|
||||
: &PCSamplingParserContext::_parse<GFX9,
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t>;
|
||||
if(gfxip_major == 11)
|
||||
parseSample_func =
|
||||
bIsHostTrap
|
||||
? &PCSamplingParserContext::_parse<GFX11,
|
||||
rocprofiler_pc_sampling_record_host_trap_v0_t>
|
||||
: &PCSamplingParserContext::_parse<GFX11,
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t>;
|
||||
else if(gfxip_major != 9)
|
||||
parse_funct_ptr_t parseSample_func = nullptr;
|
||||
if(gfxip_major == 9)
|
||||
{
|
||||
if(gfxip_minor == 5)
|
||||
{
|
||||
parseSample_func = _get_parse_func_for_method<GFX950>(pcs_method);
|
||||
}
|
||||
else
|
||||
{
|
||||
parseSample_func = _get_parse_func_for_method<GFX9>(pcs_method);
|
||||
}
|
||||
}
|
||||
else if(gfxip_major == 11)
|
||||
{
|
||||
parseSample_func = _get_parse_func_for_method<GFX11>(pcs_method);
|
||||
}
|
||||
else
|
||||
{
|
||||
return PCSAMPLE_STATUS_INVALID_GFXIP;
|
||||
}
|
||||
|
||||
if(parseSample_func == nullptr)
|
||||
{
|
||||
return PCSAMPLE_STATUS_INVALID_METHOD;
|
||||
}
|
||||
|
||||
auto status = (this->*parseSample_func)(upcoming, data_);
|
||||
midway_signal.notify_all();
|
||||
|
||||
+7
-1
@@ -85,7 +85,7 @@ public:
|
||||
*/
|
||||
pcsample_status_t parse(const upcoming_samples_t& upcoming,
|
||||
const generic_sample_t* data,
|
||||
int gfxip_major,
|
||||
uint32_t gfx_target_version,
|
||||
std::condition_variable& midway_signal,
|
||||
bool bFlushCorrelationIds);
|
||||
|
||||
@@ -199,5 +199,11 @@ protected:
|
||||
mutable std::shared_mutex mut;
|
||||
|
||||
private:
|
||||
using parse_funct_ptr_t = pcsample_status_t (
|
||||
PCSamplingParserContext::*)(const upcoming_samples_t&, const generic_sample_t*);
|
||||
|
||||
template <typename GFXIP>
|
||||
parse_funct_ptr_t _get_parse_func_for_method(rocprofiler_pc_sampling_method_t pcs_method);
|
||||
|
||||
std::unordered_map<rocprofiler_agent_id_t, rocprofiler_buffer_id_t> _agent_buffers;
|
||||
};
|
||||
|
||||
+14
-5
@@ -5,10 +5,18 @@ include(GoogleTest)
|
||||
set(PCTEST_INCLUDE_DIR
|
||||
${PROJECT_SOURCE_DIR}/source/lib/rocprofiler-sdk/pc_sampling/parser/)
|
||||
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_ID_TEST_SOURCES correlation_id_test.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_BENCH_TEST_SOURCES benchmark_test.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_GFX9_TEST_SOURCES gfx9test.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_HEADERS mocks.hpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES mocks.cpp)
|
||||
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_ID_TEST_SOURCES
|
||||
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} correlation_id_test.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_BENCH_TEST_SOURCES
|
||||
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} benchmark_test.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_GFX9_TEST_SOURCES
|
||||
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} gfx9test.hpp gfx9test.cpp
|
||||
gfx950test.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES
|
||||
${ROCPROFILER_LIB_PC_SAMPLING_PARSER_TEST_SOURCES} multigpu.cpp)
|
||||
|
||||
add_executable(pcs_gfx9_test)
|
||||
|
||||
@@ -70,7 +78,8 @@ target_link_libraries(
|
||||
add_executable(pcs_thread_test)
|
||||
target_compile_options(pcs_thread_test PRIVATE "-Ofast")
|
||||
|
||||
target_sources(pcs_thread_test PRIVATE multigpu.cpp)
|
||||
target_sources(pcs_thread_test
|
||||
PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES})
|
||||
target_include_directories(pcs_thread_test PRIVATE ${PCTEST_INCLUDE_DIR})
|
||||
|
||||
target_link_libraries(
|
||||
@@ -81,7 +90,7 @@ target_link_libraries(
|
||||
|
||||
gtest_add_tests(
|
||||
TARGET pcs_thread_test
|
||||
SOURCES multigpu.cpp
|
||||
SOURCES ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_MULTIGPU_TEST_SOURCES}
|
||||
TEST_LIST pcs_thread_test_TESTS
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
#define GFXIP_MAJOR 9
|
||||
#define GFXIP_MINOR 4
|
||||
|
||||
/**
|
||||
* Benchmarks how fast the parser can process samples on a single threaded case
|
||||
@@ -75,6 +76,7 @@ Benchmark(bool bWarmup)
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
user_cb,
|
||||
&userdata));
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
#define GFXIP_MAJOR 9
|
||||
#define GFXIP_MINOR 4
|
||||
|
||||
std::mt19937 rdgen(1);
|
||||
|
||||
@@ -77,6 +78,7 @@ pcs_parser_hello_world()
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
|
||||
@@ -131,6 +133,7 @@ pcs_parser_reverse_wave_order()
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
|
||||
@@ -171,6 +174,7 @@ pcs_parser_dispatch_wrapping()
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
|
||||
@@ -226,6 +230,7 @@ pcs_parser_random_samples()
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
|
||||
@@ -330,6 +335,7 @@ pcs_parser_queue_hammer()
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) buffer->packets.data(),
|
||||
buffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
|
||||
@@ -382,11 +388,13 @@ pcs_parser_multi_buffer()
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) firstBuffer->packets.data(),
|
||||
firstBuffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) secondBuffer->packets.data(),
|
||||
secondBuffer->packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
alloc_callback<PcSamplingRecordT>,
|
||||
(void*) &all_allocations));
|
||||
|
||||
|
||||
+72
@@ -0,0 +1,72 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#ifdef NDEBUG
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstddef>
|
||||
|
||||
/**
|
||||
* @brief This test verifies if the PC address is corrected properly on GFX950 when required.
|
||||
*/
|
||||
template <typename PcSamplingRecordT>
|
||||
class MidMacroPCCorrectionGFX950 : public MidMacroPCCorrection<PcSamplingRecordT>
|
||||
{
|
||||
public:
|
||||
void genPCSample(uint64_t pc, bool mid_macro) override
|
||||
{
|
||||
// mid_macro exists only for stochastic PC sampling on GFX950
|
||||
if constexpr(!std::is_same<PcSamplingRecordT,
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t>::value)
|
||||
{
|
||||
// Invalidate mid_macro
|
||||
mid_macro = false;
|
||||
}
|
||||
|
||||
// invoking parent class
|
||||
MidMacroPCCorrection<PcSamplingRecordT>::genPCSample(pc, mid_macro);
|
||||
};
|
||||
|
||||
uint64_t calcaulteExpectedPC(uint64_t pc, bool mid_macro) override
|
||||
{
|
||||
// According to the regspec, if mid_macro is true, we need to subtract 2 dwords from the PC
|
||||
// address.
|
||||
return mid_macro ? (pc - 2 * sizeof(uint32_t)) : pc;
|
||||
}
|
||||
|
||||
std::vector<std::vector<PcSamplingRecordT>> get_parsed_data() override
|
||||
{
|
||||
return this->buffer->get_parsed_buffer(9, 5); // GFX950
|
||||
}
|
||||
};
|
||||
|
||||
// Runs the GFX950 mid_macro correction check once per record type.
// Each fixture lives in its own scope so it is destroyed before the next one
// is created, exactly as with the temporaries it replaces.
TEST(pcs_parser, gfx950_test)
{
    {
        MidMacroPCCorrectionGFX950<rocprofiler_pc_sampling_record_host_trap_v0_t> host_trap_case{};
        host_trap_case.Test();
    }
    {
        MidMacroPCCorrectionGFX950<rocprofiler_pc_sampling_record_stochastic_v0_t>
            stochastic_case{};
        stochastic_case.Test();
    }

    std::cout << "GFX950 Test Done." << std::endl;
}
|
||||
+79
-47
@@ -24,6 +24,7 @@
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/gfx9test.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
@@ -138,53 +139,6 @@
|
||||
\
|
||||
NON_GFX9_ARBSTATE_IS_ZERO(x, y)
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
class WaveSnapTest
|
||||
{
|
||||
public:
|
||||
WaveSnapTest()
|
||||
{
|
||||
buffer = std::make_shared<MockRuntimeBuffer<PcSamplingRecordT>>();
|
||||
queue = std::make_shared<MockQueue<PcSamplingRecordT>>(16, buffer);
|
||||
dispatch = std::make_shared<MockDispatch<PcSamplingRecordT>>(queue);
|
||||
}
|
||||
|
||||
void Test()
|
||||
{
|
||||
FillBuffers();
|
||||
CheckBuffers();
|
||||
}
|
||||
|
||||
virtual void FillBuffers() = 0;
|
||||
virtual void CheckBuffers() = 0;
|
||||
|
||||
void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall)
|
||||
{
|
||||
wave_cnt &= 0x3F;
|
||||
inst_type &= 0xF;
|
||||
reason &= 0x7;
|
||||
arb_issue &= 0xFF;
|
||||
arb_stall &= 0xFF;
|
||||
|
||||
perf_sample_snapshot_v1 snap;
|
||||
::memset(&snap, 0, sizeof(snap));
|
||||
snap.pc = dispatch->unique_id;
|
||||
snap.correlation_id = dispatch->getMockId().raw;
|
||||
|
||||
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
|
||||
snap.perf_snapshot_data |= 0x1; // sample is valid
|
||||
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
|
||||
snap.perf_snapshot_data1 = wave_cnt;
|
||||
|
||||
EXPECT_NE(dispatch.get(), nullptr);
|
||||
dispatch->submit(packet_union_t{.snap = snap});
|
||||
};
|
||||
|
||||
std::shared_ptr<MockRuntimeBuffer<PcSamplingRecordT>> buffer;
|
||||
std::shared_ptr<MockQueue<PcSamplingRecordT>> queue;
|
||||
std::shared_ptr<MockDispatch<PcSamplingRecordT>> dispatch;
|
||||
};
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
class WaveCntTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
{
|
||||
@@ -611,6 +565,81 @@ class WaveOtherFieldsTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
std::vector<PcSamplingRecordT> compare;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief This test verifies that the PC address remains unchanged for GFX9.
|
||||
*/
|
||||
template <typename PcSamplingRecordT>
|
||||
void
|
||||
MidMacroPCCorrection<PcSamplingRecordT>::FillBuffers()
|
||||
{
|
||||
this->buffer->genUpcomingSamples(3);
|
||||
// NOTE: mid_macro is relevant only on GFX950
|
||||
genPCSample(0x800, true);
|
||||
genPCSample(0x900, false);
|
||||
genPCSample(0x1000, true);
|
||||
}
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
std::vector<std::vector<PcSamplingRecordT>>
|
||||
MidMacroPCCorrection<PcSamplingRecordT>::get_parsed_data()
|
||||
{
|
||||
return this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
}
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
void
|
||||
MidMacroPCCorrection<PcSamplingRecordT>::CheckBuffers()
|
||||
{
|
||||
auto parsed = get_parsed_data();
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), 3);
|
||||
EXPECT_EQ(compare.size(), 3);
|
||||
|
||||
for(size_t i = 0; i < 3; i++)
|
||||
{
|
||||
// verifying PC address
|
||||
EXPECT_EQ(parsed[0][i].pc.code_object_offset, compare[i].pc.code_object_offset);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief By default, PC address remains unchanged.
|
||||
*/
|
||||
template <typename PcSamplingRecordT>
|
||||
uint64_t
|
||||
MidMacroPCCorrection<PcSamplingRecordT>::calcaulteExpectedPC(uint64_t pc, bool /*mid_macro*/)
|
||||
{
|
||||
return pc;
|
||||
}
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
void
|
||||
MidMacroPCCorrection<PcSamplingRecordT>::genPCSample(uint64_t pc, bool mid_macro)
|
||||
{
|
||||
PcSamplingRecordT sample;
|
||||
::memset(&sample, 0, sizeof(sample));
|
||||
// Calculate the expected PC address
|
||||
sample.pc.code_object_offset = calcaulteExpectedPC(pc, mid_macro);
|
||||
compare.push_back(sample);
|
||||
|
||||
// This test considers only PC address.
|
||||
perf_sample_snapshot_v1 snap;
|
||||
::memset(&snap, 0, sizeof(snap));
|
||||
snap.pc = pc;
|
||||
// Mandatory for correlation mapping. Otherwise, parsing error occurs.
|
||||
snap.correlation_id = this->dispatch->getMockId().raw;
|
||||
|
||||
// to ensure all stochastic samples are generated properly,
|
||||
// marked them as valid
|
||||
snap.perf_snapshot_data |= 0x1; // set the bit indicating the sample is valid
|
||||
|
||||
// the mid_macro is the bit at the position 31
|
||||
snap.perf_snapshot_data1 = (mid_macro << 31);
|
||||
|
||||
EXPECT_NE(this->dispatch.get(), nullptr);
|
||||
this->dispatch->submit(snap);
|
||||
}
|
||||
|
||||
TEST(pcs_parser, gfx9_test)
|
||||
{
|
||||
// Tests specific to stochastic sampling only
|
||||
@@ -628,5 +657,8 @@ TEST(pcs_parser, gfx9_test)
|
||||
WaveOtherFieldsTest<rocprofiler_pc_sampling_record_host_trap_v0_t>{}.Test();
|
||||
WaveOtherFieldsTest<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
|
||||
|
||||
MidMacroPCCorrection<rocprofiler_pc_sampling_record_host_trap_v0_t>{}.Test();
|
||||
MidMacroPCCorrection<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
|
||||
|
||||
std::cout << "GFX9 Test Done." << std::endl;
|
||||
}
|
||||
|
||||
+105
@@ -0,0 +1,105 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 ROCm Developer Tools
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
class WaveSnapTest
|
||||
{
|
||||
public:
|
||||
WaveSnapTest()
|
||||
{
|
||||
buffer = std::make_shared<MockRuntimeBuffer<PcSamplingRecordT>>();
|
||||
queue = std::make_shared<MockQueue<PcSamplingRecordT>>(16, buffer);
|
||||
dispatch = std::make_shared<MockDispatch<PcSamplingRecordT>>(queue);
|
||||
}
|
||||
|
||||
void Test()
|
||||
{
|
||||
FillBuffers();
|
||||
CheckBuffers();
|
||||
}
|
||||
|
||||
virtual void FillBuffers() = 0;
|
||||
virtual void CheckBuffers() = 0;
|
||||
|
||||
void genPCSample(int wave_cnt, int inst_type, int reason, int arb_issue, int arb_stall)
|
||||
{
|
||||
wave_cnt &= 0x3F;
|
||||
inst_type &= 0xF;
|
||||
reason &= 0x7;
|
||||
arb_issue &= 0xFF;
|
||||
arb_stall &= 0xFF;
|
||||
|
||||
perf_sample_snapshot_v1 snap;
|
||||
::memset(&snap, 0, sizeof(snap));
|
||||
snap.pc = dispatch->unique_id;
|
||||
snap.correlation_id = dispatch->getMockId().raw;
|
||||
|
||||
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
|
||||
snap.perf_snapshot_data |= 0x1; // sample is valid
|
||||
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
|
||||
snap.perf_snapshot_data1 = wave_cnt;
|
||||
|
||||
EXPECT_NE(dispatch.get(), nullptr);
|
||||
dispatch->submit(packet_union_t{.snap = snap});
|
||||
};
|
||||
|
||||
std::shared_ptr<MockRuntimeBuffer<PcSamplingRecordT>> buffer;
|
||||
std::shared_ptr<MockQueue<PcSamplingRecordT>> queue;
|
||||
std::shared_ptr<MockDispatch<PcSamplingRecordT>> dispatch;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Testing how mid_macro bit affects the PC address.
|
||||
*
|
||||
* On GFX950, this bit triggers correction of the PC address.
|
||||
* On other GFX9 architectures, the PC address remains unchanged.
|
||||
*/
|
||||
template <typename PcSamplingRecordT>
|
||||
class MidMacroPCCorrection : public WaveSnapTest<PcSamplingRecordT>
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override; // Explicitly mark as override
|
||||
void CheckBuffers() override; // Explicitly mark as override
|
||||
|
||||
/**
|
||||
* @brief Generate PC sample with mid_macro flag.
|
||||
* The @p mid_macro is relevant for the GFX950, so it's false by default
|
||||
*/
|
||||
virtual void genPCSample(uint64_t pc, bool mid_macro = false);
|
||||
|
||||
/**
|
||||
* @brief Caulcate expected PC address for comparison.
|
||||
*/
|
||||
virtual uint64_t calcaulteExpectedPC(uint64_t pc, bool mid_macro = false);
|
||||
|
||||
virtual std::vector<std::vector<PcSamplingRecordT>> get_parsed_data();
|
||||
|
||||
protected:
|
||||
///< testing data
|
||||
std::vector<PcSamplingRecordT> compare;
|
||||
};
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
template <>
|
||||
void
|
||||
MockRuntimeBuffer<rocprofiler_pc_sampling_record_host_trap_v0_t>::genUpcomingSamples(
|
||||
int num_samples)
|
||||
{
|
||||
genUpcomingSamples(num_samples, AMD_HOST_TRAP_V1);
|
||||
}
|
||||
|
||||
template <>
|
||||
void
|
||||
MockRuntimeBuffer<rocprofiler_pc_sampling_record_stochastic_v0_t>::genUpcomingSamples(
|
||||
int num_samples)
|
||||
{
|
||||
this->genUpcomingSamples(num_samples, AMD_SNAPSHOT_V1);
|
||||
}
|
||||
+6
-17
@@ -74,13 +74,18 @@ public:
|
||||
//! Submits an "upcoming_samples_t" packet signaling the next num_samples packets are PC samples
|
||||
void genUpcomingSamples(int num_samples);
|
||||
|
||||
std::vector<std::vector<PcSamplingRecordT>> get_parsed_buffer(int GFXIP_MAJOR)
|
||||
/**
|
||||
* @brief By default, we assume the gfx94X.
|
||||
*/
|
||||
std::vector<std::vector<PcSamplingRecordT>> get_parsed_buffer(int GFXIP_MAJOR,
|
||||
int GFXIP_MINOR = 4)
|
||||
{
|
||||
parsed_data = {};
|
||||
|
||||
CHECK_PARSER(parse_buffer((generic_sample_t*) packets.data(),
|
||||
packets.size(),
|
||||
GFXIP_MAJOR,
|
||||
GFXIP_MINOR,
|
||||
&alloc_parse_memory,
|
||||
this));
|
||||
|
||||
@@ -103,22 +108,6 @@ public:
|
||||
const uint32_t device;
|
||||
};
|
||||
|
||||
template <>
|
||||
void
|
||||
MockRuntimeBuffer<rocprofiler_pc_sampling_record_host_trap_v0_t>::genUpcomingSamples(
|
||||
int num_samples)
|
||||
{
|
||||
genUpcomingSamples(num_samples, AMD_HOST_TRAP_V1);
|
||||
}
|
||||
|
||||
template <>
|
||||
void
|
||||
MockRuntimeBuffer<rocprofiler_pc_sampling_record_stochastic_v0_t>::genUpcomingSamples(
|
||||
int num_samples)
|
||||
{
|
||||
this->genUpcomingSamples(num_samples, AMD_SNAPSHOT_V1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mimics an HSA doorbell. Every live instance of this class has a unique ID (handler).
|
||||
* The handler itself may not be unique once dead instances are taken into account.
|
||||
|
||||
+3
-3
@@ -20,12 +20,12 @@
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/tests/mocks.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <cstddef>
|
||||
|
||||
#include <future>
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp"
|
||||
#include "mocks.hpp"
|
||||
|
||||
#define GFXIP_MAJOR 9
|
||||
constexpr size_t NUM_THREADS = 8;
|
||||
|
||||
+48
-73
@@ -22,86 +22,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx11.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx9.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx950.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h"
|
||||
|
||||
#include <rocprofiler-sdk/pc_sampling.h>
|
||||
|
||||
// TODO: refactor the commented code for stochastic sampling
|
||||
|
||||
// template <typename gfx>
|
||||
// inline rocprofiler_pc_sampling_record_t
|
||||
// copyStochasticSample(const perf_sample_snapshot_v1& sample);
|
||||
|
||||
// template <>
|
||||
// inline rocprofiler_pc_sampling_record_t
|
||||
// copyStochasticSample<GFX9>(const perf_sample_snapshot_v1& sample)
|
||||
// {
|
||||
// rocprofiler_pc_sampling_record_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 26) & 0x1;
|
||||
// // Check wave_id matches snapshot_wave_id
|
||||
|
||||
// ret.flags.has_wave_cnt = true;
|
||||
// ret.flags.has_stall_reason = true;
|
||||
|
||||
// ret.wave_count = sample.perf_snapshot_data1 & 0x3F;
|
||||
|
||||
// ret.snapshot.dual_issue_valu = sample.perf_snapshot_data >> 2;
|
||||
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 3;
|
||||
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
|
||||
// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 10) & 0xFF;
|
||||
// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 18) & 0xFF;
|
||||
// ret.reserved = 0;
|
||||
// return ret;
|
||||
// }
|
||||
|
||||
// template <>
|
||||
// inline rocprofiler_pc_sampling_record_t
|
||||
// copyStochasticSample<GFX11>(const perf_sample_snapshot_v1& sample)
|
||||
// {
|
||||
// rocprofiler_pc_sampling_record_t ret = copySampleHeader<perf_sample_snapshot_v1>(sample);
|
||||
// ret.flags.valid = sample.perf_snapshot_data & (~sample.perf_snapshot_data >> 23) & 0x1;
|
||||
// // Check wave_id matches snapshot_wave_id
|
||||
|
||||
// ret.flags.has_stall_reason = true;
|
||||
|
||||
// ret.wave_issued = sample.perf_snapshot_data >> 1;
|
||||
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 2;
|
||||
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 6) & 0x7;
|
||||
// ret.snapshot.arb_state_issue = (sample.perf_snapshot_data >> 9) & 0x7F;
|
||||
// ret.snapshot.arb_state_stall = (sample.perf_snapshot_data >> 16) & 0x7F;
|
||||
// ret.snapshot.dual_issue_valu = false;
|
||||
// ret.reserved = 0;
|
||||
// return ret;
|
||||
// }
|
||||
|
||||
// #define BITSHIFT(sname) out |= ((in >> GFX::sname) & 1) << PCSAMPLE::sname
|
||||
|
||||
// template <typename GFX>
|
||||
// inline int
|
||||
// translate_arb(int in)
|
||||
// {
|
||||
// size_t out = 0;
|
||||
// BITSHIFT(ISSUE_VALU);
|
||||
// BITSHIFT(ISSUE_MATRIX);
|
||||
// BITSHIFT(ISSUE_LDS);
|
||||
// BITSHIFT(ISSUE_LDS_DIRECT);
|
||||
// BITSHIFT(ISSUE_SCALAR);
|
||||
// BITSHIFT(ISSUE_VMEM_TEX);
|
||||
// BITSHIFT(ISSUE_FLAT);
|
||||
// BITSHIFT(ISSUE_EXP);
|
||||
// BITSHIFT(ISSUE_MISC);
|
||||
// BITSHIFT(ISSUE_BRMSG);
|
||||
// return out & 0x3FF;
|
||||
// }
|
||||
|
||||
// #undef BITSHIFT
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
|
||||
// Helper for filling a lookup table from inside one of its member functions (it expands to a
// call on `this->operator[]`). The slot indexed by the architecture-specific enumerator
// `GFX::sname` is assigned the token-pasted identifier `<rocp_prefix>_<sname>`.
// `GFX` must name a type/namespace visible at the expansion site.
#define LUTOVERLOAD(sname, rocp_prefix) this->operator[](GFX::sname) = rocp_prefix##_##sname
|
||||
// Convenience form of LUTOVERLOAD that fixes the prefix to ROCPROFILER_PC_SAMPLING_INSTRUCTION,
// i.e. `GFX::sname` is mapped to `ROCPROFILER_PC_SAMPLING_INSTRUCTION_<sname>`.
#define LUTOVERLOAD_INST(sname) LUTOVERLOAD(sname, ROCPROFILER_PC_SAMPLING_INSTRUCTION)
|
||||
@@ -322,6 +253,20 @@ copySample<GFX9, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sam
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline rocprofiler_pc_sampling_record_host_trap_v0_t
|
||||
copySample<GFX950, rocprofiler_pc_sampling_record_host_trap_v0_t>(const void* sample)
|
||||
{
|
||||
return copySample<GFX9, rocprofiler_pc_sampling_record_host_trap_v0_t>(sample);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline rocprofiler_pc_sampling_record_stochastic_v0_t
|
||||
copySample<GFX950, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sample)
|
||||
{
|
||||
return copySample<GFX9, rocprofiler_pc_sampling_record_stochastic_v0_t>(sample);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Host trap V0 sample for GFX11
|
||||
*/
|
||||
@@ -348,4 +293,34 @@ copySample<GFX11, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sa
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief The default implementation assumes no correction is needed.
|
||||
*/
|
||||
template <typename GFX, typename PcSamplingRecordT>
|
||||
inline rocprofiler_address_t
|
||||
correct_pc_address(const perf_sample_snapshot_v1* sample)
|
||||
{
|
||||
return rocprofiler_address_t{.value = sample->pc};
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief GFX950 specific implementation of the PC address correction.
|
||||
*/
|
||||
template <>
|
||||
inline rocprofiler_address_t
|
||||
correct_pc_address<GFX950, rocprofiler_pc_sampling_record_stochastic_v0_t>(
|
||||
const perf_sample_snapshot_v1* sample)
|
||||
{
|
||||
// If mid_macro bit is 1, then reg spec says we need to subtract 2 dwords from the PC address.
|
||||
auto mid_macro = static_cast<bool>(EXTRACT_BITS(sample->perf_snapshot_data1, 31, 31));
|
||||
if(mid_macro)
|
||||
{
|
||||
return rocprofiler_address_t{.value = sample->pc - 2 * sizeof(uint32_t)};
|
||||
}
|
||||
else
|
||||
{
|
||||
return rocprofiler_address_t{.value = sample->pc};
|
||||
}
|
||||
}
|
||||
|
||||
#undef EXTRACT_BITS
|
||||
|
||||
Ссылка в новой задаче
Block a user