From b5e4074c7883c97ea13ea2ad63fa34367906d439 Mon Sep 17 00:00:00 2001 From: moniljethva Date: Thu, 29 Jan 2026 17:39:12 +0530 Subject: [PATCH] Adding support for GFX 11.5 in AQL Profiler (#2340) * Adding support of AQL Profiler for GFX 11.5 * Removing hard coded value for sa_number * Adding instance count for WGP block, removing hard coded values. * Fixed SQ counter block and TD counter block instances --- projects/aqlprofile/build.sh | 2 +- projects/aqlprofile/gfxip/gfx11/gfx11_block_table.h | 4 ++-- projects/aqlprofile/src/core/aql_profile.cpp | 5 ++--- projects/aqlprofile/src/core/counter_dimensions.hpp | 4 ++++ projects/aqlprofile/src/core/gfx115x_factory.cpp | 9 +++++++++ projects/aqlprofile/src/core/pm4_factory.h | 5 ++--- projects/aqlprofile/src/pm4/pmc_builder.h | 2 +- 7 files changed, 21 insertions(+), 10 deletions(-) diff --git a/projects/aqlprofile/build.sh b/projects/aqlprofile/build.sh index 0de00a2c7e..5b2a504cb3 100755 --- a/projects/aqlprofile/build.sh +++ b/projects/aqlprofile/build.sh @@ -64,7 +64,7 @@ if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi -if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151"; fi +if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151 gfx1152 gfx1153"; fi AQLPROFILE_ROOT=$(cd $AQLPROFILE_ROOT && echo $PWD) diff --git a/projects/aqlprofile/gfxip/gfx11/gfx11_block_table.h b/projects/aqlprofile/gfxip/gfx11/gfx11_block_table.h index 9ca6f9a900..f9e6f19d20 100644 --- a/projects/aqlprofile/gfxip/gfx11/gfx11_block_table.h +++ b/projects/aqlprofile/gfxip/gfx11/gfx11_block_table.h @@ -325,7 +325,7 @@ static const GpuBlockInfo SqCounterBlockInfo = { SqCounterBlockNumCounters, SqCounterRegAddr, gfx11_cntx_prim::sq_select_value, - CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr, + CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr | CounterBlockWgpAttr, BLOCK_DELAY_NONE}; // Counter block SX static const GpuBlockInfo SxCounterBlockInfo = { @@ -358,7 +358,7 @@ static const GpuBlockInfo TcpCounterBlockInfo = { TcpCounterBlockNumCounters, TcpCounterRegAddr, gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT, - CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr, + CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockWgpAttr, BLOCK_DELAY_NONE}; // Counter block GL1A static const GpuBlockInfo Gl1aCounterBlockInfo = { diff --git a/projects/aqlprofile/src/core/aql_profile.cpp b/projects/aqlprofile/src/core/aql_profile.cpp index eb213140ff..73199900b6 100644 --- a/projects/aqlprofile/src/core/aql_profile.cpp +++ b/projects/aqlprofile/src/core/aql_profile.cpp @@ -600,6 +600,7 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof const bool is_concurrent = pm4_factory->IsConcurrent(); const uint32_t xcc_num = pm4_factory->GetXccNumber(); const uint32_t se_number = pm4_factory->GetShaderEnginesNumber() / xcc_num; + const uint32_t sa_number = pm4_factory->GetShaderArraysNumber(); if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) { uint64_t* samples = reinterpret_cast(profile->output_buffer.ptr); @@ -648,11 +649,9 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSeAttr) block_samples_count *= se_number; if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSaAttr) - block_samples_count *= 2; + block_samples_count *= sa_number; if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockWgpAttr) block_samples_count *= pm4_factory->GetNumWGPs(); - if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSqAttr && pm4_factory->IsGFX11()) - block_samples_count *= pm4_factory->GetNumWGPs(); for (uint32_t blk = 0; blk < block_samples_count; ++blk) { hsa_ven_amd_aqlprofile_info_data_t sample_info; diff --git a/projects/aqlprofile/src/core/counter_dimensions.hpp b/projects/aqlprofile/src/core/counter_dimensions.hpp index 5b404321c9..a227cbaa15 100644 --- a/projects/aqlprofile/src/core/counter_dimensions.hpp +++ b/projects/aqlprofile/src/core/counter_dimensions.hpp @@ -123,7 +123,11 @@ class EventAttribDimension { if (num_aid > 1) dimensions.push_back({"AID", num_aid}); if (workgroup_processor) + { dimensions.push_back({"WGP", wgp_num}); + if(bIsGFX11) + dimensions.push_back({"INSTANCE", block_instance_count}); + } else dimensions.push_back({"INSTANCE", block_instance_count}); diff --git a/projects/aqlprofile/src/core/gfx115x_factory.cpp b/projects/aqlprofile/src/core/gfx115x_factory.cpp index f16c888281..e413c33f16 100644 --- a/projects/aqlprofile/src/core/gfx115x_factory.cpp +++ b/projects/aqlprofile/src/core/gfx115x_factory.cpp @@ -50,6 +50,15 @@ Gfx115xFactory::Gfx115xFactory(const AgentInfo* agent_info) case Gl2cCounterBlockId: block_info->instance_count = 4; break; + case TcpCounterBlockId: + block_info->instance_count = 2; + break; + case TaCounterBlockId: + block_info->instance_count = 2; + break; + case TdCounterBlockId: + block_info->instance_count = 2; + break; default: break; } diff --git a/projects/aqlprofile/src/core/pm4_factory.h b/projects/aqlprofile/src/core/pm4_factory.h index cfa5ca58f9..5c1e48adf7 100644 --- a/projects/aqlprofile/src/core/pm4_factory.h +++ b/projects/aqlprofile/src/core/pm4_factory.h @@ -191,17 +191,16 @@ class Pm4Factory { virtual size_t GetNumEvents(uint32_t block_name) const { size_t se_number = GetShaderEnginesNumber() / GetXccNumber(); + size_t sa_number = GetShaderArraysNumber(); size_t block_samples_count = 1; auto* block_info = GetBlockInfo(block_name); if (block_info->attr & CounterBlockSeAttr) block_samples_count *= se_number; if (block_info->attr & CounterBlockSaAttr) - block_samples_count *= 2; + block_samples_count *= sa_number; if (block_info->attr & CounterBlockWgpAttr) block_samples_count *= GetNumWGPs(); - if ((block_info->attr & CounterBlockSqAttr) && IsGFX11()) // TODO: Move to CounterBlockWgpAttr - block_samples_count *= GetNumWGPs(); return block_samples_count; } diff --git a/projects/aqlprofile/src/pm4/pmc_builder.h b/projects/aqlprofile/src/pm4/pmc_builder.h index 96052dfd19..22609bb71d 100644 --- a/projects/aqlprofile/src/pm4/pmc_builder.h +++ b/projects/aqlprofile/src/pm4/pmc_builder.h @@ -577,7 +577,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives { // skip } else { const uint32_t se_end_index = (block_info->attr & CounterBlockSeAttr) ? se_number_ : 1; - const uint32_t sa_end_index = (block_info->attr & CounterBlockSaAttr) ? 2 : 1; + const uint32_t sa_end_index = (block_info->attr & CounterBlockSaAttr) ? sarrays_per_se : 1; for (uint32_t se_index = 0; se_index < se_end_index; ++se_index) for (uint32_t sarray = 0; sarray < sa_end_index; ++sarray) { uint32_t grbm_value = Primitives::grbm_broadcast_value();