Adding support for GFX 11.5 in AQL Profiler (#2340)

* Adding support of AQL Profiler for GFX 11.5

* Removing hard coded value for sa_number

* Adding instance count for WGP block, removing hard coded values.

* Fixed SQ counter block and TD counter block instances
Bu işleme şunda yer alıyor:
moniljethva
2026-01-29 17:39:12 +05:30
işlemeyi yapan: GitHub
ebeveyn 190d9a8e27
işleme b5e4074c78
7 değiştirilmiş dosya ile 21 ekleme ve 10 silme
+1 -1
Dosyayı Görüntüle
@@ -64,7 +64,7 @@ if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
# Defaults for optional build settings: each variable is assigned only when it
# is empty or unset, so a value supplied by the caller's environment wins.
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
# A non-empty ROCM_RPATH is turned into extra linker flags carrying that rpath.
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
# Exactly one default for GPU_LIST. A second "-z" guarded assignment placed
# after another one can never fire, so the full target list (including
# gfx1152 and gfx1153) has to be the only default.
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151 gfx1152 gfx1153"; fi
# Re-assign AQLPROFILE_ROOT to the working directory reported after cd-ing into it.
AQLPROFILE_ROOT=$(cd $AQLPROFILE_ROOT && echo $PWD)
+2 -2
Dosyayı Görüntüle
@@ -325,7 +325,7 @@ static const GpuBlockInfo SqCounterBlockInfo = {
SqCounterBlockNumCounters,
SqCounterRegAddr,
gfx11_cntx_prim::sq_select_value,
CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr,
CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr | CounterBlockWgpAttr,
BLOCK_DELAY_NONE};
// Counter block SX
static const GpuBlockInfo SxCounterBlockInfo = {
@@ -358,7 +358,7 @@ static const GpuBlockInfo TcpCounterBlockInfo = {
TcpCounterBlockNumCounters,
TcpCounterRegAddr,
gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr,
CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockWgpAttr,
BLOCK_DELAY_NONE};
// Counter block GL1A
static const GpuBlockInfo Gl1aCounterBlockInfo = {
+2 -3
Dosyayı Görüntüle
@@ -600,6 +600,7 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof
const bool is_concurrent = pm4_factory->IsConcurrent();
const uint32_t xcc_num = pm4_factory->GetXccNumber();
const uint32_t se_number = pm4_factory->GetShaderEnginesNumber() / xcc_num;
const uint32_t sa_number = pm4_factory->GetShaderArraysNumber();
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
uint64_t* samples = reinterpret_cast<uint64_t*>(profile->output_buffer.ptr);
@@ -648,11 +649,9 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSeAttr)
block_samples_count *= se_number;
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSaAttr)
block_samples_count *= 2;
block_samples_count *= sa_number;
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockWgpAttr)
block_samples_count *= pm4_factory->GetNumWGPs();
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSqAttr && pm4_factory->IsGFX11())
block_samples_count *= pm4_factory->GetNumWGPs();
for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
hsa_ven_amd_aqlprofile_info_data_t sample_info;
+4
Dosyayı Görüntüle
@@ -123,7 +123,11 @@ class EventAttribDimension {
if (num_aid > 1) dimensions.push_back({"AID", num_aid});
if (workgroup_processor)
{
dimensions.push_back({"WGP", wgp_num});
if(bIsGFX11)
dimensions.push_back({"INSTANCE", block_instance_count});
}
else
dimensions.push_back({"INSTANCE", block_instance_count});
+9
Dosyayı Görüntüle
@@ -50,6 +50,15 @@ Gfx115xFactory::Gfx115xFactory(const AgentInfo* agent_info)
case Gl2cCounterBlockId:
block_info->instance_count = 4;
break;
case TcpCounterBlockId:
block_info->instance_count = 2;
break;
case TaCounterBlockId:
block_info->instance_count = 2;
break;
case TdCounterBlockId:
block_info->instance_count = 2;
break;
default:
break;
}
+2 -3
Dosyayı Görüntüle
@@ -191,17 +191,16 @@ class Pm4Factory {
/// Number of samples a single counter of the given block produces.
///
/// The count starts at 1 and is multiplied once for every dimension that the
/// block's attribute mask declares:
///   - CounterBlockSeAttr  -> GetShaderEnginesNumber() / GetXccNumber()
///   - CounterBlockSaAttr  -> GetShaderArraysNumber()
///   - CounterBlockWgpAttr -> GetNumWGPs()
///
/// @param block_name  Block id handed to GetBlockInfo().
/// @return Product of the sizes of all dimensions flagged on the block.
virtual size_t GetNumEvents(uint32_t block_name) const {
  const size_t se_number = GetShaderEnginesNumber() / GetXccNumber();
  const size_t sa_number = GetShaderArraysNumber();

  const auto* block_info = GetBlockInfo(block_name);
  const auto attr = block_info->attr;

  size_t block_samples_count = 1;
  if (attr & CounterBlockSeAttr) block_samples_count *= se_number;

  // Scale by the queried shader-array count only, and only for blocks that
  // carry the SA attribute; no fixed factor of 2 is applied.
  // NOTE(review): assumes GetShaderArraysNumber() is the count the SA
  // dimension should be multiplied by (per shader engine) -- confirm.
  if (attr & CounterBlockSaAttr) block_samples_count *= sa_number;

  // A block is per-WGP when it is flagged CounterBlockWgpAttr. SQ blocks on
  // GFX11 are still honoured through the older SqAttr rule, but the WGP factor
  // is applied at most once even when both conditions hold.
  // TODO: drop the SqAttr/IsGFX11 fallback once every SQ block table carries
  // CounterBlockWgpAttr.
  const bool per_wgp =
      (attr & CounterBlockWgpAttr) || ((attr & CounterBlockSqAttr) && IsGFX11());
  if (per_wgp) block_samples_count *= GetNumWGPs();

  return block_samples_count;
}
+1 -1
Dosyayı Görüntüle
@@ -577,7 +577,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
// skip
} else {
const uint32_t se_end_index = (block_info->attr & CounterBlockSeAttr) ? se_number_ : 1;
const uint32_t sa_end_index = (block_info->attr & CounterBlockSaAttr) ? 2 : 1;
const uint32_t sa_end_index = (block_info->attr & CounterBlockSaAttr) ? sarrays_per_se : 1;
for (uint32_t se_index = 0; se_index < se_end_index; ++se_index)
for (uint32_t sarray = 0; sarray < sa_end_index; ++sarray) {
uint32_t grbm_value = Primitives::grbm_broadcast_value();