Adding support for GFX 11.5 in AQL Profiler (#2340)
* Adding AQL Profiler support for GFX 11.5
* Removing the hard-coded value for sa_number
* Adding an instance count for the WGP block and removing hard-coded values
* Fixing the SQ counter block and TD counter block instances
这个提交包含在:
@@ -64,7 +64,7 @@ if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
|
||||
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
|
||||
if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
|
||||
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151"; fi
|
||||
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx942 gfx950 gfx1030 gfx1100 gfx1101 gfx1102 gfx1031 gfx1150 gfx1151 gfx1152 gfx1153"; fi
|
||||
|
||||
AQLPROFILE_ROOT=$(cd $AQLPROFILE_ROOT && echo $PWD)
|
||||
|
||||
|
||||
@@ -325,7 +325,7 @@ static const GpuBlockInfo SqCounterBlockInfo = {
|
||||
SqCounterBlockNumCounters,
|
||||
SqCounterRegAddr,
|
||||
gfx11_cntx_prim::sq_select_value,
|
||||
CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr,
|
||||
CounterBlockSeAttr | CounterBlockSqAttr | CounterBlockSaAttr | CounterBlockWgpAttr,
|
||||
BLOCK_DELAY_NONE};
|
||||
// Counter block SX
|
||||
static const GpuBlockInfo SxCounterBlockInfo = {
|
||||
@@ -358,7 +358,7 @@ static const GpuBlockInfo TcpCounterBlockInfo = {
|
||||
TcpCounterBlockNumCounters,
|
||||
TcpCounterRegAddr,
|
||||
gfx11_cntx_prim::select_value_TCP_PERFCOUNTER0_SELECT,
|
||||
CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr,
|
||||
CounterBlockDfltAttr | CounterBlockSeAttr | CounterBlockSaAttr | CounterBlockWgpAttr,
|
||||
BLOCK_DELAY_NONE};
|
||||
// Counter block GL1A
|
||||
static const GpuBlockInfo Gl1aCounterBlockInfo = {
|
||||
|
||||
@@ -600,6 +600,7 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof
|
||||
const bool is_concurrent = pm4_factory->IsConcurrent();
|
||||
const uint32_t xcc_num = pm4_factory->GetXccNumber();
|
||||
const uint32_t se_number = pm4_factory->GetShaderEnginesNumber() / xcc_num;
|
||||
const uint32_t sa_number = pm4_factory->GetShaderArraysNumber();
|
||||
|
||||
if (profile->type == HSA_VEN_AMD_AQLPROFILE_EVENT_TYPE_PMC) {
|
||||
uint64_t* samples = reinterpret_cast<uint64_t*>(profile->output_buffer.ptr);
|
||||
@@ -648,11 +649,9 @@ hsa_ven_amd_aqlprofile_iterate_data(const hsa_ven_amd_aqlprofile_profile_t* prof
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSeAttr)
|
||||
block_samples_count *= se_number;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSaAttr)
|
||||
block_samples_count *= 2;
|
||||
block_samples_count *= sa_number;
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockWgpAttr)
|
||||
block_samples_count *= pm4_factory->GetNumWGPs();
|
||||
if (pm4_factory->GetBlockInfo(p)->attr & CounterBlockSqAttr && pm4_factory->IsGFX11())
|
||||
block_samples_count *= pm4_factory->GetNumWGPs();
|
||||
|
||||
for (uint32_t blk = 0; blk < block_samples_count; ++blk) {
|
||||
hsa_ven_amd_aqlprofile_info_data_t sample_info;
|
||||
|
||||
@@ -123,7 +123,11 @@ class EventAttribDimension {
|
||||
if (num_aid > 1) dimensions.push_back({"AID", num_aid});
|
||||
|
||||
if (workgroup_processor)
|
||||
{
|
||||
dimensions.push_back({"WGP", wgp_num});
|
||||
if(bIsGFX11)
|
||||
dimensions.push_back({"INSTANCE", block_instance_count});
|
||||
}
|
||||
else
|
||||
dimensions.push_back({"INSTANCE", block_instance_count});
|
||||
|
||||
|
||||
@@ -50,6 +50,15 @@ Gfx115xFactory::Gfx115xFactory(const AgentInfo* agent_info)
|
||||
case Gl2cCounterBlockId:
|
||||
block_info->instance_count = 4;
|
||||
break;
|
||||
case TcpCounterBlockId:
|
||||
block_info->instance_count = 2;
|
||||
break;
|
||||
case TaCounterBlockId:
|
||||
block_info->instance_count = 2;
|
||||
break;
|
||||
case TdCounterBlockId:
|
||||
block_info->instance_count = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -191,17 +191,16 @@ class Pm4Factory {
|
||||
|
||||
virtual size_t GetNumEvents(uint32_t block_name) const {
|
||||
size_t se_number = GetShaderEnginesNumber() / GetXccNumber();
|
||||
size_t sa_number = GetShaderArraysNumber();
|
||||
size_t block_samples_count = 1;
|
||||
auto* block_info = GetBlockInfo(block_name);
|
||||
|
||||
if (block_info->attr & CounterBlockSeAttr)
|
||||
block_samples_count *= se_number;
|
||||
if (block_info->attr & CounterBlockSaAttr)
|
||||
block_samples_count *= 2;
|
||||
block_samples_count *= sa_number;
|
||||
if (block_info->attr & CounterBlockWgpAttr)
|
||||
block_samples_count *= GetNumWGPs();
|
||||
if ((block_info->attr & CounterBlockSqAttr) && IsGFX11()) // TODO: Move to CounterBlockWgpAttr
|
||||
block_samples_count *= GetNumWGPs();
|
||||
return block_samples_count;
|
||||
}
|
||||
|
||||
|
||||
@@ -577,7 +577,7 @@ class GpuPmcBuilder : public PmcBuilder, protected Primitives {
|
||||
// skip
|
||||
} else {
|
||||
const uint32_t se_end_index = (block_info->attr & CounterBlockSeAttr) ? se_number_ : 1;
|
||||
const uint32_t sa_end_index = (block_info->attr & CounterBlockSaAttr) ? 2 : 1;
|
||||
const uint32_t sa_end_index = (block_info->attr & CounterBlockSaAttr) ? sarrays_per_se : 1;
|
||||
for (uint32_t se_index = 0; se_index < se_end_index; ++se_index)
|
||||
for (uint32_t sarray = 0; sarray < sa_end_index; ++sarray) {
|
||||
uint32_t grbm_value = Primitives::grbm_broadcast_value();
|
||||
|
||||
在新工单中引用
屏蔽一个用户