SWDEV-380635: adding gfx11 architecture to rocprofiler which includes navi31 and navi32 for now
Change-Id: Ib2a93a34688471c82b5db0dc10e8da58452dba21
[ROCm/rocprofiler commit: 997c771723]
Este commit está contenido en:
cometido por
Giovanni Baraldi
padre
ac1b1dc139
commit
a260b63b96
@@ -201,7 +201,9 @@ class MetricsDict {
|
||||
std::string("gfx90a") == agent_info->name ||
|
||||
std::string("gfx1032") == agent_info->name ||
|
||||
std::string("gfx1031") == agent_info->name ||
|
||||
std::string("gfx1030") == agent_info->name){
|
||||
std::string("gfx1030") == agent_info->name ||
|
||||
std::string("gfx1100") == agent_info->name ||
|
||||
std::string("gfx1101") == agent_info->name){
|
||||
ImportMetrics(agent_info, agent_info->name);
|
||||
} else {
|
||||
agent_name_ = agent_info->gfxip;
|
||||
|
||||
@@ -425,3 +425,52 @@
|
||||
<gfx1032 base="gfx10">
|
||||
</gfx1032>
|
||||
|
||||
<gfx11>
|
||||
<metric name="GRBM_COUNT" block=GRBM event=0 descr="Tie High - Count Number of Clocks"></metric>
|
||||
<metric name="GRBM_GUI_ACTIVE" block=GRBM event=2 descr="The GUI is Active"></metric>
|
||||
|
||||
<metric name="GL2C_HIT" block=GL2C event=42 descr="Number of cache hits"></metric>
|
||||
<metric name="GL2C_MISS" block=GL2C event=43 descr="Number of cache misses. UC reads count as misses."></metric>
|
||||
<metric name="GL2C_MC_WRREQ" block=GL2C event=83 descr="Number of transactions (either 32-byte or 64-byte) going over the GL2C_EA_wrreq interface. Atomics may travel over the same interface and are generally classified as write requests. This does not include probe commands"></metric>
|
||||
<metric name="GL2C_EA_WRREQ_64B" block=GL2C event=85 descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the GL2C_EA_wrreq interface."></metric>
|
||||
<metric name="GL2C_MC_WRREQ_STALL" block=GL2C event=88 descr="Number of cycles a write request was stalled."></metric>
|
||||
<metric name="GL2C_MC_RDREQ" block=GL2C event=96 descr="Number of GL2C/EA read requests (either 32-byte or 64-byte or 128-byte)."></metric>
|
||||
<metric name="GL2C_EA_RDREQ_32B" block=GL2C event=99 descr="Number of 32-byte GL2C/EA read requests"></metric>
|
||||
<metric name="GL2C_EA_RDREQ_64B" block=GL2C event=100 descr="Number of 64-byte GL2C/EA read requests"></metric>
|
||||
<metric name="GL2C_EA_RDREQ_96B" block=GL2C event=101 descr="Number of 96-byte GL2C/EA read requests"></metric>
|
||||
<metric name="GL2C_EA_RDREQ_128B" block=GL2C event=102 descr="Number of 128-byte GL2C/EA read requests"></metric>
|
||||
|
||||
<metric name="SQ_WAVES" block=SQ event=4 descr="Count number of waves sent to SQs. {emulated, global, C1}"></metric>
|
||||
<metric name="SQ_WAVE_CYCLES" block=SQ event=24 descr="Number of clock cycles spent by waves in the SQs. Incremented by number of living (valid) waves each cycle. {nondeterministic, C1}"></metric>
|
||||
<metric name="SQ_WAIT_INST_ANY" block=SQ event=26 descr="Number of clock-cycles spent waiting for any instruction issue. In units of cycles. (nondeterministic)"></metric>
|
||||
<metric name="SQ_WAIT_ANY" block=SQ event=35 descr="Number of wave-cycles spent waiting for anything (nondeterministic, C1)"></metric>
|
||||
<metric name="SQ_INSTS_WAVE32" block=SQ event=70 descr="Number of wave32 instructions issued, for flat, lds, valu, tex. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_WAVE32_LDS" block=SQ event=72 descr="Number of wave32 LDS indexed instructions issued. Wave64 may count 1 or 2, depending on what gets issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_WAVE32_VALU" block=SQ event=73 descr="Number of wave32 valu instructions issued. Wave64 may count 1 or 2, depending on what gets issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_WAVE32_INSTS" block=SQ event=82 descr="Number of instructions issued by wave32 waves. Skipped instructions are not counted. {emulated}"></metric>
|
||||
<metric name="SQ_WAVE64_INSTS" block=SQ event=83 descr="Number of instructions issued by wave64 waves. Skipped instructions are not counted. {emulated}"></metric>
|
||||
<metric name="SQ_INST_LEVEL_GDS" block=SQ event=87 descr="Number of in-flight GDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_GDS for average latency. {level, nondeterministic, C1}"></metric>
|
||||
<metric name="SQ_INST_LEVEL_LDS" block=SQ event=88 descr="Number of in-flight LDS instructions. Set next counter to ACCUM_PREV and divide by INSTS_LDS for average latency. Includes FLAT instructions. {level, nondeterministic, C1}"></metric>
|
||||
<metric name="SQ_INST_CYCLES_VMEM" block=SQ event=106 descr="Number of cycles needed to send addr and data for VMEM (lds, buffer, image, flat, scratch, global) instructions, windowed by perf_en. {emulated, C1}"></metric>
|
||||
<metric name="SQC_LDS_BANK_CONFLICT" block=SQ event=256 descr="Number of cycles LDS is stalled by bank conflicts. (emulated, C1)"></metric>
|
||||
<metric name="SQC_LDS_IDX_ACTIVE" block=SQ event=261 descr="Number of cycles LDS is used for indexed (non-direct,non-interpolation) operations. {per-simd, emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_VALU" block=SQ event=62 descr="Number of VALU instructions issued excluding skipped instructions. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_SALU" block=SQ event=58 descr="Number of SALU instructions issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_SMEM" block=SQ event=59 descr="Number of SMEM instructions issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_FLAT" block=SQ event=56 descr="Number of FLAT instructions issued. {emulated, C2}"></metric>
|
||||
<metric name="SQ_INSTS_LDS" block=SQ event=57 descr="Number of LDS indexed instructions issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_GDS" block=SQ event=54 descr="Number of GDS instructions issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_TEX_LOAD" block=SQ event=66 descr="Number of buffer load, image load, sample, or atomic (with return) instructions issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_INSTS_TEX_STORE" block=SQ event=67 descr="Number of buffer store, image store, or atomic (without return) instructions issued. {emulated, C1}"></metric>
|
||||
<metric name="SQ_WAIT_INST_LDS" block=SQ event=29 descr="Number of clock cycles spent waiting for LDS (indexed) instruction issue. In units of cycles. {nondeterministic, C1}"></metric>
|
||||
<metric name="TA_TA_BUSY" block=TA event=15 descr="TA block is busy. Perf_Windowing not supported for this counter."></metric>
|
||||
<metric name="TA_BUFFER_LOAD_WAVEFRONTS" block=TA event=45 descr="Number of buffer load vec32 packets processed by TA"></metric>
|
||||
<metric name="TA_BUFFER_STORE_WAVEFRONTS" block=TA event=46 descr="Number of buffer store vec32 packets processed by TA"></metric>
|
||||
|
||||
</gfx11>
|
||||
|
||||
<gfx1100 base="gfx11">
|
||||
</gfx1100>
|
||||
|
||||
<gfx1101 base="gfx11">
|
||||
</gfx1101>
|
||||
|
||||
@@ -239,6 +239,39 @@
|
||||
<metric name="LDSBankConflict" expr=100*SQC_LDS_BANK_CONFLICT/SQC_LDS_IDX_ACTIVE descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."></metric>
|
||||
</gfx10_expr>
|
||||
|
||||
<gfx11_expr>
|
||||
<metric name="GPU_UTIL" expr=100*GRBM_GUI_ACTIVE/GRBM_COUNT descr="Percentage of the time that GUI is active"></metric>
|
||||
<metric name="WAVE_DEP_WAIT" expr=100*SQ_WAIT_ANY/SQ_WAVE_CYCLES descr="Percentage of the SQ_WAVE_CYCLES time spent waiting for anything."></metric>
|
||||
<metric name="WAVE_ISSUE_WAIT" expr=100*SQ_WAIT_INST_ANY/SQ_WAVE_CYCLES descr="Percentage of the SQ_WAVE_CYCLES time spent waiting for any instruction issue."></metric>
|
||||
|
||||
<metric name="TA_BUSY_avr" expr=avr(TA_TA_BUSY,16) descr="TA block is busy. Average over TA instances."></metric>
|
||||
<metric name="TA_BUSY_max" expr=max(TA_TA_BUSY,16) descr="TA block is busy. Max over TA instances."></metric>
|
||||
<metric name="TA_BUSY_min" expr=min(TA_TA_BUSY,16) descr="TA block is busy. Min over TA instances."></metric>
|
||||
<metric name="TA_BUFFER_LOAD_WAVEFRONTS_sum" expr=sum(TA_BUFFER_LOAD_WAVEFRONTS,16) descr="Number of buffer load vec32 packets processed by the TA. Sum over TA instances."></metric>
|
||||
<metric name="TA_BUFFER_STORE_WAVEFRONTS_sum" expr=sum(TA_BUFFER_STORE_WAVEFRONTS,16) descr="Number of buffer store vec32 packets processed by the TA. Sum over TA instances."></metric>
|
||||
|
||||
<metric name="GL2C_HIT_sum" expr=sum(GL2C_HIT,16) descr="Number of cache hits. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_MISS_sum" expr=sum(GL2C_MISS,16) descr="Number of cache misses. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_EA_RDREQ_32B_sum" expr=sum(GL2C_EA_RDREQ_32B,16) descr="Number of 32-byte GL2C/EA read requests. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_EA_RDREQ_64B_sum" expr=sum(GL2C_EA_RDREQ_64B,16) descr="Number of 64-byte GL2C/EA read requests. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_EA_RDREQ_96B_sum" expr=sum(GL2C_EA_RDREQ_96B,16) descr="Number of 96-byte GL2C/EA read requests. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_EA_RDREQ_128B_sum" expr=sum(GL2C_EA_RDREQ_128B,16) descr="Number of 128-byte GL2C/EA read requests. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_MC_RDREQ_sum" expr=sum(GL2C_MC_RDREQ,16) descr="Number of GL2C/EA read requests (either 32-byte or 64-byte or 128-byte). Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_MC_WRREQ_sum" expr=sum(GL2C_MC_WRREQ,16) descr="Number of transactions (either 32-byte or 64-byte) going over the GL2C_EA_wrreq interface. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_EA_WRREQ_64B_sum" expr=sum(GL2C_EA_WRREQ_64B,16) descr="Number of 64-byte transactions going (64-byte write or CMPSWAP) over the GL2C_EA_wrreq interface. Sum over GL2C instances."></metric>
|
||||
<metric name="GL2C_WRREQ_STALL_max" expr=max(GL2C_MC_WRREQ_STALL,16) descr="Number of cycles a write request was stalled. Max over GL2C instances."></metric>
|
||||
<metric name="L2CacheHit" expr=100*sum(GL2C_HIT,16)/(sum(GL2C_HIT,16)+sum(GL2C_MISS,16)) descr="The percentage of fetch, write, atomic, and other instructions that hit the data in L2 cache. Value range: 0% (no hit) to 100% (optimal)."></metric>
|
||||
<metric name="FETCH_SIZE" expr=(GL2C_EA_RDREQ_32B_sum*32+GL2C_EA_RDREQ_64B_sum*64+GL2C_EA_RDREQ_96B_sum*96+GL2C_EA_RDREQ_128B_sum*128)/1024 descr="The total kilobytes fetched from the video memory. This is measured with all extra fetches and any cache or memory effects taken into account."></metric>
|
||||
<metric name="WriteUnitStalled" expr=100*GL2C_WRREQ_STALL_max/GRBM_GUI_ACTIVE descr="The percentage of GPUTime the Write unit is stalled. Value range: 0% to 100% (bad)."></metric>
|
||||
<metric name="LDSBankConflict" expr=100*SQC_LDS_BANK_CONFLICT/SQC_LDS_IDX_ACTIVE descr="The percentage of GPUTime LDS is stalled by bank conflicts. Value range: 0% (optimal) to 100% (bad)."></metric>
|
||||
</gfx11_expr>
|
||||
|
||||
<gfx1100_expr base="gfx11_expr">
|
||||
</gfx1100_expr>
|
||||
|
||||
<gfx1101_expr base="gfx11_expr">
|
||||
</gfx1101_expr>
|
||||
|
||||
<gfx1030_expr base="gfx10_expr">
|
||||
</gfx1030_expr>
|
||||
|
||||
@@ -264,6 +297,10 @@
|
||||
|
||||
<gfx1032 base="gfx1032_expr"></gfx1032>
|
||||
|
||||
<gfx1100 base="gfx1100_expr"></gfx1100>
|
||||
|
||||
<gfx1101 base="gfx1101_expr"></gfx1101>
|
||||
|
||||
<global>
|
||||
# GPUBusy The percentage of time GPU was busy.
|
||||
<metric
|
||||
|
||||
Referencia en una nueva incidencia
Block a user