Profiler - Add CPC and CPF metrics

Change-Id: I27fd725e9e1868c9afe7624d6e4aafad2a42d47e
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>


[ROCm/rdc commit: 51de344be7]
Bu işleme şunda yer alıyor:
Galantsev, Dmitrii
2025-03-27 18:07:13 +00:00
işlemeyi yapan: Galantsev, Dmitrii
ebeveyn fe868f6763
işleme bfee4ae9ee
4 değiştirilmiş dosya ile 147 ekleme ve 6 silme
+38 -3
Dosyayı Görüntüle
@@ -133,21 +133,56 @@ FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves",
// Each FLD_DESC_ENT entry below passes four arguments: the field identifier,
// a description string, a short label string, and a boolean flag.
// NOTE(review): the FLD_DESC_ENT macro definition is not visible here, so the
// exact use of each argument (in particular the boolean) should be confirmed.
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false)
FLD_DESC_ENT(RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, "Percent of Active Pipe Tensors", "TENSOR_PERCENT", false)
FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL_PERCENT, "Percent of GPU Utilization", "GPU_UTIL_PERCENT", false)
// Metrics with EVAL in their identifier are divided by time passed
// (their descriptions below are expressed per millisecond).
FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_R_BW, "Fetched from video memory kb / ms", "MEM_R_BW", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_W_BW, "Written to video memory kb / ms", "MEM_W_BW", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16, "Number of fp16 OPS / ms", "FLOPS_16", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32, "Number of fp32 OPS / ms", "FLOPS_32", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64, "Number of fp64 OPS / ms", "FLOPS_64", false)
// metrics below are not divided by time passed
FLD_DESC_ENT(RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "Percent of Active Pipe VALU", "VALU_UTILIZATION", false)
FLD_DESC_ENT(RDC_FI_PROF_SM_ACTIVE, "Ratio of Cycles with active warp on SM","VALUBusy", false)
FLD_DESC_ENT(RDC_FI_PROF_OCC_PER_ACTIVE_CU, "Mean occ per active compute unit", "OCC_CU", false)
FLD_DESC_ENT(RDC_FI_PROF_OCC_ELAPSED, "Mean occ per active cu over elapsed", "OCC_CU_ELAPSED", false)
// metrics below are divided by time passed
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16_PERCENT, "Number of fp16 OPS percent of max", "FLOPS_16_PERCENT", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32_PERCENT, "Number of fp32 OPS percent of max", "FLOPS_32_PERCENT", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64_PERCENT, "Number of fp64 OPS percent of max", "FLOPS_64_PERCENT", false)
// CPC counters (presumably "Command Processor - Compute" -- TODO confirm).
// NOTE(review): every entry in this group has an empty description string;
// only the short label is populated. Fill in descriptions if they are meant
// to be shown to users.
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_STAT_BUSY, "", "CPC_CPC_STAT_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_STAT_IDLE, "", "CPC_CPC_STAT_IDLE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_STAT_STALL, "", "CPC_CPC_STAT_STALL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "", "CPC_CPC_TCIU_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "", "CPC_CPC_TCIU_IDLE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "", "CPC_CPC_UTCL2IU_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "", "CPC_CPC_UTCL2IU_IDLE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "", "CPC_CPC_UTCL2IU_STALL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "", "CPC_ME1_BUSY_FOR_PACKET_DECODE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "", "CPC_ME1_DC0_SPI_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "", "CPC_UTCL1_STALL_ON_TRANSLATION", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_ALWAYS_COUNT, "", "CPC_ALWAYS_COUNT", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "", "CPC_ADC_VALID_CHUNK_NOT_AVAIL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "", "CPC_ADC_DISPATCH_ALLOC_DONE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "", "CPC_ADC_VALID_CHUNK_END", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "", "CPC_SYNC_FIFO_FULL_LEVEL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "", "CPC_SYNC_FIFO_FULL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_GD_BUSY, "", "CPC_GD_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_TG_SEND, "", "CPC_TG_SEND", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "", "CPC_WALK_NEXT_CHUNK", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "", "CPC_STALLED_BY_SE0_SPI", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "", "CPC_STALLED_BY_SE1_SPI", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "", "CPC_STALLED_BY_SE2_SPI", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "", "CPC_STALLED_BY_SE3_SPI", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_LTE_ALL, "", "CPC_LTE_ALL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "", "CPC_SYNC_WRREQ_FIFO_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CANE_BUSY, "", "CPC_CANE_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPC_CANE_STALL, "", "CPC_CANE_STALL", false)
// CPF counters (presumably "Command Processor - Fetcher" -- TODO confirm).
// NOTE(review): as with the CPC group, the description strings are empty.
FLD_DESC_ENT(RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "", "CPF_CMP_UTCL1_STALL_ON_TRANSLATION", false)
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_STAT_BUSY, "", "CPF_CPF_STAT_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_STAT_IDLE, "", "CPF_CPF_STAT_IDLE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_STAT_STALL, "", "CPF_CPF_STAT_STALL", false)
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "", "CPF_CPF_TCIU_BUSY", false)
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "", "CPF_CPF_TCIU_IDLE", false)
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_STALL, "", "CPF_CPF_TCIU_STALL", false)
// Events
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
+38 -3
Dosyayı Görüntüle
@@ -290,21 +290,56 @@ typedef enum {
RDC_FI_PROF_ELAPSED_CYCLES,
RDC_FI_PROF_TENSOR_ACTIVE_PERCENT,
RDC_FI_PROF_GPU_UTIL_PERCENT,
// metrics below are divided by time passed
// metrics with EVAL are divided by time passed
RDC_FI_PROF_EVAL_MEM_R_BW,
RDC_FI_PROF_EVAL_MEM_W_BW,
RDC_FI_PROF_EVAL_FLOPS_16,
RDC_FI_PROF_EVAL_FLOPS_32,
RDC_FI_PROF_EVAL_FLOPS_64,
// metrics below are not divided by time passed
RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL,
RDC_FI_PROF_SM_ACTIVE,
RDC_FI_PROF_OCC_PER_ACTIVE_CU,
RDC_FI_PROF_OCC_ELAPSED,
// metrics below are divided by time passed
RDC_FI_PROF_EVAL_FLOPS_16_PERCENT,
RDC_FI_PROF_EVAL_FLOPS_32_PERCENT,
RDC_FI_PROF_EVAL_FLOPS_64_PERCENT,
// CPC
RDC_FI_PROF_CPC_CPC_STAT_BUSY,
RDC_FI_PROF_CPC_CPC_STAT_IDLE,
RDC_FI_PROF_CPC_CPC_STAT_STALL,
RDC_FI_PROF_CPC_CPC_TCIU_BUSY,
RDC_FI_PROF_CPC_CPC_TCIU_IDLE,
RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY,
RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE,
RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL,
RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE,
RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY,
RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION,
RDC_FI_PROF_CPC_ALWAYS_COUNT,
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL,
RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE,
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END,
RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL,
RDC_FI_PROF_CPC_SYNC_FIFO_FULL,
RDC_FI_PROF_CPC_GD_BUSY,
RDC_FI_PROF_CPC_TG_SEND,
RDC_FI_PROF_CPC_WALK_NEXT_CHUNK,
RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI,
RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI,
RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI,
RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI,
RDC_FI_PROF_CPC_LTE_ALL,
RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY,
RDC_FI_PROF_CPC_CANE_BUSY,
RDC_FI_PROF_CPC_CANE_STALL,
// CPF
RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION,
RDC_FI_PROF_CPF_CPF_STAT_BUSY,
RDC_FI_PROF_CPF_CPF_STAT_IDLE,
RDC_FI_PROF_CPF_CPF_STAT_STALL,
RDC_FI_PROF_CPF_CPF_TCIU_BUSY,
RDC_FI_PROF_CPF_CPF_TCIU_IDLE,
RDC_FI_PROF_CPF_CPF_TCIU_STALL,
/**
* @brief Raw XGMI counter events
+35
Dosyayı Görüntüle
@@ -177,6 +177,41 @@ class rdc_field_t(c_int):
RDC_FI_PROF_EVAL_FLOPS_16_PERCENT = 815
RDC_FI_PROF_EVAL_FLOPS_32_PERCENT = 816
RDC_FI_PROF_EVAL_FLOPS_64_PERCENT = 817
RDC_FI_PROF_CPC_CPC_STAT_BUSY = 818
RDC_FI_PROF_CPC_CPC_STAT_IDLE = 819
RDC_FI_PROF_CPC_CPC_STAT_STALL = 820
RDC_FI_PROF_CPC_CPC_TCIU_BUSY = 821
RDC_FI_PROF_CPC_CPC_TCIU_IDLE = 822
RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY = 823
RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE = 824
RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL = 825
RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE = 826
RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY = 827
RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION = 828
RDC_FI_PROF_CPC_ALWAYS_COUNT = 829
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL = 830
RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE = 831
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END = 832
RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL = 833
RDC_FI_PROF_CPC_SYNC_FIFO_FULL = 834
RDC_FI_PROF_CPC_GD_BUSY = 835
RDC_FI_PROF_CPC_TG_SEND = 836
RDC_FI_PROF_CPC_WALK_NEXT_CHUNK = 837
RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI = 838
RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI = 839
RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI = 840
RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI = 841
RDC_FI_PROF_CPC_LTE_ALL = 842
RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY = 843
RDC_FI_PROF_CPC_CANE_BUSY = 844
RDC_FI_PROF_CPC_CANE_STALL = 845
RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION = 846
RDC_FI_PROF_CPF_CPF_STAT_BUSY = 847
RDC_FI_PROF_CPF_CPF_STAT_IDLE = 848
RDC_FI_PROF_CPF_CPF_STAT_STALL = 849
RDC_FI_PROF_CPF_CPF_TCIU_BUSY = 850
RDC_FI_PROF_CPF_CPF_TCIU_IDLE = 851
RDC_FI_PROF_CPF_CPF_TCIU_STALL = 852
RDC_EVNT_XGMI_0_NOP_TX = 1000
RDC_EVNT_XGMI_0_REQ_TX = 1001
RDC_EVNT_XGMI_0_RESP_TX = 1002
+36
Dosyayı Görüntüle
@@ -126,6 +126,42 @@ RdcRocpBase::RdcRocpBase() {
{RDC_FI_PROF_SM_ACTIVE, "VALUBusy"},
{RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"},
// RDC_FI_PROF_OCC_ELAPSED is derived from OCC_PER_ACTIVE_CU and ACTIVE_CYCLES
{RDC_FI_PROF_CPC_CPC_STAT_BUSY, "CPC_CPC_STAT_BUSY"},
{RDC_FI_PROF_CPC_CPC_STAT_IDLE, "CPC_CPC_STAT_IDLE"},
{RDC_FI_PROF_CPC_CPC_STAT_STALL, "CPC_CPC_STAT_STALL"},
{RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "CPC_CPC_TCIU_BUSY"},
{RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "CPC_CPC_TCIU_IDLE"},
{RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "CPC_CPC_UTCL2IU_BUSY"},
{RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "CPC_CPC_UTCL2IU_IDLE"},
{RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "CPC_CPC_UTCL2IU_STALL"},
{RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "CPC_ME1_BUSY_FOR_PACKET_DECODE"},
{RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "CPC_ME1_DC0_SPI_BUSY"},
{RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "CPC_UTCL1_STALL_ON_TRANSLATION"},
{RDC_FI_PROF_CPC_ALWAYS_COUNT, "CPC_ALWAYS_COUNT"},
{RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "CPC_ADC_VALID_CHUNK_NOT_AVAIL"},
{RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "CPC_ADC_DISPATCH_ALLOC_DONE"},
{RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "CPC_ADC_VALID_CHUNK_END"},
{RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "CPC_SYNC_FIFO_FULL_LEVEL"},
{RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "CPC_SYNC_FIFO_FULL"},
{RDC_FI_PROF_CPC_GD_BUSY, "CPC_GD_BUSY"},
{RDC_FI_PROF_CPC_TG_SEND, "CPC_TG_SEND"},
{RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "CPC_WALK_NEXT_CHUNK"},
{RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "CPC_STALLED_BY_SE0_SPI"},
{RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "CPC_STALLED_BY_SE1_SPI"},
{RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "CPC_STALLED_BY_SE2_SPI"},
{RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "CPC_STALLED_BY_SE3_SPI"},
{RDC_FI_PROF_CPC_LTE_ALL, "CPC_LTE_ALL"},
{RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "CPC_SYNC_WRREQ_FIFO_BUSY"},
{RDC_FI_PROF_CPC_CANE_BUSY, "CPC_CANE_BUSY"},
{RDC_FI_PROF_CPC_CANE_STALL, "CPC_CANE_STALL"},
{RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "CPF_CMP_UTCL1_STALL_ON_TRANSLATION"},
{RDC_FI_PROF_CPF_CPF_STAT_BUSY, "CPF_CPF_STAT_BUSY"},
{RDC_FI_PROF_CPF_CPF_STAT_IDLE, "CPF_CPF_STAT_IDLE"},
{RDC_FI_PROF_CPF_CPF_STAT_STALL, "CPF_CPF_STAT_STALL"},
{RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "CPF_CPF_TCIU_BUSY"},
{RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"},
{RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"},
};
hsa_status_t status = hsa_init();