Profiler - Add CPC and CPF metrics
Change-Id: I27fd725e9e1868c9afe7624d6e4aafad2a42d47e
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
Цей коміт міститься в:
зафіксовано
Galantsev, Dmitrii
джерело
47692d3ed5
коміт
51de344be7
@@ -133,21 +133,56 @@ FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves",
|
||||
FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, "Percent of Active Pipe Tensors", "TENSOR_PERCENT", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL_PERCENT, "Percent of GPU Utilization", "GPU_UTIL_PERCENT", false)
|
||||
// metrics below are divided by time passed
|
||||
// metrics with EVAL are divided by time passed
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_R_BW, "Fetched from video memory kb / ms", "MEM_R_BW", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_W_BW, "Written to video memory kb / ms", "MEM_W_BW", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16, "Number of fp16 OPS / ms", "FLOPS_16", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32, "Number of fp32 OPS / ms", "FLOPS_32", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64, "Number of fp64 OPS / ms", "FLOPS_64", false)
|
||||
// metrics below are not divided by time passed
|
||||
FLD_DESC_ENT(RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "Percent of Active Pipe VALU", "VALU_UTILIZATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_SM_ACTIVE, "Ratio of Cycles with active warp on SM","VALUBusy", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_OCC_PER_ACTIVE_CU, "Mean occ per active compute unit", "OCC_CU", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_OCC_ELAPSED, "Mean occ per active cu over elapsed", "OCC_CU_ELAPSED", false)
|
||||
// metrics below are divided by time passed
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16_PERCENT, "Number of fp16 OPS percent of max", "FLOPS_16_PERCENT", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32_PERCENT, "Number of fp32 OPS percent of max", "FLOPS_32_PERCENT", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64_PERCENT, "Number of fp64 OPS percent of max", "FLOPS_64_PERCENT", false)
|
||||
// CPC
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_STAT_BUSY, "", "CPC_CPC_STAT_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_STAT_IDLE, "", "CPC_CPC_STAT_IDLE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_STAT_STALL, "", "CPC_CPC_STAT_STALL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "", "CPC_CPC_TCIU_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "", "CPC_CPC_TCIU_IDLE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "", "CPC_CPC_UTCL2IU_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "", "CPC_CPC_UTCL2IU_IDLE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "", "CPC_CPC_UTCL2IU_STALL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "", "CPC_ME1_BUSY_FOR_PACKET_DECODE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "", "CPC_ME1_DC0_SPI_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "", "CPC_UTCL1_STALL_ON_TRANSLATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_ALWAYS_COUNT, "", "CPC_ALWAYS_COUNT", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "", "CPC_ADC_VALID_CHUNK_NOT_AVAIL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "", "CPC_ADC_DISPATCH_ALLOC_DONE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "", "CPC_ADC_VALID_CHUNK_END", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "", "CPC_SYNC_FIFO_FULL_LEVEL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "", "CPC_SYNC_FIFO_FULL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_GD_BUSY, "", "CPC_GD_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_TG_SEND, "", "CPC_TG_SEND", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "", "CPC_WALK_NEXT_CHUNK", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "", "CPC_STALLED_BY_SE0_SPI", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "", "CPC_STALLED_BY_SE1_SPI", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "", "CPC_STALLED_BY_SE2_SPI", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "", "CPC_STALLED_BY_SE3_SPI", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_LTE_ALL, "", "CPC_LTE_ALL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "", "CPC_SYNC_WRREQ_FIFO_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CANE_BUSY, "", "CPC_CANE_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPC_CANE_STALL, "", "CPC_CANE_STALL", false)
|
||||
// CPF
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "", "CPF_CMP_UTCL1_STALL_ON_TRANSLATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_STAT_BUSY, "", "CPF_CPF_STAT_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_STAT_IDLE, "", "CPF_CPF_STAT_IDLE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_STAT_STALL, "", "CPF_CPF_STAT_STALL", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "", "CPF_CPF_TCIU_BUSY", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "", "CPF_CPF_TCIU_IDLE", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_CPF_CPF_TCIU_STALL, "", "CPF_CPF_TCIU_STALL", false)
|
||||
|
||||
// Events
|
||||
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
||||
|
||||
@@ -290,21 +290,56 @@ typedef enum {
|
||||
RDC_FI_PROF_ELAPSED_CYCLES,
|
||||
RDC_FI_PROF_TENSOR_ACTIVE_PERCENT,
|
||||
RDC_FI_PROF_GPU_UTIL_PERCENT,
|
||||
// metrics below are divided by time passed
|
||||
// metrics with EVAL are divided by time passed
|
||||
RDC_FI_PROF_EVAL_MEM_R_BW,
|
||||
RDC_FI_PROF_EVAL_MEM_W_BW,
|
||||
RDC_FI_PROF_EVAL_FLOPS_16,
|
||||
RDC_FI_PROF_EVAL_FLOPS_32,
|
||||
RDC_FI_PROF_EVAL_FLOPS_64,
|
||||
// metrics below are not divided by time passed
|
||||
RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL,
|
||||
RDC_FI_PROF_SM_ACTIVE,
|
||||
RDC_FI_PROF_OCC_PER_ACTIVE_CU,
|
||||
RDC_FI_PROF_OCC_ELAPSED,
|
||||
// metrics below are divided by time passed
|
||||
RDC_FI_PROF_EVAL_FLOPS_16_PERCENT,
|
||||
RDC_FI_PROF_EVAL_FLOPS_32_PERCENT,
|
||||
RDC_FI_PROF_EVAL_FLOPS_64_PERCENT,
|
||||
// CPC
|
||||
RDC_FI_PROF_CPC_CPC_STAT_BUSY,
|
||||
RDC_FI_PROF_CPC_CPC_STAT_IDLE,
|
||||
RDC_FI_PROF_CPC_CPC_STAT_STALL,
|
||||
RDC_FI_PROF_CPC_CPC_TCIU_BUSY,
|
||||
RDC_FI_PROF_CPC_CPC_TCIU_IDLE,
|
||||
RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY,
|
||||
RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE,
|
||||
RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL,
|
||||
RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE,
|
||||
RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY,
|
||||
RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION,
|
||||
RDC_FI_PROF_CPC_ALWAYS_COUNT,
|
||||
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL,
|
||||
RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE,
|
||||
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END,
|
||||
RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL,
|
||||
RDC_FI_PROF_CPC_SYNC_FIFO_FULL,
|
||||
RDC_FI_PROF_CPC_GD_BUSY,
|
||||
RDC_FI_PROF_CPC_TG_SEND,
|
||||
RDC_FI_PROF_CPC_WALK_NEXT_CHUNK,
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI,
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI,
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI,
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI,
|
||||
RDC_FI_PROF_CPC_LTE_ALL,
|
||||
RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY,
|
||||
RDC_FI_PROF_CPC_CANE_BUSY,
|
||||
RDC_FI_PROF_CPC_CANE_STALL,
|
||||
// CPF
|
||||
RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION,
|
||||
RDC_FI_PROF_CPF_CPF_STAT_BUSY,
|
||||
RDC_FI_PROF_CPF_CPF_STAT_IDLE,
|
||||
RDC_FI_PROF_CPF_CPF_STAT_STALL,
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_BUSY,
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_IDLE,
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_STALL,
|
||||
|
||||
/**
|
||||
* @brief Raw XGMI counter events
|
||||
|
||||
@@ -177,6 +177,41 @@ class rdc_field_t(c_int):
|
||||
RDC_FI_PROF_EVAL_FLOPS_16_PERCENT = 815
|
||||
RDC_FI_PROF_EVAL_FLOPS_32_PERCENT = 816
|
||||
RDC_FI_PROF_EVAL_FLOPS_64_PERCENT = 817
|
||||
RDC_FI_PROF_CPC_CPC_STAT_BUSY = 818
|
||||
RDC_FI_PROF_CPC_CPC_STAT_IDLE = 819
|
||||
RDC_FI_PROF_CPC_CPC_STAT_STALL = 820
|
||||
RDC_FI_PROF_CPC_CPC_TCIU_BUSY = 821
|
||||
RDC_FI_PROF_CPC_CPC_TCIU_IDLE = 822
|
||||
RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY = 823
|
||||
RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE = 824
|
||||
RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL = 825
|
||||
RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE = 826
|
||||
RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY = 827
|
||||
RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION = 828
|
||||
RDC_FI_PROF_CPC_ALWAYS_COUNT = 829
|
||||
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL = 830
|
||||
RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE = 831
|
||||
RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END = 832
|
||||
RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL = 833
|
||||
RDC_FI_PROF_CPC_SYNC_FIFO_FULL = 834
|
||||
RDC_FI_PROF_CPC_GD_BUSY = 835
|
||||
RDC_FI_PROF_CPC_TG_SEND = 836
|
||||
RDC_FI_PROF_CPC_WALK_NEXT_CHUNK = 837
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI = 838
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI = 839
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI = 840
|
||||
RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI = 841
|
||||
RDC_FI_PROF_CPC_LTE_ALL = 842
|
||||
RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY = 843
|
||||
RDC_FI_PROF_CPC_CANE_BUSY = 844
|
||||
RDC_FI_PROF_CPC_CANE_STALL = 845
|
||||
RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION = 846
|
||||
RDC_FI_PROF_CPF_CPF_STAT_BUSY = 847
|
||||
RDC_FI_PROF_CPF_CPF_STAT_IDLE = 848
|
||||
RDC_FI_PROF_CPF_CPF_STAT_STALL = 849
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_BUSY = 850
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_IDLE = 851
|
||||
RDC_FI_PROF_CPF_CPF_TCIU_STALL = 852
|
||||
RDC_EVNT_XGMI_0_NOP_TX = 1000
|
||||
RDC_EVNT_XGMI_0_REQ_TX = 1001
|
||||
RDC_EVNT_XGMI_0_RESP_TX = 1002
|
||||
|
||||
@@ -126,6 +126,42 @@ RdcRocpBase::RdcRocpBase() {
|
||||
{RDC_FI_PROF_SM_ACTIVE, "VALUBusy"},
|
||||
{RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"},
|
||||
// RDC_FI_PROF_OCC_ELAPSED is derived from OCC_PER_ACTIVE_CU and ACTIVE_CYCLES
|
||||
{RDC_FI_PROF_CPC_CPC_STAT_BUSY, "CPC_CPC_STAT_BUSY"},
|
||||
{RDC_FI_PROF_CPC_CPC_STAT_IDLE, "CPC_CPC_STAT_IDLE"},
|
||||
{RDC_FI_PROF_CPC_CPC_STAT_STALL, "CPC_CPC_STAT_STALL"},
|
||||
{RDC_FI_PROF_CPC_CPC_TCIU_BUSY, "CPC_CPC_TCIU_BUSY"},
|
||||
{RDC_FI_PROF_CPC_CPC_TCIU_IDLE, "CPC_CPC_TCIU_IDLE"},
|
||||
{RDC_FI_PROF_CPC_CPC_UTCL2IU_BUSY, "CPC_CPC_UTCL2IU_BUSY"},
|
||||
{RDC_FI_PROF_CPC_CPC_UTCL2IU_IDLE, "CPC_CPC_UTCL2IU_IDLE"},
|
||||
{RDC_FI_PROF_CPC_CPC_UTCL2IU_STALL, "CPC_CPC_UTCL2IU_STALL"},
|
||||
{RDC_FI_PROF_CPC_ME1_BUSY_FOR_PACKET_DECODE, "CPC_ME1_BUSY_FOR_PACKET_DECODE"},
|
||||
{RDC_FI_PROF_CPC_ME1_DC0_SPI_BUSY, "CPC_ME1_DC0_SPI_BUSY"},
|
||||
{RDC_FI_PROF_CPC_UTCL1_STALL_ON_TRANSLATION, "CPC_UTCL1_STALL_ON_TRANSLATION"},
|
||||
{RDC_FI_PROF_CPC_ALWAYS_COUNT, "CPC_ALWAYS_COUNT"},
|
||||
{RDC_FI_PROF_CPC_ADC_VALID_CHUNK_NOT_AVAIL, "CPC_ADC_VALID_CHUNK_NOT_AVAIL"},
|
||||
{RDC_FI_PROF_CPC_ADC_DISPATCH_ALLOC_DONE, "CPC_ADC_DISPATCH_ALLOC_DONE"},
|
||||
{RDC_FI_PROF_CPC_ADC_VALID_CHUNK_END, "CPC_ADC_VALID_CHUNK_END"},
|
||||
{RDC_FI_PROF_CPC_SYNC_FIFO_FULL_LEVEL, "CPC_SYNC_FIFO_FULL_LEVEL"},
|
||||
{RDC_FI_PROF_CPC_SYNC_FIFO_FULL, "CPC_SYNC_FIFO_FULL"},
|
||||
{RDC_FI_PROF_CPC_GD_BUSY, "CPC_GD_BUSY"},
|
||||
{RDC_FI_PROF_CPC_TG_SEND, "CPC_TG_SEND"},
|
||||
{RDC_FI_PROF_CPC_WALK_NEXT_CHUNK, "CPC_WALK_NEXT_CHUNK"},
|
||||
{RDC_FI_PROF_CPC_STALLED_BY_SE0_SPI, "CPC_STALLED_BY_SE0_SPI"},
|
||||
{RDC_FI_PROF_CPC_STALLED_BY_SE1_SPI, "CPC_STALLED_BY_SE1_SPI"},
|
||||
{RDC_FI_PROF_CPC_STALLED_BY_SE2_SPI, "CPC_STALLED_BY_SE2_SPI"},
|
||||
{RDC_FI_PROF_CPC_STALLED_BY_SE3_SPI, "CPC_STALLED_BY_SE3_SPI"},
|
||||
{RDC_FI_PROF_CPC_LTE_ALL, "CPC_LTE_ALL"},
|
||||
{RDC_FI_PROF_CPC_SYNC_WRREQ_FIFO_BUSY, "CPC_SYNC_WRREQ_FIFO_BUSY"},
|
||||
{RDC_FI_PROF_CPC_CANE_BUSY, "CPC_CANE_BUSY"},
|
||||
{RDC_FI_PROF_CPC_CANE_STALL, "CPC_CANE_STALL"},
|
||||
{RDC_FI_PROF_CPF_CMP_UTCL1_STALL_ON_TRANSLATION, "CPF_CMP_UTCL1_STALL_ON_TRANSLATION"},
|
||||
{RDC_FI_PROF_CPF_CPF_STAT_BUSY, "CPF_CPF_STAT_BUSY"},
|
||||
{RDC_FI_PROF_CPF_CPF_STAT_IDLE, "CPF_CPF_STAT_IDLE"},
|
||||
{RDC_FI_PROF_CPF_CPF_STAT_STALL, "CPF_CPF_STAT_STALL"},
|
||||
{RDC_FI_PROF_CPF_CPF_TCIU_BUSY, "CPF_CPF_TCIU_BUSY"},
|
||||
{RDC_FI_PROF_CPF_CPF_TCIU_IDLE, "CPF_CPF_TCIU_IDLE"},
|
||||
{RDC_FI_PROF_CPF_CPF_TCIU_STALL, "CPF_CPF_TCIU_STALL"},
|
||||
|
||||
};
|
||||
|
||||
hsa_status_t status = hsa_init();
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача