From 793b2de0cbda0efa7fd02f49bcd8157277b2d964 Mon Sep 17 00:00:00 2001 From: "Galantsev, Dmitrii" Date: Thu, 10 Oct 2024 13:09:29 -0500 Subject: [PATCH] Profiler - Modify metrics Remove occupancy metrics and replace with OccupancyPercent Add OCCUPANCY_PERCENT which uses OccupancyPercent Add GR_ENGINE_ACTIVE which uses GPU_UTIL/100 Add TENSOR_ACTIVE_PERCENT which uses MfmaUtil Modify FLOPS_64 to use FP64_ACTIVE Change-Id: I5f30d77a0c80f5ac78abd1a9e57f8a0a3c6cc00b Signed-off-by: Galantsev, Dmitrii [ROCm/rdc commit: 28acbf0436d0a10e7a0f2bef0e32abe71217ef55] --- projects/rdc/README.md | 6 +++--- projects/rdc/common/rdc_field.data | 21 ++++++++++--------- projects/rdc/include/rdc/rdc.h | 5 +++-- .../rdc_modules/rdc_rocp/RdcRocpBase.cc | 12 ++++++++--- 4 files changed, 26 insertions(+), 18 deletions(-) diff --git a/projects/rdc/README.md b/projects/rdc/README.md index df1857d1e3..7592e36880 100644 --- a/projects/rdc/README.md +++ b/projects/rdc/README.md @@ -204,10 +204,10 @@ E.g. Correct output on MI300 using [gpu-burn](https://github.com/ROCm/HIP-Exampl export HSA_TOOLS_LIB=/opt/rocm/lib/librocprofiler64.so.1 gpu-burn # terminal 3 - rdci dmon -u -e 800,801,803 -i 0 -c 1 + rdci dmon -u -e 800,802 -i 0 -c 1 # output: - # GPU MN_OCC_PER_CU MN_OCC_PER_ACT_CU ACTIVE_WAVES - # 0 1683.422 6479.242 32640.000 + # GPU OCCUPANCY_PERCENT ACTIVE_WAVES + # 0 001.000 32640.000 ### `HSA_STATUS_ERROR_OUT_OF_RESOURCES` diff --git a/projects/rdc/common/rdc_field.data b/projects/rdc/common/rdc_field.data index 17b0e6337d..42c3a89d07 100644 --- a/projects/rdc/common/rdc_field.data +++ b/projects/rdc/common/rdc_field.data @@ -120,17 +120,18 @@ FLD_DESC_ENT(RDC_FI_XGMI_TOTAL_WRITE_KB, "XGMI accumlated data write size acr // This doesn't map to rocprofiler counters directly // See counter_map in rdc/include/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.h // See metrics.xml in rocprofiler -FLD_DESC_ENT(RDC_FI_PROF_MEAN_OCCUPANCY_PER_CU, "Mean occupancy per CU", "MN_OCC_PER_CU", false) 
-FLD_DESC_ENT(RDC_FI_PROF_MEAN_OCCUPANCY_PER_ACTIVE_CU, "Mean occupancy per active CU", "MN_OCC_PER_ACT_CU", false) -FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "ACTIVE_CYCLES", false) -FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "ACTIVE_WAVES", false) -FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false) +FLD_DESC_ENT(RDC_FI_PROF_OCCUPANCY_PERCENT, "Percent of GPU occupancy", "OCCUPANCY_PERCENT", false) +FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_CYCLES, "Number of Active Cycles", "ACTIVE_CYCLES", false) +FLD_DESC_ENT(RDC_FI_PROF_ACTIVE_WAVES, "Number of Active Waves", "ACTIVE_WAVES", false) +FLD_DESC_ENT(RDC_FI_PROF_ELAPSED_CYCLES, "Number of Elapsed Cycles over all SMs", "ELAPSED_CYCLES", false) +FLD_DESC_ENT(RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, "Percent of Active Pipe Tensors", "TENSOR_PERCENT", false) +FLD_DESC_ENT(RDC_FI_PROF_GPU_UTIL_PERCENT, "Percent of GPU Utilization", "GPU_UTIL_PERCENT", false) // metrics below are divided by time passed -FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_R_BW, "Fetched from video memory kb / ms", "MEM_R_BW", false) -FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_W_BW, "Written to video memory kb / ms", "MEM_W_BW", false) -FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16, "Number of fp16 OPS / ms", "FLOPS_16", false) -FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32, "Number of fp32 OPS / ms", "FLOPS_32", false) -FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64, "Number of fp64 OPS / ms", "FLOPS_64", false) +FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_R_BW, "Fetched from video memory kb / ms", "MEM_R_BW", false) +FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_W_BW, "Written to video memory kb / ms", "MEM_W_BW", false) +FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16, "Number of fp16 OPS / ms", "FLOPS_16", false) +FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32, "Number of fp32 OPS / ms", "FLOPS_32", false) +FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64, "Number of fp64 OPS / ms", "FLOPS_64", false) // Events 
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false) diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index 60a52c10e5..51d1385b88 100644 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -258,11 +258,12 @@ typedef enum { /** * @brief ROC-profiler related fields */ - RDC_FI_PROF_MEAN_OCCUPANCY_PER_CU = 800, - RDC_FI_PROF_MEAN_OCCUPANCY_PER_ACTIVE_CU, + RDC_FI_PROF_OCCUPANCY_PERCENT = 800, RDC_FI_PROF_ACTIVE_CYCLES, RDC_FI_PROF_ACTIVE_WAVES, RDC_FI_PROF_ELAPSED_CYCLES, + RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, + RDC_FI_PROF_GPU_UTIL_PERCENT, // metrics below are divided by time passed RDC_FI_PROF_EVAL_MEM_R_BW, RDC_FI_PROF_EVAL_MEM_W_BW, diff --git a/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc b/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc index a8fb1a95fa..9cc61a70ac 100644 --- a/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc +++ b/projects/rdc/rdc_libs/rdc_modules/rdc_rocp/RdcRocpBase.cc @@ -246,17 +246,18 @@ RdcRocpBase::RdcRocpBase() { // all fields static const std::map temp_field_map_k = { - {RDC_FI_PROF_MEAN_OCCUPANCY_PER_CU, "MeanOccupancyPerCU"}, - {RDC_FI_PROF_MEAN_OCCUPANCY_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"}, + {RDC_FI_PROF_OCCUPANCY_PERCENT, "OccupancyPercent"}, {RDC_FI_PROF_ACTIVE_CYCLES, "GRBM_GUI_ACTIVE"}, {RDC_FI_PROF_ACTIVE_WAVES, "SQ_WAVES"}, {RDC_FI_PROF_ELAPSED_CYCLES, "GRBM_COUNT"}, + {RDC_FI_PROF_TENSOR_ACTIVE_PERCENT, "MfmaUtil"}, // same as TENSOR_ACTIVE but available for more GPUs + {RDC_FI_PROF_GPU_UTIL_PERCENT, "GPU_UTIL"}, // metrics below are divided by time passed {RDC_FI_PROF_EVAL_MEM_R_BW, "FETCH_SIZE"}, {RDC_FI_PROF_EVAL_MEM_W_BW, "WRITE_SIZE"}, {RDC_FI_PROF_EVAL_FLOPS_16, "TOTAL_16_OPS"}, {RDC_FI_PROF_EVAL_FLOPS_32, "TOTAL_32_OPS"}, - {RDC_FI_PROF_EVAL_FLOPS_64, "TOTAL_64_OPS"}, + {RDC_FI_PROF_EVAL_FLOPS_64, "FP64_ACTIVE"}, }; std::vector all_fields; @@ -393,6 +394,11 @@ rdc_status_t 
RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, double* value) // << (*value / elapsed) << "]"); *value = *value / elapsed; } + // GPU_UTIL metric is available on more GPUs than ENGINE_ACTIVE. + // ENGINE_ACTIVE = GPU_UTIL/100, so do the math ourselves + if (field == RDC_FI_PROF_GPU_UTIL_PERCENT) { + *value = *value / 100.0F; + } return Rocp2RdcError(status); }