Implementation for RDC_FI_PROF_OCC_PER_ACTIVE_CU and RDC_FI_PROF_OCC_ELAPSED (SWDEV-50895)

Signed-off-by: adapryor <Adam.pryor@amd.com>
Change-Id: I8da7d9846edabe5629c75f50cd2bb4b23e019a17
Signed-off-by: adapryor <Adam.pryor@amd.com>


[ROCm/rdc commit: 290b90dc89]
This commit is contained in:
adapryor
2025-01-16 14:51:32 -06:00
committed by Galantsev, Dmitrii
parent 7f4e5c85cb
commit 8286a92fc1
5 changed files with 37 additions and 13 deletions
+4 -1
View File
@@ -139,8 +139,11 @@ FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_W_BW, "Written to video memory kb / ms
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16, "Number of fp16 OPS / ms", "FLOPS_16", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32, "Number of fp32 OPS / ms", "FLOPS_32", false)
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64, "Number of fp64 OPS / ms", "FLOPS_64", false)
// metrics below are not divided by time passed
FLD_DESC_ENT(RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "Percent of Active Pipe VALU", "VALU_UTILIZATION", false)
FLD_DESC_ENT(RDC_FI_PROF_SM_ACTIVE, "Ratio of Cycles with active warp on SM","VALUBusy", false)
FLD_DESC_ENT(RDC_FI_PROF_SM_ACTIVE, "Ratio of Cycles with active warp on SM","VALUBusy", false)
FLD_DESC_ENT(RDC_FI_PROF_OCC_PER_ACTIVE_CU, "Mean occ per active compute unit", "OCC_CU", false)
FLD_DESC_ENT(RDC_FI_PROF_OCC_ELAPSED, "Mean occ per active cu over elapsed", "OCC_CU_ELAPSED", false)
// Events
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
+2 -2
View File
@@ -128,8 +128,8 @@ int run() {
field_ids.push_back(RDC_FI_GPU_MEMORY_USAGE);
field_ids.push_back(RDC_FI_POWER_USAGE);
// profiler metrics
field_ids.push_back(RDC_FI_PROF_MEAN_OCCUPANCY_PER_CU);
field_ids.push_back(RDC_FI_PROF_MEAN_OCCUPANCY_PER_ACTIVE_CU);
field_ids.push_back(RDC_FI_PROF_MEAN_OCC_PER_CU);
field_ids.push_back(RDC_FI_PROF_MEAN_OCC_PER_ACTIVE_CU);
field_ids.push_back(RDC_FI_PROF_ACTIVE_CYCLES);
field_ids.push_back(RDC_FI_PROF_ACTIVE_WAVES);
field_ids.push_back(RDC_FI_PROF_ELAPSED_CYCLES);
+14 -10
View File
@@ -39,9 +39,9 @@ extern "C" {
#include <cstdint>
#else
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <assert.h>
#endif // __cplusplus
/** \file rdc.h
@@ -195,7 +195,8 @@ typedef enum {
RDC_FI_GPU_MM_ENC_UTIL, //!< Multimedia encoder busy percentage
RDC_FI_GPU_MM_DEC_UTIL, //!< Multimedia decoder busy percentage
RDC_FI_GPU_MEMORY_ACTIVITY, //!< Memory busy percentage
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //!< The Memory max bandwidth at current memory clock in Mb/Second
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //!< The Memory max bandwidth at current memory clock in
//!< Mb/Second
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, //!< The Memory current bandwidth in Mb/Second
/**
@@ -286,8 +287,11 @@ typedef enum {
RDC_FI_PROF_EVAL_FLOPS_16,
RDC_FI_PROF_EVAL_FLOPS_32,
RDC_FI_PROF_EVAL_FLOPS_64,
// metrics below are not divided by time passed
RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL,
RDC_FI_PROF_SM_ACTIVE,
RDC_FI_PROF_OCC_PER_ACTIVE_CU,
RDC_FI_PROF_OCC_ELAPSED,
/**
* @brief Raw XGMI counter events
@@ -350,14 +354,14 @@ typedef enum {
/**
* @brief RDC health related fields
*/
RDC_HEALTH_XGMI_ERROR = 3000, //!< XGMI one or more errors detected
RDC_HEALTH_PCIE_REPLAY_COUNT, //!< Total PCIE replay count
RDC_HEALTH_RETIRED_PAGE_NUM, //!< Retired page number
RDC_HEALTH_PENDING_PAGE_NUM, //!< Pending page number
RDC_HEALTH_RETIRED_PAGE_LIMIT, //!< the threshold of retired page number
RDC_HEALTH_EEPROM_CONFIG_VALID, //!< Reads the EEPROM and verifies the checksums
RDC_HEALTH_POWER_THROTTLE_TIME, //!< Power throttle status counter
RDC_HEALTH_THERMAL_THROTTLE_TIME, //!< Total time in thermal throttle status (microseconds)
RDC_HEALTH_XGMI_ERROR = 3000, //!< XGMI one or more errors detected
RDC_HEALTH_PCIE_REPLAY_COUNT, //!< Total PCIE replay count
RDC_HEALTH_RETIRED_PAGE_NUM, //!< Retired page number
RDC_HEALTH_PENDING_PAGE_NUM, //!< Pending page number
RDC_HEALTH_RETIRED_PAGE_LIMIT, //!< the threshold of retired page number
RDC_HEALTH_EEPROM_CONFIG_VALID, //!< Reads the EEPROM and verifies the checksums
RDC_HEALTH_POWER_THROTTLE_TIME, //!< Power throttle status counter
RDC_HEALTH_THERMAL_THROTTLE_TIME, //!< Total time in thermal throttle status (microseconds)
} rdc_field_t;
// even and odd numbers are used for correctable and uncorrectable errors
@@ -154,6 +154,8 @@ class rdc_field_t(c_int):
RDC_FI_PROF_EVAL_FLOPS_64 = 810
RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL = 811
RDC_FI_PROF_SM_ACTIVE = 812
RDC_FI_PROF_OCC_PER_ACTIVE_CU = 813
RDC_FI_PROF_OCC_ELAPSED = 814
RDC_EVNT_XGMI_0_NOP_TX = 1000
RDC_EVNT_XGMI_0_REQ_TX = 1001
RDC_EVNT_XGMI_0_RESP_TX = 1002
@@ -118,8 +118,11 @@ RdcRocpBase::RdcRocpBase() {
{RDC_FI_PROF_EVAL_FLOPS_16, "TOTAL_16_OPS"},
{RDC_FI_PROF_EVAL_FLOPS_32, "TOTAL_32_OPS"},
{RDC_FI_PROF_EVAL_FLOPS_64, "TOTAL_64_OPS"},
// metrics below are not divided by time passed
{RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "ValuPipeIssueUtil"},
{RDC_FI_PROF_SM_ACTIVE, "VALUBusy"},
{RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"},
// RDC_FI_PROF_OCC_ELAPSED is derived from OCC_PER_ACTIVE_CU and ACTIVE_CYCLES
};
hsa_status_t status = hsa_init();
@@ -185,6 +188,18 @@ rdc_status_t RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, double* value)
return RDC_ST_BAD_PARAMETER;
}
if (field == RDC_FI_PROF_OCC_ELAPSED) {
double occupancy_val = run_profiler(gpu_index, RDC_FI_PROF_OCC_PER_ACTIVE_CU);
double active_cycles_val = run_profiler(gpu_index, RDC_FI_PROF_ACTIVE_CYCLES);
if (active_cycles_val != 0.0) {
*value = occupancy_val / active_cycles_val;
return RDC_ST_OK;
} else {
return RDC_ST_BAD_PARAMETER;
}
}
const auto start_time = std::chrono::high_resolution_clock::now();
*value = run_profiler(gpu_index, field);
const auto stop_time = std::chrono::high_resolution_clock::now();