Implementation for RDC_FI_PROF_OCCUPANCY_PER_ACTIVE_CU SWDEV-50895
Signed-off-by: adapryor <Adam.pryor@amd.com>
Change-Id: I8da7d9846edabe5629c75f50cd2bb4b23e019a17
Signed-off-by: adapryor <Adam.pryor@amd.com>
[ROCm/rdc commit: 290b90dc89]
This commit is contained in:
committed by
Galantsev, Dmitrii
parent
7f4e5c85cb
commit
8286a92fc1
@@ -139,8 +139,11 @@ FLD_DESC_ENT(RDC_FI_PROF_EVAL_MEM_W_BW, "Written to video memory kb / ms
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_16, "Number of fp16 OPS / ms", "FLOPS_16", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_32, "Number of fp32 OPS / ms", "FLOPS_32", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_EVAL_FLOPS_64, "Number of fp64 OPS / ms", "FLOPS_64", false)
|
||||
// metrics below are not divided by time passed
|
||||
FLD_DESC_ENT(RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "Percent of Active Pipe VALU", "VALU_UTILIZATION", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_SM_ACTIVE, "Ratio of Cycles with active warp on SM","VALUBusy", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_SM_ACTIVE, "Ratio of Cycles with active warp on SM","VALUBusy", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_OCC_PER_ACTIVE_CU, "Mean occ per active compute unit", "OCC_CU", false)
|
||||
FLD_DESC_ENT(RDC_FI_PROF_OCC_ELAPSED, "Mean occ per active cu over elapsed", "OCC_CU_ELAPSED", false)
|
||||
|
||||
// Events
|
||||
FLD_DESC_ENT(RDC_EVNT_XGMI_0_NOP_TX, "NOPs sent to neighbor 0", "XGMI_NOP_0", false)
|
||||
|
||||
@@ -128,8 +128,8 @@ int run() {
|
||||
field_ids.push_back(RDC_FI_GPU_MEMORY_USAGE);
|
||||
field_ids.push_back(RDC_FI_POWER_USAGE);
|
||||
// profiler metrics
|
||||
field_ids.push_back(RDC_FI_PROF_MEAN_OCCUPANCY_PER_CU);
|
||||
field_ids.push_back(RDC_FI_PROF_MEAN_OCCUPANCY_PER_ACTIVE_CU);
|
||||
field_ids.push_back(RDC_FI_PROF_MEAN_OCC_PER_CU);
|
||||
field_ids.push_back(RDC_FI_PROF_MEAN_OCC_PER_ACTIVE_CU);
|
||||
field_ids.push_back(RDC_FI_PROF_ACTIVE_CYCLES);
|
||||
field_ids.push_back(RDC_FI_PROF_ACTIVE_WAVES);
|
||||
field_ids.push_back(RDC_FI_PROF_ELAPSED_CYCLES);
|
||||
|
||||
@@ -39,9 +39,9 @@ extern "C" {
|
||||
|
||||
#include <cstdint>
|
||||
#else
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#endif // __cplusplus
|
||||
|
||||
/** \file rdc.h
|
||||
@@ -195,7 +195,8 @@ typedef enum {
|
||||
RDC_FI_GPU_MM_ENC_UTIL, //!< Multimedia encoder busy percentage
|
||||
RDC_FI_GPU_MM_DEC_UTIL, //!< Multimedia decoder busy percentage
|
||||
RDC_FI_GPU_MEMORY_ACTIVITY, //!< Memory busy percentage
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //<! The Memory max bandwidth at current memory clock in Mb/Second
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //!< The Memory max bandwidth at current memory clock in
|
||||
//!< Mb/Second
|
||||
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, //!< The Memory current bandwidth in Mb/Second
|
||||
|
||||
/**
|
||||
@@ -286,8 +287,11 @@ typedef enum {
|
||||
RDC_FI_PROF_EVAL_FLOPS_16,
|
||||
RDC_FI_PROF_EVAL_FLOPS_32,
|
||||
RDC_FI_PROF_EVAL_FLOPS_64,
|
||||
// metrics below are not divided by time passed
|
||||
RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL,
|
||||
RDC_FI_PROF_SM_ACTIVE,
|
||||
RDC_FI_PROF_OCC_PER_ACTIVE_CU,
|
||||
RDC_FI_PROF_OCC_ELAPSED,
|
||||
|
||||
/**
|
||||
* @brief Raw XGMI counter events
|
||||
@@ -350,14 +354,14 @@ typedef enum {
|
||||
/**
|
||||
* @brief RDC health related fields
|
||||
*/
|
||||
RDC_HEALTH_XGMI_ERROR = 3000, //!< XGMI one or more errors detected
|
||||
RDC_HEALTH_PCIE_REPLAY_COUNT, //!< Total PCIE replay count
|
||||
RDC_HEALTH_RETIRED_PAGE_NUM, //!< Retired page number
|
||||
RDC_HEALTH_PENDING_PAGE_NUM, //!< Pending page number
|
||||
RDC_HEALTH_RETIRED_PAGE_LIMIT, //!< the threshold of retired page number
|
||||
RDC_HEALTH_EEPROM_CONFIG_VALID, //!< Reads the EEPROM and verifies the checksums
|
||||
RDC_HEALTH_POWER_THROTTLE_TIME, //!< Power throttle status counter
|
||||
RDC_HEALTH_THERMAL_THROTTLE_TIME, //!< Total time in thermal throttle status (microseconds)
|
||||
RDC_HEALTH_XGMI_ERROR = 3000, //!< XGMI one or more errors detected
|
||||
RDC_HEALTH_PCIE_REPLAY_COUNT, //!< Total PCIE replay count
|
||||
RDC_HEALTH_RETIRED_PAGE_NUM, //!< Retired page number
|
||||
RDC_HEALTH_PENDING_PAGE_NUM, //!< Pending page number
|
||||
RDC_HEALTH_RETIRED_PAGE_LIMIT, //!< the threshold of retired page number
|
||||
RDC_HEALTH_EEPROM_CONFIG_VALID, //!< Reads the EEPROM and verifies the checksums
|
||||
RDC_HEALTH_POWER_THROTTLE_TIME, //!< Power throttle status counter
|
||||
RDC_HEALTH_THERMAL_THROTTLE_TIME, //!< Total time in thermal throttle status (microseconds)
|
||||
} rdc_field_t;
|
||||
|
||||
// even and odd numbers are used for correctable and uncorrectable errors
|
||||
|
||||
@@ -154,6 +154,8 @@ class rdc_field_t(c_int):
|
||||
RDC_FI_PROF_EVAL_FLOPS_64 = 810
|
||||
RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL = 811
|
||||
RDC_FI_PROF_SM_ACTIVE = 812
|
||||
RDC_FI_PROF_OCC_PER_ACTIVE_CU = 813
|
||||
RDC_FI_PROF_OCC_ELAPSED = 814
|
||||
RDC_EVNT_XGMI_0_NOP_TX = 1000
|
||||
RDC_EVNT_XGMI_0_REQ_TX = 1001
|
||||
RDC_EVNT_XGMI_0_RESP_TX = 1002
|
||||
|
||||
@@ -118,8 +118,11 @@ RdcRocpBase::RdcRocpBase() {
|
||||
{RDC_FI_PROF_EVAL_FLOPS_16, "TOTAL_16_OPS"},
|
||||
{RDC_FI_PROF_EVAL_FLOPS_32, "TOTAL_32_OPS"},
|
||||
{RDC_FI_PROF_EVAL_FLOPS_64, "TOTAL_64_OPS"},
|
||||
// metrics below are not divided by time passed
|
||||
{RDC_FI_PROF_VALU_PIPE_ISSUE_UTIL, "ValuPipeIssueUtil"},
|
||||
{RDC_FI_PROF_SM_ACTIVE, "VALUBusy"},
|
||||
{RDC_FI_PROF_OCC_PER_ACTIVE_CU, "MeanOccupancyPerActiveCU"},
|
||||
// RDC_FI_PROF_OCC_ELAPSED is derived from OCC_PER_ACTIVE_CU and ACTIVE_CYCLES
|
||||
};
|
||||
|
||||
hsa_status_t status = hsa_init();
|
||||
@@ -185,6 +188,18 @@ rdc_status_t RdcRocpBase::rocp_lookup(rdc_gpu_field_t gpu_field, double* value)
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
|
||||
if (field == RDC_FI_PROF_OCC_ELAPSED) {
|
||||
double occupancy_val = run_profiler(gpu_index, RDC_FI_PROF_OCC_PER_ACTIVE_CU);
|
||||
double active_cycles_val = run_profiler(gpu_index, RDC_FI_PROF_ACTIVE_CYCLES);
|
||||
|
||||
if (active_cycles_val != 0.0) {
|
||||
*value = occupancy_val / active_cycles_val;
|
||||
return RDC_ST_OK;
|
||||
} else {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
}
|
||||
|
||||
const auto start_time = std::chrono::high_resolution_clock::now();
|
||||
*value = run_profiler(gpu_index, field);
|
||||
const auto stop_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
Reference in New Issue
Block a user