diff --git a/projects/rdc/CMakeLists.txt b/projects/rdc/CMakeLists.txt index 2ce9d1eda0..25e3a828c6 100755 --- a/projects/rdc/CMakeLists.txt +++ b/projects/rdc/CMakeLists.txt @@ -165,7 +165,7 @@ if(BUILD_STANDALONE AND GRPC_ROOT STREQUAL GRPC_ROOT_DEFAULT) Continuing without gRPC install") endif() -find_package(amd_smi 25.4.0 +find_package(amd_smi 25.4.2 NAMES amd_smi HINTS ${ROCM_DIR}/lib/cmake CONFIGURE REQUIRED) diff --git a/projects/rdc/common/rdc_field.data b/projects/rdc/common/rdc_field.data index f4eecac54a..b3690e50b5 100644 --- a/projects/rdc/common/rdc_field.data +++ b/projects/rdc/common/rdc_field.data @@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +// clang-format off + // Description Fields: // Arg # Type Meaning // ------------------------------------------------- @@ -54,7 +56,7 @@ FLD_DESC_ENT(RDC_FI_GPU_MM_DEC_UTIL, "Mutilmedia decoder busy percentage", FLD_DESC_ENT(RDC_FI_GPU_MEMORY_ACTIVITY, "Memory busy percentage", "GPU_MEM_UTIL", true) FLD_DESC_ENT(RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, "Memory max bandwidth", "GPU_MEM_MAX_BANDWIDTH", true) FLD_DESC_ENT(RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, "Memory current bandwidth", "GPU_MEM_CUR_BANDWIDTH", true) - +FLD_DESC_ENT(RDC_FI_GPU_BUSY_PERCENT, "GPU busy percentage", "GPU_BUSY_PERCENT", true) FLD_DESC_ENT(RDC_FI_GPU_PAGE_RETRIED, "Retried page of the GPU instance", "GPU_PAGE_RETRIED", true) // ECC totals diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index 88441502d2..b6bb8309f0 100644 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -207,6 +207,7 @@ typedef enum { RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //status = Smi2RdcError(ret); + value->type = INTEGER; + if (value->status == AMDSMI_STATUS_SUCCESS) { + value->value.l_int = static_cast(gpu_busy_percent); + } + } + default: break; } diff --git a/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc b/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc index d8313a287d..a42de4d289 100644 --- a/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc +++ b/projects/rdc/rdc_libs/rdc/src/RdcSmiLib.cc @@ -148,43 +148,45 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI } // List of fields supported by amd_smi_lib + // clang-format off const std::vector fields{ - - RDC_FI_GPU_COUNT, RDC_FI_DEV_NAME, RDC_FI_GPU_CLOCK, - RDC_FI_MEM_CLOCK, RDC_FI_MEMORY_TEMP, RDC_FI_GPU_TEMP, - RDC_FI_POWER_USAGE, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, - RDC_FI_PCIE_BANDWIDTH, RDC_FI_GPU_UTIL, RDC_FI_GPU_MEMORY_USAGE, - RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, - RDC_FI_ECC_SDMA_CE, RDC_FI_ECC_SDMA_UE, RDC_FI_ECC_GFX_CE, - RDC_FI_ECC_GFX_UE, RDC_FI_ECC_MMHUB_CE, RDC_FI_ECC_MMHUB_UE, - RDC_FI_ECC_ATHUB_CE, RDC_FI_ECC_ATHUB_UE, RDC_FI_ECC_PCIE_BIF_CE, - RDC_FI_ECC_PCIE_BIF_UE, RDC_FI_ECC_HDP_CE, RDC_FI_ECC_HDP_UE, - RDC_FI_ECC_XGMI_WAFL_CE, RDC_FI_ECC_XGMI_WAFL_UE, RDC_FI_ECC_DF_CE, - RDC_FI_ECC_DF_UE, RDC_FI_ECC_SMN_CE, RDC_FI_ECC_SMN_UE, - RDC_FI_ECC_SEM_CE, RDC_FI_ECC_SEM_UE, RDC_FI_ECC_MP0_CE, - RDC_FI_ECC_MP0_UE, RDC_FI_ECC_MP1_CE, RDC_FI_ECC_MP1_UE, - RDC_FI_ECC_FUSE_CE, RDC_FI_ECC_FUSE_UE, RDC_FI_ECC_UMC_CE, - RDC_FI_ECC_UMC_UE, RDC_FI_ECC_MCA_CE, RDC_FI_ECC_MCA_UE, - RDC_FI_ECC_VCN_CE, RDC_FI_ECC_VCN_UE, RDC_FI_ECC_JPEG_CE, - RDC_FI_ECC_JPEG_UE, RDC_FI_ECC_IH_CE, RDC_FI_ECC_IH_UE, - RDC_FI_ECC_MPIO_CE, RDC_FI_ECC_MPIO_UE, RDC_FI_XGMI_0_READ_KB, - RDC_FI_XGMI_1_READ_KB, RDC_FI_XGMI_2_READ_KB, RDC_FI_XGMI_3_READ_KB, - RDC_FI_XGMI_4_READ_KB, RDC_FI_XGMI_5_READ_KB, RDC_FI_XGMI_6_READ_KB, - RDC_FI_XGMI_7_READ_KB, RDC_FI_XGMI_TOTAL_READ_KB, RDC_FI_XGMI_0_WRITE_KB, - RDC_FI_XGMI_1_WRITE_KB, RDC_FI_XGMI_2_WRITE_KB, RDC_FI_XGMI_3_WRITE_KB, - RDC_FI_XGMI_4_WRITE_KB, RDC_FI_XGMI_5_WRITE_KB, RDC_FI_XGMI_6_WRITE_KB, - RDC_FI_XGMI_7_WRITE_KB, RDC_FI_XGMI_TOTAL_WRITE_KB, - RDC_EVNT_XGMI_0_NOP_TX, RDC_EVNT_XGMI_0_REQ_TX, RDC_EVNT_XGMI_0_RESP_TX, - RDC_EVNT_XGMI_0_BEATS_TX, RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX, - RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, RDC_EVNT_XGMI_0_THRPUT, - RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT, - RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT, RDC_FI_OAM_ID, - RDC_FI_GPU_MM_ENC_UTIL, RDC_FI_GPU_MM_DEC_UTIL, RDC_FI_GPU_MEMORY_ACTIVITY, - RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM, - RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_EEPROM_CONFIG_VALID, - RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME, - RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, + RDC_FI_GPU_COUNT, RDC_FI_DEV_NAME, RDC_FI_GPU_CLOCK, + RDC_FI_MEM_CLOCK, RDC_FI_MEMORY_TEMP, RDC_FI_GPU_TEMP, + RDC_FI_POWER_USAGE, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, + RDC_FI_PCIE_BANDWIDTH, RDC_FI_GPU_UTIL, RDC_FI_GPU_MEMORY_USAGE, + RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, + RDC_FI_ECC_SDMA_CE, RDC_FI_ECC_SDMA_UE, RDC_FI_ECC_GFX_CE, + RDC_FI_ECC_GFX_UE, RDC_FI_ECC_MMHUB_CE, RDC_FI_ECC_MMHUB_UE, + RDC_FI_ECC_ATHUB_CE, RDC_FI_ECC_ATHUB_UE, RDC_FI_ECC_PCIE_BIF_CE, + RDC_FI_ECC_PCIE_BIF_UE, RDC_FI_ECC_HDP_CE, RDC_FI_ECC_HDP_UE, + RDC_FI_ECC_XGMI_WAFL_CE, RDC_FI_ECC_XGMI_WAFL_UE, RDC_FI_ECC_DF_CE, + RDC_FI_ECC_DF_UE, RDC_FI_ECC_SMN_CE, RDC_FI_ECC_SMN_UE, + RDC_FI_ECC_SEM_CE, RDC_FI_ECC_SEM_UE, RDC_FI_ECC_MP0_CE, + RDC_FI_ECC_MP0_UE, RDC_FI_ECC_MP1_CE, RDC_FI_ECC_MP1_UE, + RDC_FI_ECC_FUSE_CE, RDC_FI_ECC_FUSE_UE, RDC_FI_ECC_UMC_CE, + RDC_FI_ECC_UMC_UE, RDC_FI_ECC_MCA_CE, RDC_FI_ECC_MCA_UE, + RDC_FI_ECC_VCN_CE, RDC_FI_ECC_VCN_UE, RDC_FI_ECC_JPEG_CE, + RDC_FI_ECC_JPEG_UE, RDC_FI_ECC_IH_CE, RDC_FI_ECC_IH_UE, + RDC_FI_ECC_MPIO_CE, RDC_FI_ECC_MPIO_UE, RDC_FI_XGMI_0_READ_KB, + RDC_FI_XGMI_1_READ_KB, RDC_FI_XGMI_2_READ_KB, RDC_FI_XGMI_3_READ_KB, + RDC_FI_XGMI_4_READ_KB, RDC_FI_XGMI_5_READ_KB, RDC_FI_XGMI_6_READ_KB, + RDC_FI_XGMI_7_READ_KB, RDC_FI_XGMI_TOTAL_READ_KB, RDC_FI_XGMI_0_WRITE_KB, + RDC_FI_XGMI_1_WRITE_KB, RDC_FI_XGMI_2_WRITE_KB, RDC_FI_XGMI_3_WRITE_KB, + RDC_FI_XGMI_4_WRITE_KB, RDC_FI_XGMI_5_WRITE_KB, RDC_FI_XGMI_6_WRITE_KB, + RDC_FI_XGMI_7_WRITE_KB, RDC_FI_XGMI_TOTAL_WRITE_KB, + RDC_EVNT_XGMI_0_NOP_TX, RDC_EVNT_XGMI_0_REQ_TX, RDC_EVNT_XGMI_0_RESP_TX, + RDC_EVNT_XGMI_0_BEATS_TX, RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX, + RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, RDC_EVNT_XGMI_0_THRPUT, + RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT, + RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT, RDC_FI_OAM_ID, + RDC_FI_GPU_MM_ENC_UTIL, RDC_FI_GPU_MM_DEC_UTIL, RDC_FI_GPU_MEMORY_ACTIVITY, + RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM, + RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_EEPROM_CONFIG_VALID, + RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME, + RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, + RDC_FI_GPU_BUSY_PERCENT, RDC_FI_GPU_PAGE_RETRIED }; + // clang-format on std::copy(fields.begin(), fields.end(), field_ids); *field_count = fields.size();