Add RDC_FI_GPU_BUSY_PERCENT
The corresponding AMDSMI change needs to merge first, and the required amd_smi version must be bumped to at least 25.4.2
Change-Id: I30149bb78c79ebc3de0dabdc8e63fcef12b2f406
Signed-off-by: Galantsev, Dmitrii <dmitrii.galantsev@amd.com>
[ROCm/rdc commit: a5cb334f8b]
Этот коммит содержится в:
коммит произвёл
Galantsev, Dmitrii
родитель
e15c5a15fa
Коммит
375ab5eace
@@ -165,7 +165,7 @@ if(BUILD_STANDALONE AND GRPC_ROOT STREQUAL GRPC_ROOT_DEFAULT)
|
||||
Continuing without gRPC install")
|
||||
endif()
|
||||
|
||||
find_package(amd_smi 25.4.0
|
||||
find_package(amd_smi 25.4.2
|
||||
NAMES amd_smi
|
||||
HINTS ${ROCM_DIR}/lib/cmake
|
||||
CONFIGURE REQUIRED)
|
||||
|
||||
@@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// clang-format off
|
||||
|
||||
// Description Fields:
|
||||
// Arg # Type Meaning
|
||||
// -------------------------------------------------
|
||||
@@ -54,7 +56,7 @@ FLD_DESC_ENT(RDC_FI_GPU_MM_DEC_UTIL, "Mutilmedia decoder busy percentage",
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_ACTIVITY, "Memory busy percentage", "GPU_MEM_UTIL", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, "Memory max bandwidth", "GPU_MEM_MAX_BANDWIDTH", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, "Memory current bandwidth", "GPU_MEM_CUR_BANDWIDTH", true)
|
||||
|
||||
FLD_DESC_ENT(RDC_FI_GPU_BUSY_PERCENT, "GPU busy percentage", "GPU_BUSY_PERCENT", true)
|
||||
FLD_DESC_ENT(RDC_FI_GPU_PAGE_RETRIED, "Retried page of the GPU instance", "GPU_PAGE_RETRIED", true)
|
||||
|
||||
// ECC totals
|
||||
|
||||
@@ -207,6 +207,7 @@ typedef enum {
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, //<! The Memory max bandwidth at current memory clock in
|
||||
// Mb/Second
|
||||
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH, //<! The Memory current bandwidth in Mb/Second
|
||||
RDC_FI_GPU_BUSY_PERCENT, //<! The GPU busy percentage
|
||||
|
||||
/**
|
||||
* @brief GPU page related fields
|
||||
|
||||
@@ -100,6 +100,7 @@ class rdc_field_t(c_int):
|
||||
RDC_FI_GPU_MEMORY_ACTIVITY = 505
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH = 506
|
||||
RDC_FI_GPU_MEMORY_CUR_BANDWIDTH = 507
|
||||
RDC_FI_GPU_BUSY_PERCENT = 508
|
||||
RDC_FI_GPU_PAGE_RETRIED = 550
|
||||
RDC_FI_ECC_CORRECT_TOTAL = 600
|
||||
RDC_FI_ECC_UNCORRECT_TOTAL = 601
|
||||
|
||||
@@ -1091,6 +1091,16 @@ rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index, rdc_field
|
||||
break;
|
||||
}
|
||||
|
||||
case RDC_FI_GPU_BUSY_PERCENT: {
  // Query the GPU busy percentage through amdsmi_get_gpu_busy_percent and
  // publish it as an INTEGER field value.
  uint32_t gpu_busy_percent = 0;
  ret = amdsmi_get_gpu_busy_percent(processor_handle, &gpu_busy_percent);
  // The SMI return code is translated by Smi2RdcError before being stored.
  value->status = Smi2RdcError(ret);
  value->type = INTEGER;
  // Only store the reading when the query succeeded.
  // NOTE(review): this compares the translated status against an AMDSMI
  // constant; presumably both success codes share the same value -- confirm,
  // or compare `ret` directly.
  if (value->status == AMDSMI_STATUS_SUCCESS) {
    value->value.l_int = static_cast<int64_t>(gpu_busy_percent);
  }
  // Terminate the case explicitly instead of relying on an implicit
  // fall-through into `default:`, so a case added below cannot be entered
  // by accident.
  break;
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -148,43 +148,45 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FI
|
||||
}
|
||||
|
||||
// List of fields supported by amd_smi_lib
|
||||
// clang-format off
|
||||
const std::vector<uint32_t> fields{
|
||||
|
||||
RDC_FI_GPU_COUNT, RDC_FI_DEV_NAME, RDC_FI_GPU_CLOCK,
|
||||
RDC_FI_MEM_CLOCK, RDC_FI_MEMORY_TEMP, RDC_FI_GPU_TEMP,
|
||||
RDC_FI_POWER_USAGE, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX,
|
||||
RDC_FI_PCIE_BANDWIDTH, RDC_FI_GPU_UTIL, RDC_FI_GPU_MEMORY_USAGE,
|
||||
RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL,
|
||||
RDC_FI_ECC_SDMA_CE, RDC_FI_ECC_SDMA_UE, RDC_FI_ECC_GFX_CE,
|
||||
RDC_FI_ECC_GFX_UE, RDC_FI_ECC_MMHUB_CE, RDC_FI_ECC_MMHUB_UE,
|
||||
RDC_FI_ECC_ATHUB_CE, RDC_FI_ECC_ATHUB_UE, RDC_FI_ECC_PCIE_BIF_CE,
|
||||
RDC_FI_ECC_PCIE_BIF_UE, RDC_FI_ECC_HDP_CE, RDC_FI_ECC_HDP_UE,
|
||||
RDC_FI_ECC_XGMI_WAFL_CE, RDC_FI_ECC_XGMI_WAFL_UE, RDC_FI_ECC_DF_CE,
|
||||
RDC_FI_ECC_DF_UE, RDC_FI_ECC_SMN_CE, RDC_FI_ECC_SMN_UE,
|
||||
RDC_FI_ECC_SEM_CE, RDC_FI_ECC_SEM_UE, RDC_FI_ECC_MP0_CE,
|
||||
RDC_FI_ECC_MP0_UE, RDC_FI_ECC_MP1_CE, RDC_FI_ECC_MP1_UE,
|
||||
RDC_FI_ECC_FUSE_CE, RDC_FI_ECC_FUSE_UE, RDC_FI_ECC_UMC_CE,
|
||||
RDC_FI_ECC_UMC_UE, RDC_FI_ECC_MCA_CE, RDC_FI_ECC_MCA_UE,
|
||||
RDC_FI_ECC_VCN_CE, RDC_FI_ECC_VCN_UE, RDC_FI_ECC_JPEG_CE,
|
||||
RDC_FI_ECC_JPEG_UE, RDC_FI_ECC_IH_CE, RDC_FI_ECC_IH_UE,
|
||||
RDC_FI_ECC_MPIO_CE, RDC_FI_ECC_MPIO_UE, RDC_FI_XGMI_0_READ_KB,
|
||||
RDC_FI_XGMI_1_READ_KB, RDC_FI_XGMI_2_READ_KB, RDC_FI_XGMI_3_READ_KB,
|
||||
RDC_FI_XGMI_4_READ_KB, RDC_FI_XGMI_5_READ_KB, RDC_FI_XGMI_6_READ_KB,
|
||||
RDC_FI_XGMI_7_READ_KB, RDC_FI_XGMI_TOTAL_READ_KB, RDC_FI_XGMI_0_WRITE_KB,
|
||||
RDC_FI_XGMI_1_WRITE_KB, RDC_FI_XGMI_2_WRITE_KB, RDC_FI_XGMI_3_WRITE_KB,
|
||||
RDC_FI_XGMI_4_WRITE_KB, RDC_FI_XGMI_5_WRITE_KB, RDC_FI_XGMI_6_WRITE_KB,
|
||||
RDC_FI_XGMI_7_WRITE_KB, RDC_FI_XGMI_TOTAL_WRITE_KB,
|
||||
RDC_EVNT_XGMI_0_NOP_TX, RDC_EVNT_XGMI_0_REQ_TX, RDC_EVNT_XGMI_0_RESP_TX,
|
||||
RDC_EVNT_XGMI_0_BEATS_TX, RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX,
|
||||
RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, RDC_EVNT_XGMI_0_THRPUT,
|
||||
RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT,
|
||||
RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT, RDC_FI_OAM_ID,
|
||||
RDC_FI_GPU_MM_ENC_UTIL, RDC_FI_GPU_MM_DEC_UTIL, RDC_FI_GPU_MEMORY_ACTIVITY,
|
||||
RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM,
|
||||
RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_EEPROM_CONFIG_VALID,
|
||||
RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME,
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH,
|
||||
RDC_FI_GPU_COUNT, RDC_FI_DEV_NAME, RDC_FI_GPU_CLOCK,
|
||||
RDC_FI_MEM_CLOCK, RDC_FI_MEMORY_TEMP, RDC_FI_GPU_TEMP,
|
||||
RDC_FI_POWER_USAGE, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX,
|
||||
RDC_FI_PCIE_BANDWIDTH, RDC_FI_GPU_UTIL, RDC_FI_GPU_MEMORY_USAGE,
|
||||
RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL,
|
||||
RDC_FI_ECC_SDMA_CE, RDC_FI_ECC_SDMA_UE, RDC_FI_ECC_GFX_CE,
|
||||
RDC_FI_ECC_GFX_UE, RDC_FI_ECC_MMHUB_CE, RDC_FI_ECC_MMHUB_UE,
|
||||
RDC_FI_ECC_ATHUB_CE, RDC_FI_ECC_ATHUB_UE, RDC_FI_ECC_PCIE_BIF_CE,
|
||||
RDC_FI_ECC_PCIE_BIF_UE, RDC_FI_ECC_HDP_CE, RDC_FI_ECC_HDP_UE,
|
||||
RDC_FI_ECC_XGMI_WAFL_CE, RDC_FI_ECC_XGMI_WAFL_UE, RDC_FI_ECC_DF_CE,
|
||||
RDC_FI_ECC_DF_UE, RDC_FI_ECC_SMN_CE, RDC_FI_ECC_SMN_UE,
|
||||
RDC_FI_ECC_SEM_CE, RDC_FI_ECC_SEM_UE, RDC_FI_ECC_MP0_CE,
|
||||
RDC_FI_ECC_MP0_UE, RDC_FI_ECC_MP1_CE, RDC_FI_ECC_MP1_UE,
|
||||
RDC_FI_ECC_FUSE_CE, RDC_FI_ECC_FUSE_UE, RDC_FI_ECC_UMC_CE,
|
||||
RDC_FI_ECC_UMC_UE, RDC_FI_ECC_MCA_CE, RDC_FI_ECC_MCA_UE,
|
||||
RDC_FI_ECC_VCN_CE, RDC_FI_ECC_VCN_UE, RDC_FI_ECC_JPEG_CE,
|
||||
RDC_FI_ECC_JPEG_UE, RDC_FI_ECC_IH_CE, RDC_FI_ECC_IH_UE,
|
||||
RDC_FI_ECC_MPIO_CE, RDC_FI_ECC_MPIO_UE, RDC_FI_XGMI_0_READ_KB,
|
||||
RDC_FI_XGMI_1_READ_KB, RDC_FI_XGMI_2_READ_KB, RDC_FI_XGMI_3_READ_KB,
|
||||
RDC_FI_XGMI_4_READ_KB, RDC_FI_XGMI_5_READ_KB, RDC_FI_XGMI_6_READ_KB,
|
||||
RDC_FI_XGMI_7_READ_KB, RDC_FI_XGMI_TOTAL_READ_KB, RDC_FI_XGMI_0_WRITE_KB,
|
||||
RDC_FI_XGMI_1_WRITE_KB, RDC_FI_XGMI_2_WRITE_KB, RDC_FI_XGMI_3_WRITE_KB,
|
||||
RDC_FI_XGMI_4_WRITE_KB, RDC_FI_XGMI_5_WRITE_KB, RDC_FI_XGMI_6_WRITE_KB,
|
||||
RDC_FI_XGMI_7_WRITE_KB, RDC_FI_XGMI_TOTAL_WRITE_KB,
|
||||
RDC_EVNT_XGMI_0_NOP_TX, RDC_EVNT_XGMI_0_REQ_TX, RDC_EVNT_XGMI_0_RESP_TX,
|
||||
RDC_EVNT_XGMI_0_BEATS_TX, RDC_EVNT_XGMI_1_NOP_TX, RDC_EVNT_XGMI_1_REQ_TX,
|
||||
RDC_EVNT_XGMI_1_RESP_TX, RDC_EVNT_XGMI_1_BEATS_TX, RDC_EVNT_XGMI_0_THRPUT,
|
||||
RDC_EVNT_XGMI_1_THRPUT, RDC_EVNT_XGMI_2_THRPUT, RDC_EVNT_XGMI_3_THRPUT,
|
||||
RDC_EVNT_XGMI_4_THRPUT, RDC_EVNT_XGMI_5_THRPUT, RDC_FI_OAM_ID,
|
||||
RDC_FI_GPU_MM_ENC_UTIL, RDC_FI_GPU_MM_DEC_UTIL, RDC_FI_GPU_MEMORY_ACTIVITY,
|
||||
RDC_HEALTH_XGMI_ERROR, RDC_HEALTH_PCIE_REPLAY_COUNT, RDC_HEALTH_RETIRED_PAGE_NUM,
|
||||
RDC_HEALTH_PENDING_PAGE_NUM, RDC_HEALTH_RETIRED_PAGE_LIMIT, RDC_HEALTH_EEPROM_CONFIG_VALID,
|
||||
RDC_HEALTH_POWER_THROTTLE_TIME, RDC_HEALTH_THERMAL_THROTTLE_TIME,
|
||||
RDC_FI_GPU_MEMORY_MAX_BANDWIDTH, RDC_FI_GPU_MEMORY_CUR_BANDWIDTH,
|
||||
RDC_FI_GPU_BUSY_PERCENT, RDC_FI_GPU_PAGE_RETRIED
|
||||
};
|
||||
// clang-format on
|
||||
std::copy(fields.begin(), fields.end(), field_ids);
|
||||
*field_count = fields.size();
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user