rocm_smi_lib: add gpu_metrics_v1_3 support

Signed-off-by: Elena Sakhnovitch
Change-Id: I4a9dedc80b8fce60e12c5baf8651d54d16a6a41c
Этот коммит содержится в:
Elena Sakhnovitch
2021-08-03 21:26:25 -04:00
коммит произвёл Shuzhou Liu
родитель 7a8c3f3629
Коммит fee82af1fe
2 изменённых файлов: 56 добавлений и 3 удалений
+15 -1
Просмотреть файл
@@ -835,6 +835,7 @@ struct metrics_table_header_t {
// existing field sizes are changed.
#define RSMI_GPU_METRICS_API_CONTENT_VER_1 1
#define RSMI_GPU_METRICS_API_CONTENT_VER_2 2
#define RSMI_GPU_METRICS_API_CONTENT_VER_3 3
// This should match NUM_HBM_INSTANCES
#define RSMI_NUM_HBM_INSTANCES 4
@@ -900,6 +901,19 @@ typedef struct {
uint32_t gfx_activity_acc; // new in v1
uint32_t mem_actvity_acc; // new in v1
uint16_t temperature_hbm[RSMI_NUM_HBM_INSTANCES]; // new in v1
/* PMFW attached timestamp (10ns resolution) */
uint64_t firmware_timestamp; // added in v1_2
/* Voltage (mV) */
uint16_t voltage_soc; // added in v1_3
uint16_t voltage_gfx; // added in v1_3
uint16_t voltage_mem; // added in v1_3
uint16_t padding1;
/* Throttle status (ASIC independent) */
uint64_t indep_throttle_status; // added in v1_3
/// \endcond
} rsmi_gpu_metrics_t;
@@ -2155,7 +2169,7 @@ rsmi_dev_busy_percent_get(uint32_t dv_ind, uint32_t *busy_percent);
*
* @param[inout] utilization_counters Multiple utilization counters can be retrieved with a single
* call. The caller must allocate enough space to the utilization_counters array. The caller also
* needs to set valid RSMI_UTILIZATION_COUNTER_TYPE type for each element of the array.
* needs to set valid RSMI_UTILIZATION_COUNTER_TYPE type for each element of the array.
* ::RSMI_STATUS_NOT_SUPPORTED if it is not supported with the provided arguments.
*
* If the function returns RSMI_STATUS_SUCCESS, the counter will be set in the value field of
+41 -2
Просмотреть файл
@@ -121,14 +121,32 @@ typedef struct {
uint64_t firmware_timestamp;
} rsmi_gpu_metrics_v_1_2;
/*
 * File-local record used for gpu_metrics content revision 3
 * (RSMI_GPU_METRICS_API_CONTENT_VER_3).
 *
 * rsmi_dev_gpu_metrics_info_get() passes sizeof(rsmi_gpu_metrics_v_1_3) and
 * the address of an instance of this struct to GetDevBinaryBlob(), then hands
 * the result to map_gpu_metrics_1_3_to_rsmi_gpu_metrics_t(). Because the
 * struct is filled as a raw blob, the order and size of the members below
 * must not be changed.
 *
 * NOTE(review): the public rsmi_gpu_metrics_t also declares members named
 * firmware_timestamp, voltage_soc, voltage_gfx, voltage_mem, padding1 and
 * indep_throttle_status. Confirm that "base followed by the trailing members
 * here" really matches the size and layout of the blob being read.
 */
typedef struct {
/* Leading part of the record; copied as a whole into the caller's struct. */
rsmi_gpu_metrics_t base;
/* PMFW attached timestamp (10ns resolution) */
uint64_t firmware_timestamp;
/* Voltage (mV) */
uint16_t voltage_soc;
uint16_t voltage_gfx;
uint16_t voltage_mem;
/*
 * No explicit padding member is declared between voltage_mem and the 64-bit
 * field below, unlike the padding1 member in rsmi_gpu_metrics_t; presumably
 * the compiler's alignment padding fills the gap -- TODO confirm.
 */
/* Throttle status (ASIC independent) */
uint64_t indep_throttle_status;
} rsmi_gpu_metrics_v_1_3;
static rsmi_status_t GetGPUMetricsFormat1(uint32_t dv_ind,
rsmi_gpu_metrics_t *data, uint8_t content_v) {
assert(content_v != RSMI_GPU_METRICS_API_CONTENT_VER_1 &&
content_v != RSMI_GPU_METRICS_API_CONTENT_VER_2 );
content_v != RSMI_GPU_METRICS_API_CONTENT_VER_2 &&
content_v != RSMI_GPU_METRICS_API_CONTENT_VER_3 );
if (content_v == RSMI_GPU_METRICS_API_CONTENT_VER_1 ||
content_v == RSMI_GPU_METRICS_API_CONTENT_VER_2 ) {
content_v == RSMI_GPU_METRICS_API_CONTENT_VER_2 ||
content_v == RSMI_GPU_METRICS_API_CONTENT_VER_3 ) {
// This function shouldn't be called if content version is
// RSMI_GPU_METRICS_API_CONTENT_VER_1 or RSMI_GPU_METRICS_API_CONTENT_VER_2
// or RSMI_GPU_METRICS_API_CONTENT_VER_3
return RSMI_STATUS_INVALID_ARGS;
}
void *metric_data = nullptr;
@@ -226,6 +244,17 @@ static void map_gpu_metrics_1_2_to_rsmi_gpu_metrics_t(
gpu_metrics_v_1_2->firmware_timestamp * 10;
}
/*
 * Convert a content-revision-3 record into the public rsmi_gpu_metrics_t.
 *
 * The embedded base record is copied byte-for-byte into the destination, and
 * system_clock_counter is then overwritten with the firmware timestamp
 * multiplied by 10 (the timestamp is documented as having 10ns resolution).
 *
 * @param gpu_metrics_v_1_3  Source record; only base and firmware_timestamp
 *                           are read here.
 * @param rsmi_gpu_metrics   Destination; fully overwritten.
 *
 * NOTE(review): voltage_soc, voltage_gfx, voltage_mem and
 * indep_throttle_status of the source record are not read by this helper;
 * confirm whether the copy of base is meant to carry them.
 */
static void map_gpu_metrics_1_3_to_rsmi_gpu_metrics_t(
    const rsmi_gpu_metrics_v_1_3 *gpu_metrics_v_1_3,
    rsmi_gpu_metrics_t *rsmi_gpu_metrics)
{
  // Bulk-copy the base portion first; the field below is patched afterwards.
  memcpy(rsmi_gpu_metrics, &gpu_metrics_v_1_3->base,
         sizeof(*rsmi_gpu_metrics));

  // Firmware ticks are at 10ns resolution, hence the factor of 10.
  const uint64_t fw_ticks = gpu_metrics_v_1_3->firmware_timestamp;
  rsmi_gpu_metrics->system_clock_counter = fw_ticks * 10;
}
rsmi_status_t
rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
@@ -233,6 +262,7 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
DEVICE_MUTEX
CHK_SUPPORT_NAME_ONLY(smu)
rsmi_gpu_metrics_v_1_2 smu_v_1_2;
rsmi_gpu_metrics_v_1_3 smu_v_1_3;
rsmi_status_t ret;
if (!dev->gpu_metrics_ver().structure_size) {
@@ -248,6 +278,10 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
return RSMI_STATUS_NOT_SUPPORTED;
}
// Initialize the smu fields to zero, as some of them are only valid in
// a specific version.
*smu = {};
if (dev->gpu_metrics_ver().content_revision ==
RSMI_GPU_METRICS_API_CONTENT_VER_1) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
@@ -257,6 +291,11 @@ rsmi_dev_gpu_metrics_info_get(uint32_t dv_ind, rsmi_gpu_metrics_t *smu) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
sizeof(rsmi_gpu_metrics_v_1_2), &smu_v_1_2);
map_gpu_metrics_1_2_to_rsmi_gpu_metrics_t(&smu_v_1_2, smu);
} else if (dev->gpu_metrics_ver().content_revision ==
RSMI_GPU_METRICS_API_CONTENT_VER_3) {
ret = GetDevBinaryBlob(amd::smi::kDevGpuMetrics, dv_ind,
sizeof(rsmi_gpu_metrics_v_1_3), &smu_v_1_3);
map_gpu_metrics_1_3_to_rsmi_gpu_metrics_t(&smu_v_1_3, smu);
} else {
ret = GetGPUMetricsFormat1(dv_ind, smu,
dev->gpu_metrics_ver().content_revision);