SWDEV-353742 - Port smilib function to amdsmi
Change-Id: I99df249755a5c665a8dd1777fa82d046e139bd77
Signed-off-by: Dalibor Stanisavljevic <Dalibor.Stanisavljevic@amd.com>
[ROCm/amdsmi commit: 3daf9c1063]
Этот коммит содержится в:
коммит произвёл
Bill(Shuzhou) Liu
родитель
cf09c187f1
Коммит
bd4ff14bd0
@@ -141,6 +141,8 @@ set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_socket.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_system.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_drm.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_lib_loader.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/amd_smi_utils.cc")
|
||||
set(CMN_SRC_LIST ${CMN_SRC_LIST} "${AMDSMI_SRC_DIR}/fdinfo.cc")
|
||||
|
||||
set(CMN_INC_LIST "${ROCM_INC_DIR}/rocm_smi_device.h")
|
||||
set(CMN_INC_LIST ${CMN_INC_LIST} "${ROCM_INC_DIR}/rocm_smi_main.h")
|
||||
@@ -166,11 +168,16 @@ set(CMN_SRC_LIST ${CMN_SRC_LIST} "${COMMON_INC_DIR}/impl/amd_smi_lib_loader.h")
|
||||
add_subdirectory("rocm_smi")
|
||||
|
||||
# Examples and docs
|
||||
set(SMI_EXAMPLE_EXE "amd_smi_ex")
|
||||
add_executable(${SMI_EXAMPLE_EXE} "example/amd_smi_example.cc")
|
||||
set(SMI_EXAMPLE_EXE "amd_smi_drm_ex")
|
||||
add_executable(${SMI_EXAMPLE_EXE} "example/amd_smi_drm_example.cc")
|
||||
target_link_libraries(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
add_dependencies(${SMI_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
|
||||
set(SMI_NODRM_EXAMPLE_EXE "amd_smi_nodrm_ex")
|
||||
add_executable(${SMI_NODRM_EXAMPLE_EXE} "example/amd_smi_nodrm_example.cc")
|
||||
target_link_libraries(${SMI_NODRM_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
add_dependencies(${SMI_NODRM_EXAMPLE_EXE} ${ROCM_SMI_TARGET})
|
||||
|
||||
# Generate Doxygen documentation
|
||||
find_package(Doxygen)
|
||||
find_package(LATEX COMPONENTS PDFLATEX)
|
||||
|
||||
@@ -0,0 +1,535 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2022, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <assert.h>
#include <pwd.h>
#include <stdint.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <bitset>
#include <cstdio>
#include <iostream>
#include <string>
#include <vector>

#include "amd_smi/amd_smi.h"
|
||||
|
||||
// Checks the status produced by an AMD SMI call. For any value other than
// AMDSMI_STATUS_SUCCESS it prints the numeric status and the source line,
// asks amdsmi_status_string() for a description and prints it, then returns
// the status from the enclosing function.
//
// The expansion is deliberately a plain braced block (not do/while(0)):
// the call sites in this file invoke the macro without a trailing semicolon.
#define CHK_AMDSMI_RET(RET)                                                   \
  {                                                                           \
    /* Evaluate the argument exactly once. */                                 \
    const auto chk_status_ = (RET);                                           \
    if (chk_status_ != AMDSMI_STATUS_SUCCESS) {                               \
      const char *chk_err_str_ = nullptr;                                     \
      std::cout << "AMDSMI call returned " << chk_status_ << " at line "      \
                << __LINE__ << std::endl;                                     \
      amdsmi_status_string(chk_status_, &chk_err_str_);                       \
      /* Streaming a null/uninitialised char pointer is undefined, so fall */ \
      /* back to a fixed text when no description was provided.           */  \
      std::cout << (chk_err_str_ ? chk_err_str_ : "Unknown error")            \
                << std::endl;                                                 \
      return chk_status_;                                                     \
    }                                                                         \
  }
|
||||
|
||||
int main() {
|
||||
amdsmi_status_t ret;
|
||||
|
||||
// Init amdsmi for sockets and devices.
|
||||
// Here we are only interested in AMD_GPUS.
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Get all sockets
|
||||
uint32_t socket_count = 0;
|
||||
amdsmi_socket_handle *sockets = nullptr;
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Total Socket: " << socket_count << std::endl;
|
||||
|
||||
// For each socket, get identifier and devices
|
||||
for (uint32_t i = 0; i < socket_count; i++) {
|
||||
// Get Socket info
|
||||
char socket_name[128];
|
||||
ret = amdsmi_get_socket_info(sockets[i], socket_name, 128);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Socket " << socket_name << std::endl;
|
||||
|
||||
// Get all devices of the socket
|
||||
uint32_t device_count = 0;
|
||||
amdsmi_device_handle *device_handles = nullptr;
|
||||
ret = amdsmi_get_device_handles(sockets[i], &device_count,
|
||||
&device_handles);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// For each device of the socket, get name and temperature.
|
||||
for (uint32_t j = 0; j < device_count; j++) {
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMD_SMI_INIT_AMD_GPUS, the device_type must be AMD_GPU.
|
||||
device_type_t device_type = {};
|
||||
ret = amdsmi_get_device_type(device_handles[j], &device_type);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (device_type != AMD_GPU) {
|
||||
std::cout << "Expect AMD_GPU device type!\n";
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
amdsmi_bdf_t bdf = {};
|
||||
ret = amdsmi_get_device_bdf(device_handles[j], &bdf);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_device_bdf:\n");
|
||||
printf("\tDevice[%d] BDF %04x:%02x:%02x.%d\n\n", i,
|
||||
bdf.domain_number, bdf.bus_number, bdf.device_number,
|
||||
bdf.function_number);
|
||||
|
||||
amdsmi_asic_info_t asic_info = {};
|
||||
ret = amdsmi_get_asic_info(device_handles[j], &asic_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_asic_info:\n");
|
||||
printf("\tMarket Name: %s\n", asic_info.market_name);
|
||||
printf("\tFamilyID: 0x%x\n", asic_info.family);
|
||||
printf("\tDeviceID: 0x%lx\n", asic_info.device_id);
|
||||
printf("\tVendorID: 0x%x\n", asic_info.vendor_id);
|
||||
printf("\tRevisionID: 0x%x\n", asic_info.rev_id);
|
||||
printf("\tAsic serial: 0x%s\n\n", asic_info.asic_serial);
|
||||
|
||||
// Get VBIOS info
|
||||
amdsmi_vbios_info_t vbios_info = {};
|
||||
ret = amdsmi_get_vbios_info(device_handles[j], &vbios_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_vbios_info:\n");
|
||||
printf("\tVBios Name: %s\n", vbios_info.name);
|
||||
printf("\tBuild Date: %s\n", vbios_info.build_date);
|
||||
printf("\tPart Number: %s\n", vbios_info.part_number);
|
||||
printf("\tVBios Version: %d\n", vbios_info.vbios_version);
|
||||
printf("\tVBios Version String: %s\n\n",
|
||||
vbios_info.vbios_version_string);
|
||||
|
||||
// Get engine usage info
|
||||
amdsmi_engine_usage_t engine_usage = {};
|
||||
ret = amdsmi_get_gpu_activity(device_handles[j], &engine_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_activity:\n");
|
||||
printf("\tAverage GFX Activity: %d\n",
|
||||
engine_usage.average_gfx_activity);
|
||||
printf("\tAverage MM Activity: %d\n",
|
||||
engine_usage.average_mm_activity[0]);
|
||||
printf("\tAverage UMC Activity: %d\n\n",
|
||||
engine_usage.average_umc_activity);
|
||||
|
||||
// Get firmware info
|
||||
amdsmi_fw_info_t fw_information = {};
|
||||
ret = amdsmi_get_fw_info(device_handles[j], &fw_information);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_fw_info:\n");
|
||||
printf("\tFirmware version: %d\n", fw_information.num_fw_info);
|
||||
printf("\tSMU: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_SMU]
|
||||
.fw_version);
|
||||
printf("\tSMC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_SMC]
|
||||
.fw_version);
|
||||
printf("\tVCN: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_VCN]
|
||||
.fw_version);
|
||||
printf("\tCP_ME: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_ME]
|
||||
.fw_version);
|
||||
printf("\tCP_PFP: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_PFP]
|
||||
.fw_version);
|
||||
printf("\tCP_CE: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_CE]
|
||||
.fw_version);
|
||||
printf("\tRLC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_RLC]
|
||||
.fw_version);
|
||||
printf("\tCP_MEC1: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_MEC1]
|
||||
.fw_version);
|
||||
printf("\tCP_MEC2: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_MEC2]
|
||||
.fw_version);
|
||||
printf("\tSDMA0: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_SDMA0]
|
||||
.fw_version);
|
||||
printf("\tMC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_MC]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST CNTL: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::FW_ID_RLC_RESTORE_LIST_CNTL]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST GPM MEM: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::FW_ID_RLC_RESTORE_LIST_GPM_MEM]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST SRM MEM: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::FW_ID_RLC_RESTORE_LIST_SRM_MEM]
|
||||
.fw_version);
|
||||
printf(
|
||||
"\tPSP SOSDRV: %ld\n\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_PSP_SOSDRV]
|
||||
.fw_version);
|
||||
|
||||
// Get GPU power limit info
|
||||
amdsmi_power_limit_t power_limit = {};
|
||||
ret = amdsmi_get_power_limit(device_handles[j], &power_limit);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_power_limit:\n");
|
||||
printf("\tGPU Power limit: %d\n\n", power_limit.limit);
|
||||
|
||||
// Get GFX clock measurements
|
||||
amdsmi_clock_measure_t gfx_clk_values = {};
|
||||
ret = amdsmi_get_clock_measure(device_handles[j], CLOCK_TYPE_GFX,
|
||||
&gfx_clk_values);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_clock_measure:\n");
|
||||
printf("\tGPU GFX Max Clock: %d\n", gfx_clk_values.max_clk);
|
||||
printf("\tGPU GFX Average Clock: %d\n", gfx_clk_values.avg_clk);
|
||||
printf("\tGPU GFX Current Clock: %d\n", gfx_clk_values.cur_clk);
|
||||
|
||||
// Get MEM clock measurements
|
||||
amdsmi_clock_measure_t mem_clk_values = {};
|
||||
ret = amdsmi_get_clock_measure(device_handles[j], CLOCK_TYPE_MEM,
|
||||
&mem_clk_values);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf("\tGPU MEM Max Clock: %d\n", mem_clk_values.max_clk);
|
||||
printf("\tGPU MEM Average Clock: %d\n", mem_clk_values.avg_clk);
|
||||
printf("\tGPU MEM Current Clock: %d\n\n", mem_clk_values.cur_clk);
|
||||
|
||||
// Get VRAM temperature limit
|
||||
amdsmi_temperature_limit_t mem_temp_limit = {};
|
||||
ret = amdsmi_get_temperature_limit(
|
||||
device_handles[j], TEMPERATURE_TYPE_VRAM, &mem_temp_limit);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_temperature_limit:\n");
|
||||
printf("\tGPU VRAM temp limit: %d\n", mem_temp_limit.limit);
|
||||
|
||||
// Get GFX temperature limit
|
||||
amdsmi_temperature_limit_t gfx_temp_limit = {};
|
||||
ret = amdsmi_get_temperature_limit(
|
||||
device_handles[j], TEMPERATURE_TYPE_EDGE, &gfx_temp_limit);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf("\tGPU GFX temp limit: %d\n\n", gfx_temp_limit.limit);
|
||||
|
||||
// Get temperature measurements
|
||||
// amdsmi_temperature_t edge_temp, junction_temp, vram_temp,
|
||||
// plx_temp;
|
||||
amdsmi_temperature_t temp_measurements[4];
|
||||
amdsmi_temperature_type_t temp_types[4] = {
|
||||
TEMPERATURE_TYPE_EDGE, TEMPERATURE_TYPE_JUNCTION,
|
||||
TEMPERATURE_TYPE_VRAM, TEMPERATURE_TYPE_PLX};
|
||||
for (const auto &temp_type : temp_types) {
|
||||
ret = amdsmi_get_temperature_measure(
|
||||
device_handles[j], temp_type,
|
||||
&temp_measurements[(int)(temp_type)]);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
}
|
||||
printf(" Output of amdsmi_get_temperature_measure:\n");
|
||||
printf("\tGPU Edge temp measurement: %d\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_EDGE].cur_temp);
|
||||
printf("\tGPU Junction temp measurement: %d\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_JUNCTION].cur_temp);
|
||||
printf("\tGPU VRAM temp measurement: %d\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_VRAM].cur_temp);
|
||||
printf("\tGPU PLX temp measurement: %d\n\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_PLX].cur_temp);
|
||||
|
||||
// Get RAS features enabled
|
||||
char block_names[14][10] = {"UMC", "SDMA", "GFX", "MMHUB",
|
||||
"ATHUB", "PCIE_BIF", "HDP", "XGMI_WAFL",
|
||||
"DF", "SMN", "SEM", "MP0",
|
||||
"MP1", "FUSE"};
|
||||
char status_names[7][10] = {"NONE", "DISABLED", "PARITY",
|
||||
"SING_C", "MULT_UC", "POISON",
|
||||
"ENABLED"};
|
||||
amdsmi_ras_err_state_t state = {};
|
||||
int index = 0;
|
||||
printf(" Output of amdsmi_get_ras_features_enabled:\n");
|
||||
for (auto block = AMDSMI_GPU_BLOCK_FIRST;
|
||||
block <= AMDSMI_GPU_BLOCK_LAST;
|
||||
block = (amdsmi_gpu_block_t)(block * 2)) {
|
||||
ret = amdsmi_get_ras_features_enabled(device_handles[j], block,
|
||||
&state);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf("\tBlock: %s\n", block_names[index]);
|
||||
printf("\tStatus: %s\n", status_names[state]);
|
||||
index++;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Get bad pages
|
||||
char bad_page_status_names[3][15] = {"RESERVED", "PENDING",
|
||||
"UNRESERVABLE"};
|
||||
uint32_t num_pages = 0;
|
||||
ret = amdsmi_get_bad_page_info(device_handles[j], &num_pages,
|
||||
nullptr);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_bad_page_info:\n");
|
||||
if (!num_pages) {
|
||||
printf("\tNo bad pages found.\n");
|
||||
} else {
|
||||
amdsmi_retired_page_record_t bad_page_info[num_pages] = {};
|
||||
ret = amdsmi_get_bad_page_info(device_handles[j], &num_pages,
|
||||
bad_page_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
for (uint32_t page_it = 0; page_it < num_pages; page_it += 1) {
|
||||
printf(" Page[%d]\n", page_it);
|
||||
printf("\tAddress: %lu\n",
|
||||
bad_page_info[page_it].page_address);
|
||||
printf("\tSize: %lu\n", bad_page_info[page_it].page_size);
|
||||
printf(
|
||||
"\tStatus: %s\n",
|
||||
bad_page_status_names[bad_page_info[page_it].status]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Get ECC error counts
|
||||
amdsmi_error_count_t err_cnt_info = {};
|
||||
ret = amdsmi_get_ecc_error_count(device_handles[j], &err_cnt_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_ecc_error_count:\n");
|
||||
printf("\tCorrectable errors: %lu\n", err_cnt_info.correctable_count);
|
||||
printf("\tUncorrectable errors: %lu\n\n",
|
||||
err_cnt_info.uncorrectable_count);
|
||||
// Get process list
|
||||
auto compare = [](const void *a, const void *b) -> int {
|
||||
return (*(amdsmi_proc_info_t *)a).pid >
|
||||
(*(amdsmi_proc_info_t *)b).pid
|
||||
? 1
|
||||
: -1;
|
||||
};
|
||||
|
||||
auto sum_item = [](uint16_t *a) -> float {
|
||||
float b = 0;
|
||||
for (int iterator = 0; iterator < AMDSMI_MAX_MM_IP_COUNT;
|
||||
iterator += 1) {
|
||||
b += (float)a[iterator] / 100.0;
|
||||
}
|
||||
return b;
|
||||
};
|
||||
|
||||
// Get frequency ranges
|
||||
amdsmi_frequency_range_t freq_ranges = {};
|
||||
ret = amdsmi_get_target_frequency_range(
|
||||
device_handles[j], CLOCK_TYPE_GFX, &freq_ranges);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_target_frequency_range:\n");
|
||||
printf("\tSupported min freq: %lu\n",
|
||||
freq_ranges.supported_freq_range.lower_bound);
|
||||
printf("\tSupported max freq: %lu\n",
|
||||
freq_ranges.supported_freq_range.upper_bound);
|
||||
printf("\tCurrent min freq: %lu\n",
|
||||
freq_ranges.current_freq_range.lower_bound);
|
||||
printf("\tCurrent max freq: %lu\n\n",
|
||||
freq_ranges.current_freq_range.upper_bound);
|
||||
|
||||
uint32_t num_process = 0;
|
||||
ret = amdsmi_get_process_list(device_handles[j], nullptr,
|
||||
&num_process);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (!num_process) {
|
||||
printf("No processes found.\n");
|
||||
} else {
|
||||
amdsmi_process_handle process_list[num_process];
|
||||
amdsmi_proc_info_t info_list[num_process];
|
||||
amdsmi_proc_info_t process = {};
|
||||
uint64_t mem = 0, gtt_mem = 0, cpu_mem = 0, vram_mem = 0;
|
||||
float gfx = 0, comp = 0, dma = 0, enc = 0, dec = 0;
|
||||
char bdf_str[20];
|
||||
sprintf(bdf_str, "%04x:%02x:%02x.%d", bdf.domain_number,
|
||||
bdf.bus_number, bdf.device_number, bdf.function_number);
|
||||
int num = 0;
|
||||
ret = amdsmi_get_process_list(device_handles[j], process_list,
|
||||
&num_process);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
for (uint32_t it = 0; it < num_process; it += 1) {
|
||||
if (getpid() == process_list[it]) {
|
||||
continue;
|
||||
}
|
||||
ret = amdsmi_get_process_info(device_handles[j],
|
||||
process_list[it], &process);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) {
|
||||
printf("amdsmi_get_process_info() failed for "
|
||||
"process_list[%d], returned %d\n",
|
||||
it, ret);
|
||||
continue;
|
||||
}
|
||||
info_list[num++] = process;
|
||||
}
|
||||
qsort(info_list, num, sizeof(info_list[0]), compare);
|
||||
printf("+=======+==================+============+=============="
|
||||
"+=============+=============+=============+============"
|
||||
"==+=========================================+\n");
|
||||
printf(
|
||||
"| pid | name | user | gpu bdf | "
|
||||
"fb usage | gtt memory | cpu memory | vram memory | "
|
||||
"ring usage (%%) |\n");
|
||||
printf("| | | | "
|
||||
"| | | | "
|
||||
" | gfx comp dma enc dec |\n");
|
||||
printf("+=======+==================+============+=============="
|
||||
"+=============+=============+=============+============"
|
||||
"==+=========================================+\n");
|
||||
for (int it = 0; it < num; it++) {
|
||||
char command[30];
|
||||
struct passwd *pwd = NULL;
|
||||
struct stat st;
|
||||
|
||||
sprintf(command, "/proc/%d", info_list[it].pid);
|
||||
if (stat(command, &st))
|
||||
continue;
|
||||
pwd = getpwuid(st.st_uid);
|
||||
if (!pwd)
|
||||
printf("| %5d | %16s | %10d | %s | %7ld KiB | %7ld KiB "
|
||||
"| %7ld KiB | %7ld KiB | %6.2f %6.2f %6.2f "
|
||||
"%6.2f %6.2f |\n",
|
||||
info_list[it].pid, info_list[it].name, st.st_uid,
|
||||
bdf_str, info_list[it].mem / 1024,
|
||||
info_list[it].memory_usage.gtt_mem / 1024,
|
||||
info_list[it].memory_usage.cpu_mem / 1024,
|
||||
info_list[it].memory_usage.vram_mem / 1024,
|
||||
sum_item(info_list[it].engine_usage.gfx),
|
||||
sum_item(info_list[it].engine_usage.compute),
|
||||
sum_item(info_list[it].engine_usage.sdma),
|
||||
sum_item(info_list[it].engine_usage.enc),
|
||||
sum_item(info_list[it].engine_usage.dec));
|
||||
else
|
||||
printf("| %5d | %16s | %10s | %s | %7ld KiB | %7ld KiB "
|
||||
"| %7ld KiB | %7ld KiB | %6.2f %6.2f %6.2f "
|
||||
"%6.2f %6.2f |\n",
|
||||
info_list[it].pid, info_list[it].name,
|
||||
pwd->pw_name, bdf_str, info_list[it].mem / 1024,
|
||||
info_list[it].memory_usage.gtt_mem / 1024,
|
||||
info_list[it].memory_usage.cpu_mem / 1024,
|
||||
info_list[it].memory_usage.vram_mem / 1024,
|
||||
sum_item(info_list[it].engine_usage.gfx),
|
||||
sum_item(info_list[it].engine_usage.compute),
|
||||
sum_item(info_list[it].engine_usage.sdma),
|
||||
sum_item(info_list[it].engine_usage.enc),
|
||||
sum_item(info_list[it].engine_usage.dec));
|
||||
mem += info_list[it].mem / 1024;
|
||||
gtt_mem += info_list[it].memory_usage.gtt_mem / 1024;
|
||||
cpu_mem += info_list[it].memory_usage.cpu_mem / 1024;
|
||||
vram_mem += info_list[it].memory_usage.vram_mem / 1024;
|
||||
gfx += sum_item(info_list[it].engine_usage.gfx);
|
||||
comp += sum_item(info_list[it].engine_usage.compute);
|
||||
dma += sum_item(info_list[it].engine_usage.sdma);
|
||||
enc += sum_item(info_list[it].engine_usage.enc);
|
||||
dec += sum_item(info_list[it].engine_usage.dec);
|
||||
printf(
|
||||
"+-------+------------------+------------+-------------"
|
||||
"-+-------------+-------------+-------------+----------"
|
||||
"----+-----------------------------------------+\n");
|
||||
}
|
||||
printf("| TOTAL:| %s | %7ld "
|
||||
"KiB | %7ld KiB | %7ld KiB | %7ld KiB | %6.2f %6.2f "
|
||||
"%6.2f %6.2f %6.2f |\n",
|
||||
bdf_str, mem, gtt_mem, cpu_mem, vram_mem, gfx, comp, dma,
|
||||
enc, dec);
|
||||
printf("+=======+==================+============+=============="
|
||||
"+=============+=============+=============+============"
|
||||
"=+==========================================+\n");
|
||||
}
|
||||
|
||||
// Get device name
|
||||
amdsmi_board_info board_info = {};
|
||||
ret = amdsmi_get_board_info(device_handles[j], &board_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_board_info:\n");
|
||||
std::cout << "\tdevice [" << j
|
||||
<< "]\n\t\tProduct name: " << board_info.product_name
|
||||
<< "\n"
|
||||
<< "\t\tProduct number: " << board_info.product_serial
|
||||
<< "\n"
|
||||
<< "\t\tSerial number: " << board_info.serial_number
|
||||
<< "\n\n";
|
||||
|
||||
// Get temperature
|
||||
int64_t val_i64 = 0;
|
||||
ret = amdsmi_dev_temp_metric_get(device_handles[j], 0,
|
||||
AMDSMI_TEMP_CURRENT, &val_i64);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_dev_temp_metric_get:\n");
|
||||
std::cout << "\t\tTemperature: " << val_i64 / 1000 << "C"
|
||||
<< "\n\n";
|
||||
|
||||
// Get frame buffer
|
||||
amdsmi_vram_info_t vram_usage = {};
|
||||
ret = amdsmi_get_vram_usage(device_handles[j], &vram_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_vram_usage:\n");
|
||||
std::cout << "\t\tFrame buffer usage (MB): " << vram_usage.vram_used
|
||||
<< "/" << vram_usage.vram_total << "\n\n";
|
||||
|
||||
// Get Cap info
|
||||
amdsmi_gpu_caps_t caps_info = {};
|
||||
ret = amdsmi_get_caps_info(device_handles[j], &caps_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_caps_info:\n");
|
||||
std::cout << "\t\tGFX IP Major: " << caps_info.gfx.gfxip_major
|
||||
<< "\n"
|
||||
<< "\t\tGFX IP Minor: " << caps_info.gfx.gfxip_minor
|
||||
<< "\n"
|
||||
<< "\t\tCU IP Count: " << caps_info.gfx.gfxip_cu_count
|
||||
<< "\n"
|
||||
<< "\t\tDMA IP Count: " << caps_info.dma_ip_count << "\n"
|
||||
<< "\t\tGFX IP Count: " << caps_info.gfx_ip_count << "\n"
|
||||
<< "\t\tMM IP Count: " << int(caps_info.mm.mm_ip_count)
|
||||
<< "\n\n";
|
||||
|
||||
amdsmi_power_cap_info cap_info = {};
|
||||
ret = amdsmi_get_power_cap_info(device_handles[j], 0, &cap_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_power_cap_info:\n");
|
||||
std::cout << "\t\t Power Cap: " << cap_info.power_cap
|
||||
<< "W\n";
|
||||
std::cout << "\t\t Dpm Cap: " << cap_info.dpm_cap
|
||||
<< "\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up resources allocated at amdsmi_init. It will invalidate sockets
|
||||
// and devices pointers
|
||||
ret = amdsmi_shut_down();
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,146 +0,0 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2022, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <bitset>
|
||||
#include "amd_smi/amd_smi.h"
|
||||
|
||||
// Checks the status produced by an AMD SMI call. For any value other than
// AMDSMI_STATUS_SUCCESS it prints the numeric status and the source line,
// asks amdsmi_status_string() for a description and prints it, then returns
// the status from the enclosing function.
//
// The expansion stays a plain braced block because the call sites in this
// file invoke the macro without a trailing semicolon.
#define CHK_AMDSMI_RET(RET) {                                                 \
  /* Evaluate the argument exactly once. */                                   \
  const auto chk_status_ = (RET);                                             \
  if (chk_status_ != AMDSMI_STATUS_SUCCESS) {                                 \
    const char *chk_err_str_ = nullptr;                                       \
    std::cout << "AMDSMI call returned " << chk_status_                       \
      << " at line " << __LINE__ << std::endl;                                \
    amdsmi_status_string(chk_status_, &chk_err_str_);                         \
    /* Streaming a null/uninitialised char pointer is undefined, so fall */   \
    /* back to a fixed text when no description was provided.           */    \
    std::cout << (chk_err_str_ ? chk_err_str_ : "Unknown error")              \
      << std::endl;                                                           \
    return chk_status_;                                                       \
  }                                                                           \
}
|
||||
|
||||
int main() {
|
||||
amdsmi_status_t ret;
|
||||
|
||||
// Init amdsmi for sockets and devices.
|
||||
// Here we are only interested in AMD_GPUS.
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Get all sockets
|
||||
uint32_t socket_count = 0;
|
||||
amdsmi_socket_handle* sockets = nullptr;
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Total Socket: " << socket_count << std::endl;
|
||||
|
||||
// For each socket, get identifier and devices
|
||||
for (uint32_t i = 0; i < socket_count; i++) {
|
||||
// Get Socket info
|
||||
char socket_name[128];
|
||||
ret = amdsmi_get_socket_info(sockets[i], socket_name, 128);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Socket " << socket_name << std::endl;
|
||||
|
||||
// Get all devices of the socket
|
||||
uint32_t device_count = 0;
|
||||
amdsmi_device_handle* device_handles = nullptr;
|
||||
ret = amdsmi_get_device_handles(sockets[i],
|
||||
&device_count, &device_handles);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// For each device of the socket, get name and temperature.
|
||||
for (uint32_t j = 0; j < device_count; j++) {
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMDSMI_INIT_AMD_GPUS, the device_type must be AMD_GPU.
|
||||
device_type_t device_type;
|
||||
ret = amdsmi_get_device_type(device_handles[j], &device_type);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (device_type != AMD_GPU) {
|
||||
std::cout << "Expect AMD_GPU device type!\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Get device name
|
||||
amdsmi_board_info board_info;
|
||||
ret = amdsmi_get_board_info(device_handles[j], &board_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "\tdevice "
|
||||
<< j << "\n\t\tName:" << board_info.product_name << std::endl;
|
||||
|
||||
// Get temperature
|
||||
int64_t val_i64 = 0;
|
||||
ret = amdsmi_dev_temp_metric_get(device_handles[j], 0,
|
||||
AMDSMI_TEMP_CURRENT, &val_i64);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "\t\tTemperature: " << val_i64/1000 << "C" << std::endl;
|
||||
|
||||
// Get frame buffer
|
||||
amdsmi_vram_info_t vram_usage;
|
||||
ret = amdsmi_get_vram_usage(device_handles[j], &vram_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "\t\tFrame buffer usage (MB): " << vram_usage.vram_used << "/"
|
||||
<< vram_usage.vram_total << std::endl;
|
||||
|
||||
// Get Cap info
|
||||
amdsmi_gpu_caps_t caps_info = {};
|
||||
ret = amdsmi_get_caps_info(device_handles[j], &caps_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "\t\tGFX IP Major: " << caps_info.gfx.gfxip_major << "\n";
|
||||
std::cout << "\t\tGFX IP Minor: " << caps_info.gfx.gfxip_minor << "\n";
|
||||
std::cout << "\t\tCU IP Count: " << caps_info.gfx.gfxip_cu_count << "\n";
|
||||
std::cout << "\t\tDMA IP Count: " << caps_info.dma_ip_count << "\n";
|
||||
std::cout << "\t\tGFX IP Count: " << caps_info.gfx_ip_count << "\n";
|
||||
std::cout << "\t\tMM IP Count: " << int(caps_info.mm.mm_ip_count) << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up resources allocated at amdsmi_init. It will invalidate sockets
|
||||
// and devices pointers
|
||||
ret = amdsmi_shut_down();
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,335 @@
|
||||
/*
|
||||
* =============================================================================
|
||||
* The University of Illinois/NCSA
|
||||
* Open Source License (NCSA)
|
||||
*
|
||||
* Copyright (c) 2022, Advanced Micro Devices, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Developed by:
|
||||
*
|
||||
* AMD Research and AMD ROC Software Development
|
||||
*
|
||||
* Advanced Micro Devices, Inc.
|
||||
*
|
||||
* www.amd.com
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal with the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimers.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimers in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the names of <Name of Development Group, Name of Institution>,
|
||||
* nor the names of its contributors may be used to endorse or promote
|
||||
* products derived from this Software without specific prior written
|
||||
* permission.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS WITH THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
#include <assert.h>
#include <pwd.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

#include <bitset>
#include <iostream>
#include <vector>

#include "amd_smi/amd_smi.h"
|
||||
|
||||
// Checks the status produced by an AMDSMI call. When the status differs from
// AMDSMI_STATUS_SUCCESS, the numeric status and the current source line are
// printed, followed by the text obtained from amdsmi_status_string(), and the
// enclosing function returns the status.
//
// The argument is evaluated exactly once. The expansion is a plain brace
// block (not do/while) because existing call sites invoke the macro without a
// trailing semicolon.
#define CHK_AMDSMI_RET(RET)                                                    \
  {                                                                            \
    const auto chk_amdsmi_status_ = (RET);                                     \
    if (chk_amdsmi_status_ != AMDSMI_STATUS_SUCCESS) {                         \
      /* Stays null if the lookup does not provide a string. */                \
      const char *chk_amdsmi_err_str_ = nullptr;                               \
      std::cout << "AMDSMI call returned " << chk_amdsmi_status_               \
                << " at line " << __LINE__ << std::endl;                       \
      amdsmi_status_string(chk_amdsmi_status_, &chk_amdsmi_err_str_);          \
      std::cout << (chk_amdsmi_err_str_ != nullptr ? chk_amdsmi_err_str_       \
                                                   : "Unknown error")          \
                << std::endl;                                                  \
      return chk_amdsmi_status_;                                               \
    }                                                                          \
  }
|
||||
|
||||
int main() {
|
||||
amdsmi_status_t ret;
|
||||
|
||||
// Init amdsmi for sockets and devices.
|
||||
// Here we are only interested in AMD_GPUS.
|
||||
ret = amdsmi_init(AMDSMI_INIT_AMD_GPUS);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// Get all sockets
|
||||
uint32_t socket_count = 0;
|
||||
amdsmi_socket_handle *sockets = nullptr;
|
||||
ret = amdsmi_get_socket_handles(&socket_count, &sockets);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Total Socket: " << socket_count << std::endl;
|
||||
|
||||
// For each socket, get identifier and devices
|
||||
for (uint32_t i = 0; i < socket_count; i++) {
|
||||
// Get Socket info
|
||||
char socket_name[128];
|
||||
ret = amdsmi_get_socket_info(sockets[i], socket_name, 128);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
std::cout << "Socket " << socket_name << std::endl;
|
||||
|
||||
// Get all devices of the socket
|
||||
uint32_t device_count = 0;
|
||||
amdsmi_device_handle *device_handles = nullptr;
|
||||
ret = amdsmi_get_device_handles(sockets[i], &device_count,
|
||||
&device_handles);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
// For each device of the socket, get name and temperature.
|
||||
for (uint32_t j = 0; j < device_count; j++) {
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMD_SMI_INIT_AMD_GPUS, the device_type must be AMD_GPU.
|
||||
device_type_t device_type = {};
|
||||
ret = amdsmi_get_device_type(device_handles[j], &device_type);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (device_type != AMD_GPU) {
|
||||
std::cout << "Expect AMD_GPU device type!\n";
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
amdsmi_bdf_t bdf = {};
|
||||
ret = amdsmi_get_device_bdf(device_handles[j], &bdf);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_device_bdf:\n");
|
||||
printf("\tDevice[%d] BDF %04x:%02x:%02x.%d\n\n", i,
|
||||
bdf.domain_number, bdf.bus_number, bdf.device_number,
|
||||
bdf.function_number);
|
||||
|
||||
amdsmi_asic_info_t asic_info = {};
|
||||
ret = amdsmi_get_asic_info(device_handles[j], &asic_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_asic_info:\n");
|
||||
printf("\tMarket Name: %s\n", asic_info.market_name);
|
||||
printf("\tFamilyID: 0x%x\n", asic_info.family);
|
||||
printf("\tDeviceID: 0x%x\n", asic_info.device_id);
|
||||
printf("\tVendorID: 0x%x\n", asic_info.vendor_id);
|
||||
printf("\tRevisionID: 0x%x\n", asic_info.rev_id);
|
||||
printf("\tAsic serial: 0x%s\n\n", asic_info.asic_serial);
|
||||
|
||||
// Get VBIOS info
|
||||
amdsmi_vbios_info_t vbios_info = {};
|
||||
ret = amdsmi_get_vbios_info(device_handles[j], &vbios_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_vbios_info:\n");
|
||||
printf("\tVBios Name: %s\n", vbios_info.name);
|
||||
printf("\tBuild Date: %s\n", vbios_info.build_date);
|
||||
printf("\tPart Number: %s\n", vbios_info.part_number);
|
||||
printf("\tVBios Version: %d\n", vbios_info.vbios_version);
|
||||
printf("\tVBios Version String: %s\n\n",
|
||||
vbios_info.vbios_version_string);
|
||||
|
||||
// Get engine usage info
|
||||
amdsmi_engine_usage_t engine_usage = {};
|
||||
ret = amdsmi_get_gpu_activity(device_handles[j], &engine_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_gpu_activity:\n");
|
||||
printf("\tAverage GFX Activity: %d\n",
|
||||
engine_usage.average_gfx_activity);
|
||||
printf("\tAverage MM Activity: %d\n",
|
||||
engine_usage.average_mm_activity[0]);
|
||||
printf("\tAverage UMC Activity: %d\n\n",
|
||||
engine_usage.average_umc_activity);
|
||||
|
||||
// Get firmware info
|
||||
amdsmi_fw_info_t fw_information = {};
|
||||
ret = amdsmi_get_fw_info(device_handles[j], &fw_information);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_fw_info:\n");
|
||||
printf("\tFirmware version: %d\n", fw_information.num_fw_info);
|
||||
printf("\tSMU: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_SMU]
|
||||
.fw_version);
|
||||
printf("\tSMC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_SMC]
|
||||
.fw_version);
|
||||
printf("\tVCN: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_VCN]
|
||||
.fw_version);
|
||||
printf("\tCP_ME: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_ME]
|
||||
.fw_version);
|
||||
printf("\tCP_PFP: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_PFP]
|
||||
.fw_version);
|
||||
printf("\tCP_CE: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_CE]
|
||||
.fw_version);
|
||||
printf("\tRLC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_RLC]
|
||||
.fw_version);
|
||||
printf("\tCP_MEC1: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_MEC1]
|
||||
.fw_version);
|
||||
printf("\tCP_MEC2: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_CP_MEC2]
|
||||
.fw_version);
|
||||
printf("\tSDMA0: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_SDMA0]
|
||||
.fw_version);
|
||||
printf("\tMC: %ld\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_MC]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST CNTL: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::FW_ID_RLC_RESTORE_LIST_CNTL]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST GPM MEM: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::FW_ID_RLC_RESTORE_LIST_GPM_MEM]
|
||||
.fw_version);
|
||||
printf("\tRLC RESTORE LIST SRM MEM: %ld\n",
|
||||
fw_information
|
||||
.fw_info_list
|
||||
[amdsmi_fw_block_t::FW_ID_RLC_RESTORE_LIST_SRM_MEM]
|
||||
.fw_version);
|
||||
printf(
|
||||
"\tPSP SOSDRV: %ld\n\n",
|
||||
fw_information.fw_info_list[amdsmi_fw_block_t::FW_ID_PSP_SOSDRV]
|
||||
.fw_version);
|
||||
|
||||
// Get temperature measurements
|
||||
amdsmi_temperature_t temp_measurements[4];
|
||||
amdsmi_temperature_type_t temp_types[4] = {
|
||||
TEMPERATURE_TYPE_EDGE, TEMPERATURE_TYPE_JUNCTION,
|
||||
TEMPERATURE_TYPE_VRAM, TEMPERATURE_TYPE_PLX};
|
||||
for (const auto &temp_type : temp_types) {
|
||||
ret = amdsmi_get_temperature_measure(
|
||||
device_handles[j], temp_type,
|
||||
&temp_measurements[(int)(temp_type)]);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
}
|
||||
printf(" Output of amdsmi_get_temperature_measure:\n");
|
||||
printf("\tGPU Edge temp measurement: %d\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_EDGE].cur_temp);
|
||||
printf("\tGPU Junction temp measurement: %d\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_JUNCTION].cur_temp);
|
||||
printf("\tGPU VRAM temp measurement: %d\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_VRAM].cur_temp);
|
||||
printf("\tGPU PLX temp measurement: %d\n\n",
|
||||
temp_measurements[TEMPERATURE_TYPE_PLX].cur_temp);
|
||||
|
||||
// Get bad pages
|
||||
char bad_page_status_names[3][15] = {"RESERVED", "PENDING",
|
||||
"UNRESERVABLE"};
|
||||
uint32_t num_pages = 0;
|
||||
ret = amdsmi_get_bad_page_info(device_handles[j], &num_pages,
|
||||
nullptr);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_bad_page_info:\n");
|
||||
if (!num_pages) {
|
||||
printf("\tNo bad pages found.\n");
|
||||
} else {
|
||||
amdsmi_retired_page_record_t bad_page_info[num_pages] = {};
|
||||
ret = amdsmi_get_bad_page_info(device_handles[j], &num_pages,
|
||||
bad_page_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
for (uint32_t page_it = 0; page_it < num_pages; page_it += 1) {
|
||||
printf(" Page[%d]\n", page_it);
|
||||
printf("\tAddress: %lu\n",
|
||||
bad_page_info[page_it].page_address);
|
||||
printf("\tSize: %lu\n", bad_page_info[page_it].page_size);
|
||||
printf(
|
||||
"\tStatus: %s\n",
|
||||
bad_page_status_names[bad_page_info[page_it].status]);
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
// Get ECC error counts
|
||||
amdsmi_error_count_t err_cnt_info = {};
|
||||
ret = amdsmi_get_ecc_error_count(device_handles[j], &err_cnt_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_ecc_error_count:\n");
|
||||
printf("\tCorrectable errors: %lu\n", err_cnt_info.correctable_count);
|
||||
printf("\tUncorrectable errors: %lu\n\n",
|
||||
err_cnt_info.uncorrectable_count);
|
||||
// Get process list
|
||||
auto compare = [](const void *a, const void *b) -> int {
|
||||
return (*(amdsmi_proc_info_t *)a).pid >
|
||||
(*(amdsmi_proc_info_t *)b).pid
|
||||
? 1
|
||||
: -1;
|
||||
};
|
||||
|
||||
// Get device name
|
||||
amdsmi_board_info board_info = {};
|
||||
ret = amdsmi_get_board_info(device_handles[j], &board_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_board_info:\n");
|
||||
std::cout << "\tdevice [" << j
|
||||
<< "]\n\t\tProduct name: " << board_info.product_name
|
||||
<< "\n"
|
||||
<< "\t\tProduct number: " << board_info.product_serial
|
||||
<< "\n"
|
||||
<< "\t\tSerial number: " << board_info.serial_number
|
||||
<< "\n\n";
|
||||
|
||||
// Get temperature
|
||||
int64_t val_i64 = 0;
|
||||
ret = amdsmi_dev_temp_metric_get(device_handles[j], 0,
|
||||
AMDSMI_TEMP_CURRENT, &val_i64);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_dev_temp_metric_get:\n");
|
||||
std::cout << "\t\tTemperature: " << val_i64 / 1000 << "C"
|
||||
<< "\n\n";
|
||||
|
||||
// Get frame buffer
|
||||
amdsmi_vram_info_t vram_usage = {};
|
||||
ret = amdsmi_get_vram_usage(device_handles[j], &vram_usage);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_vram_usage:\n");
|
||||
std::cout << "\t\tFrame buffer usage (MB): " << vram_usage.vram_used
|
||||
<< "/" << vram_usage.vram_total << "\n\n";
|
||||
|
||||
// Get Cap info
|
||||
amdsmi_gpu_caps_t caps_info = {};
|
||||
ret = amdsmi_get_caps_info(device_handles[j], &caps_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_caps_info:\n");
|
||||
std::cout << "\t\tGFX IP Major: " << caps_info.gfx.gfxip_major
|
||||
<< "\n"
|
||||
<< "\t\tGFX IP Minor: " << caps_info.gfx.gfxip_minor
|
||||
<< "\n"
|
||||
<< "\t\tCU IP Count: " << caps_info.gfx.gfxip_cu_count
|
||||
<< "\n"
|
||||
<< "\t\tDMA IP Count: " << caps_info.dma_ip_count << "\n"
|
||||
<< "\t\tGFX IP Count: " << caps_info.gfx_ip_count << "\n"
|
||||
<< "\t\tMM IP Count: " << int(caps_info.mm.mm_ip_count)
|
||||
<< "\n\n";
|
||||
|
||||
amdsmi_power_cap_info cap_info = {};
|
||||
ret = amdsmi_get_power_cap_info(device_handles[j], 0, &cap_info);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
printf(" Output of amdsmi_get_power_cap_info:\n");
|
||||
std::cout << "\t\t Power Cap: " << cap_info.power_cap / 1000000
|
||||
<< "W\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up resources allocated at amdsmi_init. It will invalidate sockets
|
||||
// and devices pointers
|
||||
ret = amdsmi_shut_down();
|
||||
CHK_AMDSMI_RET(ret)
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -104,7 +104,7 @@ typedef enum device_type {
|
||||
/**
|
||||
* @brief Error codes returned by amd_smi_lib functions
|
||||
*/
|
||||
typedef enum amdsmi_status {
|
||||
typedef enum amdsmi_status_t {
|
||||
AMDSMI_STATUS_SUCCESS = 0, /**< Call succeeded */
|
||||
AMDSMI_STATUS_INVAL, /**< Invalid parameters */
|
||||
AMDSMI_STATUS_NOT_SUPPORTED, /**< Command not supported */
|
||||
@@ -334,15 +334,15 @@ typedef struct amdsmi_board_info {
|
||||
} amdsmi_board_info_t;
|
||||
|
||||
typedef struct amdsmi_temperature {
|
||||
uint16_t cur_temp;
|
||||
uint32_t cur_temp;
|
||||
} amdsmi_temperature_t;
|
||||
|
||||
typedef struct amdsmi_temperature_limit {
|
||||
uint16_t limit;
|
||||
uint32_t limit;
|
||||
} amdsmi_temperature_limit_t;
|
||||
|
||||
typedef struct amdsmi_power_limit {
|
||||
uint16_t limit;
|
||||
uint32_t limit;
|
||||
} amdsmi_power_limit_t;
|
||||
|
||||
typedef struct amdsmi_power_measure {
|
||||
@@ -369,17 +369,22 @@ typedef struct amdsmi_engine_usage {
|
||||
typedef uint32_t amdsmi_process_handle;
|
||||
|
||||
typedef struct amdsmi_process_info {
|
||||
char name[AMDSMI_NORMAL_STRING_LENGTH];
|
||||
amdsmi_process_handle pid;
|
||||
uint64_t mem; /** in bytes */
|
||||
char name[AMDSMI_NORMAL_STRING_LENGTH];
|
||||
amdsmi_process_handle pid;
|
||||
uint64_t mem; /** in bytes */
|
||||
struct {
|
||||
uint16_t gfx[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t compute[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t sdma[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t enc[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t dec[AMDSMI_MAX_MM_IP_COUNT];
|
||||
} engine_usage; /** percentage 0-100% times 100 */
|
||||
struct {
|
||||
uint16_t gfx[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t compute[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t sdma[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t enc[AMDSMI_MAX_MM_IP_COUNT];
|
||||
uint16_t dec[AMDSMI_MAX_MM_IP_COUNT];
|
||||
} usage; /** percentage 0-100% times 100 */
|
||||
char container_name[AMDSMI_NORMAL_STRING_LENGTH];
|
||||
uint64_t gtt_mem;
|
||||
uint64_t cpu_mem;
|
||||
uint64_t vram_mem;
|
||||
} memory_usage; /** in bytes */
|
||||
char container_name[AMDSMI_NORMAL_STRING_LENGTH];
|
||||
} amdsmi_proc_info_t;
|
||||
|
||||
//! Guaranteed maximum possible number of supported frequencies
|
||||
@@ -1071,8 +1076,8 @@ typedef struct {
|
||||
* @brief This structure holds error counts.
|
||||
*/
|
||||
typedef struct {
|
||||
uint64_t correctable_err; //!< Accumulated correctable errors
|
||||
uint64_t uncorrectable_err; //!< Accumulated uncorrectable errors
|
||||
uint64_t correctable_count; //!< Accumulated correctable errors
|
||||
uint64_t uncorrectable_count; //!< Accumulated uncorrectable errors
|
||||
} amdsmi_error_count_t;
|
||||
|
||||
/**
|
||||
@@ -1795,7 +1800,50 @@ amdsmi_dev_memory_total_get(amdsmi_device_handle device_handle, amdsmi_memory_ty
|
||||
amdsmi_status_t
|
||||
amdsmi_dev_memory_usage_get(amdsmi_device_handle device_handle, amdsmi_memory_type_t mem_type,
|
||||
uint64_t *used);
|
||||
|
||||
/**
|
||||
* @brief The first call to this API returns the number of bad pages which
|
||||
* should be used to allocate the buffer that should contain the bad page
|
||||
* records.
|
||||
* @details This call will query the device @p device_handle for the
|
||||
* number of bad pages (written to @p num_pages address). The results are
|
||||
* written to address held by the @p info pointer.
|
||||
* @param[in] device_handle a device handle
|
||||
* @param[out] num_pages Number of bad page records.
|
||||
* @param[out] info Pointer to amdsmi_retired_page_record_t to which the
|
||||
* results will be written to.
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::AMDSMI_STATUS_INVAL the parameters are not valid or nullptr
|
||||
* @retval ::AMDSMI_STATUS_NOT_SUPPORTED API not supported
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_get_bad_page_info(amdsmi_device_handle device_handle, uint32_t *num_pages, amdsmi_retired_page_record_t *info);
|
||||
/**
|
||||
* @brief Returns if RAS features are enabled or disabled for given block
|
||||
*
|
||||
* @details Given a device handle @p device_handle, this function queries the
|
||||
* state of RAS features for a specific block @p block. Result will be written
|
||||
* to address held by pointer @p state.
|
||||
*
|
||||
* @param[in] device_handle Device handle which to query
|
||||
*
|
||||
* @param[in] block Block which to query
|
||||
*
|
||||
* @param[inout] state A pointer to amdsmi_ras_err_state_t to which the state
|
||||
* of block will be written.
|
||||
* If this parameter is nullptr, this function will return
|
||||
* ::AMDSMI_STATUS_INVAL if the function is supported with the provided
|
||||
* arguments and ::AMDSMI_STATUS_NOT_SUPPORTED if it is not supported with the
|
||||
* provided arguments.
|
||||
*
|
||||
* @retval ::AMDSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::AMDSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments.
|
||||
* @retval ::AMDSMI_STATUS_INVAL the provided arguments are not valid
|
||||
*
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_get_ras_features_enabled(amdsmi_device_handle device_handle, amdsmi_gpu_block block,
|
||||
amdsmi_ras_err_state_t *state);
|
||||
/**
|
||||
* @brief Get percentage of time any device memory is being used
|
||||
*
|
||||
@@ -3676,11 +3724,11 @@ amdsmi_status_t amdsmi_event_notification_stop(amdsmi_device_handle device_handl
|
||||
* \param [out] bdf - Reference to BDF. Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - Parameters are invalid
|
||||
* * -::SMI_ERR_NOT_FOUND - Device cannot be found
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are invalid
|
||||
* * -::AMDSMI_STATUS_NOT_FOUND - Device cannot be found
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_get_device_bdf(amdsmi_device_handle dev, amdsmi_bdf_t *bdf);
|
||||
@@ -3698,13 +3746,13 @@ amdsmi_get_device_bdf(amdsmi_device_handle dev, amdsmi_bdf_t *bdf);
|
||||
* allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - Parameters are invalid
|
||||
* * -::SMI_ERR_NOT_FOUND - Device cannot be found
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are invalid
|
||||
* * -::AMDSMI_STATUS_NOT_FOUND - Device cannot be found
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_device_uuid(amdsmi_device_handle dev, unsigned int *uuid_length, char *uuid);
|
||||
|
||||
/** @} */
|
||||
@@ -3726,13 +3774,13 @@ amdsmi_get_device_uuid(amdsmi_device_handle dev, unsigned int *uuid_length, char
|
||||
* allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NOT_FOUND - Device cannot be found
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NOT_FOUND - Device cannot be found
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_driver_version(amdsmi_device_handle dev, int *length, char *version);
|
||||
|
||||
/** @} */
|
||||
@@ -3750,15 +3798,15 @@ amdsmi_get_driver_version(amdsmi_device_handle dev, int *length, char *version);
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_asic_info(amdsmi_device_handle dev, amdsmi_asic_info_t *info);
|
||||
|
||||
/**
|
||||
@@ -3770,15 +3818,15 @@ amdsmi_get_asic_info(amdsmi_device_handle dev, amdsmi_asic_info_t *info);
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialize
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_board_info(amdsmi_device_handle dev, amdsmi_board_info_t *info);
|
||||
|
||||
/**
|
||||
@@ -3792,15 +3840,15 @@ amdsmi_get_board_info(amdsmi_device_handle dev, amdsmi_board_info_t *info);
|
||||
* allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_power_cap_info(amdsmi_device_handle dev, uint32_t sensor_ind,
|
||||
amdsmi_power_cap_info_t *info);
|
||||
|
||||
@@ -3814,15 +3862,15 @@ amdsmi_get_power_cap_info(amdsmi_device_handle dev, uint32_t sensor_ind,
|
||||
*
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_xgmi_info(amdsmi_device_handle dev, amdsmi_xgmi_info_t *info);
|
||||
|
||||
/**
|
||||
@@ -3835,15 +3883,15 @@ amdsmi_get_xgmi_info(amdsmi_device_handle dev, amdsmi_xgmi_info_t *info);
|
||||
* allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_caps_info(amdsmi_device_handle dev, amdsmi_gpu_caps_t *info);
|
||||
|
||||
/** @} */
|
||||
@@ -3860,15 +3908,15 @@ amdsmi_get_caps_info(amdsmi_device_handle dev, amdsmi_gpu_caps_t *info);
|
||||
* \param [out] info - Reference to the fw info. Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_fw_info(amdsmi_device_handle dev, amdsmi_fw_info_t *info);
|
||||
|
||||
/**
|
||||
@@ -3880,15 +3928,15 @@ amdsmi_get_fw_info(amdsmi_device_handle dev, amdsmi_fw_info_t *info);
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_vbios_info(amdsmi_device_handle dev, amdsmi_vbios_info_t *info);
|
||||
|
||||
/** @} */
|
||||
@@ -3906,15 +3954,15 @@ amdsmi_get_vbios_info(amdsmi_device_handle dev, amdsmi_vbios_info_t *info);
|
||||
* \param [out] info - Reference to the gpu engine usage structure. Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_gpu_activity(amdsmi_device_handle dev, amdsmi_engine_usage_t *info);
|
||||
|
||||
/**
|
||||
@@ -3926,15 +3974,15 @@ amdsmi_get_gpu_activity(amdsmi_device_handle dev, amdsmi_engine_usage_t *info);
|
||||
* \param [out] info - Reference to the gpu power structure. Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_power_measure(amdsmi_device_handle dev, amdsmi_power_measure_t *info);
|
||||
|
||||
/**
|
||||
@@ -3950,15 +3998,15 @@ amdsmi_get_power_measure(amdsmi_device_handle dev, amdsmi_power_measure_t *info)
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_clock_measure(amdsmi_device_handle dev, amdsmi_clk_type_t clk_type, amdsmi_clock_measure_t *info);
|
||||
|
||||
/**
|
||||
@@ -3973,15 +4021,15 @@ amdsmi_get_clock_measure(amdsmi_device_handle dev, amdsmi_clk_type_t clk_type, a
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_temperature_measure(amdsmi_device_handle dev, amdsmi_temperature_type_t temp_type, amdsmi_temperature_t *info);
|
||||
|
||||
/**
|
||||
@@ -3996,16 +4044,16 @@ amdsmi_get_temperature_measure(amdsmi_device_handle dev, amdsmi_temperature_type
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_get_temperature_limit(amdsmi_device_handle dev, amdsmi_temperature_t temp_type, amdsmi_temperature_limit_t *limit);
|
||||
amdsmi_status_t
|
||||
amdsmi_get_temperature_limit(amdsmi_device_handle dev, amdsmi_temperature_type_t temp_type, amdsmi_temperature_limit_t *limit);
|
||||
|
||||
/**
|
||||
* \brief Returns power limit of the GPU.
|
||||
@@ -4017,15 +4065,15 @@ amdsmi_get_temperature_limit(amdsmi_device_handle dev, amdsmi_temperature_t temp
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_power_limit(amdsmi_device_handle dev, amdsmi_power_limit_t *limit);
|
||||
|
||||
/**
|
||||
@@ -4039,15 +4087,15 @@ amdsmi_get_power_limit(amdsmi_device_handle dev, amdsmi_power_limit_t *limit);
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_vram_usage(amdsmi_device_handle dev, amdsmi_vram_info_t *info);
|
||||
|
||||
/** @} */
|
||||
@@ -4072,15 +4120,15 @@ amdsmi_get_vram_usage(amdsmi_device_handle dev, amdsmi_vram_info_t *info);
|
||||
*
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_INVAL - Parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_INVAL - Parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_target_frequency_range(amdsmi_device_handle dev, amdsmi_clk_type_t clk_type, amdsmi_frequency_range_t *range);
|
||||
|
||||
/** @} */
|
||||
@@ -4109,13 +4157,13 @@ amdsmi_get_target_frequency_range(amdsmi_device_handle dev, amdsmi_clk_type_t cl
|
||||
* in list or the number of running processes if equal to 0.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - The parameters are not valid or NULL
|
||||
* * -::SMI_ERR_NOMEM - Provided buffer is not large enough
|
||||
* * -::SMI_ERR_NOT_SUPPORTED - API not supported
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - The parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_NOMEM - Provided buffer is not large enough
|
||||
* * -::AMDSMI_STATUS_NOT_SUPPORTED - API not supported
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_process_list(amdsmi_device_handle dev, amdsmi_process_handle *list, uint32_t *max_processes);
|
||||
|
||||
/**
|
||||
@@ -4129,12 +4177,12 @@ amdsmi_get_process_list(amdsmi_device_handle dev, amdsmi_process_handle *list, u
|
||||
* information. Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - The parameters are not valid or NULL
|
||||
* * -::SMI_ERR_NOT_SUPPORTED - API not supported
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - The parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_NOT_SUPPORTED - API not supported
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_process_info(amdsmi_device_handle dev, amdsmi_process_handle process, amdsmi_proc_info_t *info);
|
||||
|
||||
/** @} */
|
||||
@@ -4154,15 +4202,15 @@ amdsmi_get_process_info(amdsmi_device_handle dev, amdsmi_process_handle process,
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* \return
|
||||
* * ::SMI_SUCCESS - Successful
|
||||
* * -::SMI_ERR_RETRY - Device is busy. Please retry
|
||||
* * -::SMI_ERR_NO_PERM - Library was not initialized
|
||||
* * -::SMI_ERR_INVAL - The parameters are not valid or NULL
|
||||
* * -::SMI_ERR_IO - Device is in an unrecoverable state
|
||||
* * -::SMI_ERR_NOT_INIT - Device is uninitialized
|
||||
* * -::SMI_ERR_API_FAILED - Other errors
|
||||
* * ::AMDSMI_STATUS_SUCCESS - Successful
|
||||
* * -::AMDSMI_STATUS_RETRY - Device is busy. Please retry
|
||||
* * -::AMDSMI_STATUS_NO_PERM - Library was not initialized
|
||||
* * -::AMDSMI_STATUS_INVAL - The parameters are not valid or NULL
|
||||
* * -::AMDSMI_STATUS_IO - Device is in an unrecoverable state
|
||||
* * -::AMDSMI_STATUS_NOT_INIT - Device is uninitialized
|
||||
* * -::AMDSMI_STATUS_API_FAILED - Other errors
|
||||
*/
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_ecc_error_count(amdsmi_device_handle dev, amdsmi_error_count_t *ec);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -58,7 +58,13 @@ class AMDSmiDrm {
|
||||
public:
|
||||
amdsmi_status_t init();
|
||||
amdsmi_status_t cleanup();
|
||||
int get_drm_fd_by_index(uint32_t gpu_index) const;
|
||||
amdsmi_status_t get_drm_fd_by_index(uint32_t gpu_index, uint32_t *fd_info) const;
|
||||
amdsmi_status_t get_bdf_by_index(uint32_t gpu_index, amdsmi_bdf_t *bdf_info) const;
|
||||
amdsmi_status_t get_drm_path_by_index(uint32_t gpu_index, std::string *drm_path) const;
|
||||
std::vector<amdsmi_bdf_t> get_bdfs();
|
||||
std::vector<std::string>& get_drm_paths();
|
||||
bool check_if_drm_is_supported();
|
||||
|
||||
amdsmi_status_t amdgpu_query_info(int fd, unsigned info_id,
|
||||
unsigned size, void *value);
|
||||
amdsmi_status_t amdgpu_query_fw(int fd, unsigned info_id, unsigned fw_type,
|
||||
@@ -70,6 +76,9 @@ class AMDSmiDrm {
|
||||
private:
|
||||
using DrmCmdWriteFunc = int (*)(int, unsigned long, void *, unsigned long);
|
||||
std::vector<int> drm_fds_; // drm file descriptor by gpu_index
|
||||
std::vector<std::string> drm_paths_; // drm path (renderD128 for example)
|
||||
std::vector<amdsmi_bdf_t> drm_bdfs_; // bdf
|
||||
|
||||
AMDSmiLibraryLoader lib_loader_; // lazy load libdrm
|
||||
DrmCmdWriteFunc drm_cmd_write_; // drmCommandWrite
|
||||
std::mutex drm_mutex_;
|
||||
|
||||
@@ -47,16 +47,32 @@
|
||||
#include "amd_smi/amd_smi.h"
|
||||
#include "amd_smi/impl/amd_smi_device.h"
|
||||
#include "amd_smi/impl/amd_smi_drm.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
|
||||
namespace amd {
|
||||
namespace smi {
|
||||
|
||||
class AMDSmiGPUDevice: public AMDSmiDevice {
|
||||
public:
|
||||
explicit AMDSmiGPUDevice(uint32_t gpu_id, AMDSmiDrm& drm):
|
||||
AMDSmiDevice(AMD_GPU), gpu_id_(gpu_id), drm_(drm) {}
|
||||
AMDSmiGPUDevice(uint32_t gpu_id, uint32_t fd, std::string path, amdsmi_bdf_t bdf, AMDSmiDrm& drm):
|
||||
AMDSmiDevice(AMD_GPU), gpu_id_(gpu_id), fd_(fd), path_(path), bdf_(bdf), drm_(drm) {}
|
||||
|
||||
AMDSmiGPUDevice(uint32_t gpu_id, AMDSmiDrm& drm):
|
||||
AMDSmiDevice(AMD_GPU), gpu_id_(gpu_id), drm_(drm) {
|
||||
if (check_if_drm_is_supported()) this->get_drm_data();
|
||||
}
|
||||
~AMDSmiGPUDevice() {
|
||||
if (check_if_drm_is_supported()) shared_mutex_close(mutex_);
|
||||
}
|
||||
|
||||
amdsmi_status_t get_drm_data();
|
||||
pthread_mutex_t* get_mutex();
|
||||
uint32_t get_gpu_id() const;
|
||||
uint32_t get_gpu_fd() const;
|
||||
std::string& get_gpu_path();
|
||||
amdsmi_bdf_t get_bdf();
|
||||
bool check_if_drm_is_supported() { return drm_.check_if_drm_is_supported(); }
|
||||
|
||||
amdsmi_status_t amdgpu_query_info(unsigned info_id,
|
||||
unsigned size, void *value) const;
|
||||
amdsmi_status_t amdgpu_query_hw_ip(unsigned info_id, unsigned hw_ip_type,
|
||||
@@ -66,7 +82,11 @@ class AMDSmiGPUDevice: public AMDSmiDevice {
|
||||
amdsmi_status_t amdgpu_query_vbios(void *info) const;
|
||||
private:
|
||||
uint32_t gpu_id_;
|
||||
uint32_t fd_;
|
||||
std::string path_;
|
||||
amdsmi_bdf_t bdf_;
|
||||
AMDSmiDrm& drm_;
|
||||
shared_mutex_t mutex_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ class AMDSmiSocket {
|
||||
const std::string& get_socket_id() const { return socket_identifier_;}
|
||||
void add_device(AMDSmiDevice* device) { devices_.push_back(device); }
|
||||
std::vector<AMDSmiDevice*>& get_devices() { return devices_;}
|
||||
amdsmi_status_t get_device_count(uint32_t* device_count) const;
|
||||
private:
|
||||
std::string socket_identifier_;
|
||||
std::vector<AMDSmiDevice*> devices_;
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
/* * Copyright (C) 2022 Advanced Micro Devices. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
#define AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_

// Standard headers for the names used directly in the declarations below
// (std::string, uint32_t, uint64_t), so this header does not depend on
// whatever the project headers happen to pull in transitively.
#include <cstdint>
#include <string>

#include "amd_smi/amd_smi.h"
#include "amd_smi/impl/amd_smi_gpu_device.h"
#include "rocm_smi/rocm_smi_utils.h"

/**
 * Lock the given mutex for the rest of the enclosing scope.
 *
 * MUTEX is dereferenced, so it must be a non-null pointer; call sites pass
 * AMDSmiGPUDevice::get_mutex(). The macro wraps the mutex in an
 * amd::smi::pthread_wrap and constructs an amd::smi::ScopedPthread on it.
 *
 * Usage constraints that follow from the expansion:
 *  - it contains a `return AMDSMI_STATUS_BUSY;`, so it may only be used
 *    inside a function returning amdsmi_status_t, and the caller returns
 *    early when ScopedPthread reports that the mutex was not acquired;
 *  - it declares the locals `_pw` and `_lock`, so it can be expanded at most
 *    once per scope.
 *
 * NOTE(review): the meaning of the `true` argument to ScopedPthread is defined
 * in rocm_smi_utils.h and is not visible here -- confirm before changing it.
 */
#define SMIGPUDEVICE_MUTEX(MUTEX) \
    amd::smi::pthread_wrap _pw(*(MUTEX)); \
    amd::smi::ScopedPthread _lock(_pw, true); \
    if (_lock.mutex_not_acquired()) { \
      return AMDSMI_STATUS_BUSY; \
    }

// amdgpu helper routines. Each takes the GPU device object and writes its
// result through the caller-provided output pointer(s), returning an
// amdsmi_status_t.
// NOTE(review): the definitions are not part of this header; the one-line
// summaries below are inferred from names and signatures only -- confirm
// against the implementation file.

// Presumably resolves the device's hwmon directory into *full_path.
amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice* device, std::string* full_path);

// Presumably fills *info with board identification data.
amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amdsmi_board_info_t *info);

// Presumably reads the current power cap into *cap.
amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, int *cap);

// Presumably reports frequency range / DPM level count for a clock domain.
// Callers in amd_smi.cc pass NULL for outputs they do not need.
amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_clk_type_t domain, int *max_freq, int *min_freq, int *num_dpm);

// Presumably returns a bitmask of enabled blocks in *enabled_blocks.
amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device, uint64_t *enabled_blocks);

// Presumably reports retired (bad) page records and their count.
amdsmi_status_t smi_amdgpu_get_bad_page_info(amd::smi::AMDSmiGPUDevice* device, uint32_t *num_pages, amdsmi_retired_page_record_t *info);

// Presumably fills *err_cnt with ECC error counters.
amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device, amdsmi_error_count_t *err_cnt);

#endif  // AMD_SMI_INCLUDE_AMD_SMI_UTILS_H_
|
||||
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright 2022 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __FDINFO__
|
||||
#define __FDINFO__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
amdsmi_status_t gpuvsmi_get_pids(const amdsmi_bdf_t &bdf, std::vector<long int> &pids, uint64_t *size);
|
||||
amdsmi_status_t gpuvsmi_get_pid_info(const amdsmi_bdf_t &bdf, long int pid, amdsmi_proc_info_t &info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -50,12 +50,14 @@
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <xf86drm.h>
|
||||
#include "amd_smi/amd_smi.h"
|
||||
#include "amd_smi/impl/fdinfo.h"
|
||||
#include "amd_smi/impl/amd_smi_common.h"
|
||||
#include "amd_smi/impl/amd_smi_system.h"
|
||||
#include "amd_smi/impl/amd_smi_socket.h"
|
||||
@@ -63,6 +65,7 @@
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_common.h"
|
||||
#include "amd_smi/impl/amdgpu_drm.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
|
||||
// TODO(bliu): One to one map to all status code
|
||||
static amdsmi_status_t rsmi_to_amdsmi_status(rsmi_status_t status) {
|
||||
@@ -178,7 +181,9 @@ amdsmi_status_t amdsmi_get_device_handles(amdsmi_socket_handle socket_handle,
|
||||
.handle_to_socket(socket_handle, &socket);
|
||||
if (r != AMDSMI_STATUS_SUCCESS) return r;
|
||||
|
||||
*device_count = static_cast<uint32_t>(socket->get_devices().size());
|
||||
r = socket->get_device_count(device_count);
|
||||
if (r != AMDSMI_STATUS_SUCCESS) return r;
|
||||
|
||||
*device_handles = reinterpret_cast<amdsmi_device_handle*>(
|
||||
socket->get_devices().data());
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
@@ -198,54 +203,44 @@ amdsmi_status_t amdsmi_get_device_type(amdsmi_device_handle device_handle ,
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
amdsmi_status_t amdsmi_get_board_info(amdsmi_device_handle device_handle,
|
||||
amdsmi_board_info_t *info) {
|
||||
if (info == NULL) {
|
||||
amdsmi_status_t
|
||||
amdsmi_get_device_bdf(amdsmi_device_handle device_handle, amdsmi_bdf_t *bdf) {
|
||||
|
||||
if (bdf == NULL) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
}
|
||||
|
||||
memset(info, 0, sizeof(amdsmi_board_info_t));
|
||||
amd::smi::AMDSmiGPUDevice* gpu_device =
|
||||
static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);
|
||||
|
||||
// ignore errors so that if the function is not supported,
|
||||
// it will continue to add other info.
|
||||
auto r = rsmi_wrapper(rsmi_dev_name_get, device_handle,
|
||||
info->product_name, AMDSMI_PRODUCT_NAME_LENGTH);
|
||||
|
||||
r = rsmi_wrapper(rsmi_dev_serial_number_get, device_handle,
|
||||
info->product_serial, AMDSMI_NORMAL_STRING_LENGTH);
|
||||
if (gpu_device->check_if_drm_is_supported()) {
|
||||
*bdf = gpu_device->get_bdf();
|
||||
}
|
||||
else {
|
||||
//TODO
|
||||
}
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// TODO(bliu) : add other asic info
|
||||
amdsmi_status amdsmi_get_asic_info(amdsmi_device_handle device_handle,
|
||||
amdsmi_asic_info_t *info) {
|
||||
if (info == nullptr)
|
||||
amdsmi_status_t amdsmi_get_board_info(amdsmi_device_handle device_handle, amdsmi_board_info_t *board_info) {
|
||||
if (board_info == NULL) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
memset(info, 0, sizeof(amdsmi_asic_info_t));
|
||||
}
|
||||
|
||||
auto r = rsmi_wrapper(rsmi_dev_serial_number_get, device_handle,
|
||||
info->asic_serial, AMDSMI_NORMAL_STRING_LENGTH);
|
||||
amdsmi_status_t status;
|
||||
amd::smi::AMDSmiGPUDevice* gpu_device =
|
||||
static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);
|
||||
|
||||
r = rsmi_wrapper(rsmi_dev_brand_get, device_handle,
|
||||
info->market_name, AMDSMI_NORMAL_STRING_LENGTH);
|
||||
if (gpu_device->check_if_drm_is_supported()) {
|
||||
status = smi_amdgpu_get_board_info(gpu_device, board_info);
|
||||
}
|
||||
else {
|
||||
status = rsmi_wrapper(rsmi_dev_name_get, device_handle, board_info->product_name, AMDSMI_PRODUCT_NAME_LENGTH);
|
||||
status = rsmi_wrapper(rsmi_dev_serial_number_get, device_handle, board_info->product_serial, AMDSMI_NORMAL_STRING_LENGTH);
|
||||
}
|
||||
|
||||
uint16_t vendor_id = 0;
|
||||
r = rsmi_wrapper(rsmi_dev_vendor_id_get, device_handle,
|
||||
&vendor_id);
|
||||
if ( r == AMDSMI_STATUS_SUCCESS)
|
||||
info->vendor_id = static_cast<uint32_t>(vendor_id);
|
||||
|
||||
r = rsmi_wrapper(rsmi_dev_unique_id_get, device_handle,
|
||||
&(info->device_id));
|
||||
|
||||
vendor_id = 0;
|
||||
r = rsmi_wrapper(rsmi_dev_subsystem_vendor_id_get, device_handle,
|
||||
&vendor_id);
|
||||
if ( r == AMDSMI_STATUS_SUCCESS)
|
||||
info->subvendor_id = static_cast<uint32_t>(vendor_id);
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
return status;
|
||||
}
|
||||
|
||||
amdsmi_status_t amdsmi_dev_temp_metric_get(amdsmi_device_handle device_handle,
|
||||
@@ -333,7 +328,7 @@ amdsmi_status_t amdsmi_get_caps_info(amdsmi_device_handle device_handle,
|
||||
sizeof(struct drm_amdgpu_info_device), &device);
|
||||
if (r != AMDSMI_STATUS_SUCCESS) return r;
|
||||
|
||||
info->gfx.gfxip_cu_count = device.cu_active_number;
|
||||
info->gfx.gfxip_cu_count = (uint16_t)device.cu_active_number;
|
||||
|
||||
r = gpu_device->amdgpu_query_hw_ip(AMDGPU_INFO_HW_IP_INFO,
|
||||
AMDGPU_HW_IP_GFX, sizeof(ip), &ip);
|
||||
@@ -392,16 +387,6 @@ amdsmi_status_t amdsmi_get_caps_info(amdsmi_device_handle device_handle,
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// TODO(bliu): add more vbios info
|
||||
amdsmi_status amdsmi_get_vbios_info(amdsmi_device_handle device_handle,
|
||||
amdsmi_vbios_info_t *info) {
|
||||
if (info == nullptr) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
}
|
||||
return rsmi_wrapper(rsmi_dev_vbios_version_get, device_handle,
|
||||
info->vbios_version_string, AMDSMI_NORMAL_STRING_LENGTH);
|
||||
}
|
||||
|
||||
amdsmi_status_t amdsmi_dev_fan_rpms_get(amdsmi_device_handle device_handle,
|
||||
uint32_t sensor_ind, int64_t *speed) {
|
||||
return rsmi_wrapper(rsmi_dev_fan_rpms_get, device_handle, sensor_ind,
|
||||
@@ -437,7 +422,7 @@ amdsmi_status_t amdsmi_dev_id_get(amdsmi_device_handle device_handle,
|
||||
}
|
||||
|
||||
// TODO(bliu) : add fw info from libdrm
|
||||
amdsmi_status amdsmi_get_fw_info(amdsmi_device_handle dev,
|
||||
amdsmi_status_t amdsmi_get_fw_info(amdsmi_device_handle dev,
|
||||
amdsmi_fw_info_t *info) {
|
||||
const std::map<amdsmi_fw_block, rsmi_fw_block_t> fw_in_rsmi = {
|
||||
{ FW_ID_ASD, RSMI_FW_BLOCK_ASD},
|
||||
@@ -477,6 +462,66 @@ amdsmi_status amdsmi_get_fw_info(amdsmi_device_handle dev,
|
||||
info->num_fw_info++;
|
||||
}
|
||||
}
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// TODO(bliu) : add other asic info
|
||||
// Fill *info with ASIC identification data for the given device.
//
// When DRM is supported the data comes from the amdgpu device-info and vbios
// queries plus the sysfs `unique_id` file; otherwise it falls back to the
// rocm_smi getters.
//
// Returns:
//   AMDSMI_STATUS_INVAL  - info is null
//   AMDSMI_STATUS_BUSY   - the device mutex could not be acquired
//                          (via SMIGPUDEVICE_MUTEX, DRM path only)
//   status of the failing amdgpu query on the DRM path
//   status of the last rocm_smi call on the non-DRM path
//   AMDSMI_STATUS_SUCCESS otherwise
//
// NOTE(review): *info is not cleared first; on the DRM path vendor_id and
// subvendor_id are left untouched, and asic_serial is left untouched when the
// unique_id file cannot be read.
amdsmi_status_t
amdsmi_get_asic_info(amdsmi_device_handle device_handle, amdsmi_asic_info_t *info) {
    if (info == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }

    amd::smi::AMDSmiGPUDevice* gpu_device =
        static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);

    if (!gpu_device->check_if_drm_is_supported()) {
        // rocm_smi fallback. Intermediate failures are ignored so that as much
        // information as possible is collected; only the status of the last
        // call is reported to the caller.
        uint16_t vendor_id = 0;

        amdsmi_status_t status = rsmi_wrapper(rsmi_dev_serial_number_get, device_handle,
                info->asic_serial, AMDSMI_NORMAL_STRING_LENGTH);

        status = rsmi_wrapper(rsmi_dev_brand_get, device_handle,
                info->market_name, AMDSMI_NORMAL_STRING_LENGTH);

        status = rsmi_wrapper(rsmi_dev_vendor_id_get, device_handle,
                &vendor_id);
        if (status == AMDSMI_STATUS_SUCCESS) info->vendor_id = vendor_id;
        vendor_id = 0;

        status = rsmi_wrapper(rsmi_dev_subsystem_vendor_id_get, device_handle,
                &vendor_id);
        if (status == AMDSMI_STATUS_SUCCESS) info->subvendor_id = vendor_id;
        return status;
    }

    struct drm_amdgpu_info_device dev_info = {};
    struct drm_amdgpu_info_vbios vbios = {};

    amdsmi_status_t status = gpu_device->amdgpu_query_info(
            AMDGPU_INFO_DEV_INFO, sizeof(struct drm_amdgpu_info_device), &dev_info);
    if (status != AMDSMI_STATUS_SUCCESS) return status;
    status = gpu_device->amdgpu_query_vbios(&vbios);
    if (status != AMDSMI_STATUS_SUCCESS) return status;

    // Held until the function returns; returns AMDSMI_STATUS_BUSY on failure.
    SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())

    // Read the first whitespace-delimited token of the unique_id file. The
    // token is read into a std::string and then copied with an explicit bound,
    // so an unexpectedly long file cannot overrun asic_serial.
    const std::string path = "/sys/class/drm/" + gpu_device->get_gpu_path() + "/device/unique_id";
    std::ifstream unique_id_file(path);
    std::string serial;
    if (unique_id_file >> serial) {
        // assumes asic_serial is a fixed-size char array member -- TODO confirm
        strncpy(info->asic_serial, serial.c_str(), sizeof(info->asic_serial) - 1);
        info->asic_serial[sizeof(info->asic_serial) - 1] = '\0';
    }

    // The market name is the first space-separated word of the vbios name.
    // strtok_r modifies its input, which is fine for the local vbios copy.
    char *save_ptr = nullptr;
    const char *name = strtok_r(reinterpret_cast<char *>(vbios.name), " ", &save_ptr);
    if (name) {
        // strncpy does not terminate when the source fills the buffer, so
        // bound by the destination size and terminate explicitly.
        // assumes market_name is a fixed-size char array member -- TODO confirm
        strncpy(info->market_name, name, sizeof(info->market_name) - 1);
        info->market_name[sizeof(info->market_name) - 1] = '\0';
    }

    info->device_id = dev_info.device_id;
    info->family = dev_info.family;
    info->rev_id = dev_info.pci_rev;

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
@@ -664,7 +709,7 @@ amdsmi_is_P2P_accessible(amdsmi_device_handle device_handle_src,
|
||||
}
|
||||
|
||||
// TODO(bliu) : other xgmi related information
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_xgmi_info(amdsmi_device_handle device_handle, amdsmi_xgmi_info_t *info) {
|
||||
if (info == nullptr)
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
@@ -882,29 +927,48 @@ amdsmi_status_t amdsmi_dev_gpu_metrics_info_get(
|
||||
reinterpret_cast<rsmi_gpu_metrics_t*>(pgpu_metrics));
|
||||
}
|
||||
|
||||
// TODO(bliu): read from libdrm
|
||||
amdsmi_status
|
||||
amdsmi_status_t
|
||||
amdsmi_get_power_cap_info(amdsmi_device_handle device_handle,
|
||||
uint32_t sensor_ind,
|
||||
amdsmi_power_cap_info_t *info) {
|
||||
if (info == nullptr)
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
|
||||
amd::smi::AMDSmiGPUDevice* gpudevice = nullptr;
|
||||
amdsmi_status_t r = get_gpu_device_from_handle(device_handle, &gpudevice);
|
||||
if (r != AMDSMI_STATUS_SUCCESS)
|
||||
return r;
|
||||
amd::smi::AMDSmiGPUDevice* gpudevice =
|
||||
static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);
|
||||
amdsmi_status_t status;
|
||||
if (gpudevice->check_if_drm_is_supported()){
|
||||
int power_cap = 0;
|
||||
int dpm = 0;
|
||||
|
||||
// Ignore errors to get as much as possible info.
|
||||
memset(info, 0, sizeof(amdsmi_power_cap_info_t));
|
||||
auto rsmi_status = rsmi_dev_power_cap_default_get(gpudevice->get_gpu_id(),
|
||||
&(info->default_power_cap));
|
||||
rsmi_status = rsmi_dev_power_cap_range_get(gpudevice->get_gpu_id(),
|
||||
sensor_ind, &(info->max_power_cap), &(info->min_power_cap));
|
||||
rsmi_status = rsmi_dev_power_cap_get(gpudevice->get_gpu_id(),
|
||||
sensor_ind, &(info->power_cap));
|
||||
status = smi_amdgpu_get_power_cap(gpudevice, &power_cap);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
info->power_cap = power_cap;
|
||||
|
||||
status = smi_amdgpu_get_ranges(gpudevice, CLOCK_TYPE_GFX,
|
||||
NULL, NULL, &dpm);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
info->dpm_cap = dpm;
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
else {
|
||||
// Ignore errors to get as much as possible info.
|
||||
memset(info, 0, sizeof(amdsmi_power_cap_info_t));
|
||||
auto rsmi_status = rsmi_dev_power_cap_default_get(gpudevice->get_gpu_id(),
|
||||
&(info->default_power_cap));
|
||||
rsmi_status = rsmi_dev_power_cap_range_get(gpudevice->get_gpu_id(),
|
||||
sensor_ind, &(info->max_power_cap), &(info->min_power_cap));
|
||||
rsmi_status = rsmi_dev_power_cap_get(gpudevice->get_gpu_id(),
|
||||
sensor_ind, &(info->power_cap));
|
||||
|
||||
// TODO(bliu) : dpm_cap
|
||||
}
|
||||
|
||||
// TODO(bliu) : dpm_cap
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1192,19 +1256,393 @@ amdsmi_status_t amdsmi_version_str_get(amdsmi_sw_component_t component,
|
||||
return rsmi_to_amdsmi_status(status);
|
||||
}
|
||||
|
||||
amdsmi_status amdsmi_get_gpu_activity(amdsmi_device_handle dev,
|
||||
amdsmi_engine_usage_t *info) {
|
||||
if (info == nullptr)
|
||||
amdsmi_status_t
|
||||
amdsmi_get_vbios_info(amdsmi_device_handle dev, amdsmi_vbios_info_t *info) {
|
||||
if (info == nullptr) {
|
||||
return AMDSMI_STATUS_INVAL;
|
||||
}
|
||||
struct drm_amdgpu_info_vbios vbios = {};
|
||||
|
||||
// Get gpu activity from the gpu_metrics table
|
||||
amdsmi_gpu_metrics_t gpu_metrics_info;
|
||||
auto r = amdsmi_dev_gpu_metrics_info_get(dev, &gpu_metrics_info);
|
||||
if ( r == AMDSMI_STATUS_SUCCESS ) {
|
||||
info->average_gfx_activity = gpu_metrics_info.average_gfx_activity;
|
||||
info->average_umc_activity = gpu_metrics_info.average_umc_activity;
|
||||
info->average_mm_activity[0] = gpu_metrics_info.average_mm_activity;
|
||||
amd::smi::AMDSmiGPUDevice* gpu_device =
|
||||
static_cast<amd::smi::AMDSmiGPUDevice*>(dev);
|
||||
amdsmi_status_t status;
|
||||
if (gpu_device->check_if_drm_is_supported()){
|
||||
status = gpu_device->amdgpu_query_vbios(&vbios);
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
strncpy(info->name, (char *) vbios.name, AMDSMI_MAX_STRING_LENGTH);
|
||||
strncpy(info->build_date, (char *) vbios.date, AMDSMI_MAX_DATE_LENGTH);
|
||||
strncpy(info->part_number, (char *) vbios.vbios_pn, AMDSMI_MAX_STRING_LENGTH);
|
||||
strncpy(info->vbios_version_string, (char *) vbios.vbios_ver_str, AMDSMI_NORMAL_STRING_LENGTH);
|
||||
info->vbios_version = vbios.version;
|
||||
}
|
||||
else {
|
||||
// rocm
|
||||
}
|
||||
|
||||
return r;
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Report average engine activity for the device.
//
// The values are copied from the GPU metrics table obtained through
// amdsmi_dev_gpu_metrics_info_get(): gfx and umc activity map one-to-one, and
// the metrics' mm activity is stored in slot 0 of average_mm_activity.
//
// Returns AMDSMI_STATUS_INVAL when info is null, the metrics query status when
// that query fails (info is left untouched), AMDSMI_STATUS_SUCCESS otherwise.
amdsmi_status_t
amdsmi_get_gpu_activity(amdsmi_device_handle dev, amdsmi_engine_usage_t *info) {
    if (info == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }

    amdsmi_gpu_metrics_t metrics = {};
    const amdsmi_status_t status = amdsmi_dev_gpu_metrics_info_get(dev, &metrics);
    if (status != AMDSMI_STATUS_SUCCESS) {
        return status;
    }

    info->average_gfx_activity = metrics.average_gfx_activity;
    info->average_mm_activity[0] = metrics.average_mm_activity;
    info->average_umc_activity = metrics.average_umc_activity;

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
// Report the device power limit.
//
// The value is obtained from smi_amdgpu_get_power_cap() and stored in
// limit->limit after conversion to uint16_t.
//
// Returns AMDSMI_STATUS_INVAL when limit is null, the helper's status when it
// fails (limit is left untouched), AMDSMI_STATUS_SUCCESS otherwise.
//
// NOTE(review): unlike amdsmi_get_power_cap_info(), this does not check
// check_if_drm_is_supported() before calling the amdgpu helper -- confirm
// whether a non-DRM fallback is needed.
amdsmi_status_t
amdsmi_get_power_limit(amdsmi_device_handle dev, amdsmi_power_limit_t *limit) {
    if (limit == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }

    amd::smi::AMDSmiGPUDevice* gpu_device =
        static_cast<amd::smi::AMDSmiGPUDevice*>(dev);

    // Initialised so the value read below is well defined even if the helper
    // reports success without writing its output.
    int power_limit = 0;
    const amdsmi_status_t status = smi_amdgpu_get_power_cap(gpu_device, &power_limit);
    if (status != AMDSMI_STATUS_SUCCESS) {
        return status;
    }
    limit->limit = static_cast<uint16_t>(power_limit);

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
// Report maximum, average and current clock for one clock domain.
//
// max_clk comes from smi_amdgpu_get_ranges(); avg_clk / cur_clk come from the
// GPU metrics table (amdsmi_dev_gpu_metrics_info_get). Supported domains are
// CLOCK_TYPE_GFX, CLOCK_TYPE_MEM, CLOCK_TYPE_VCLK0 and CLOCK_TYPE_VCLK1.
//
// Returns:
//   AMDSMI_STATUS_INVAL - info is null, clk_type >= CLOCK_TYPE__MAX, or
//                         clk_type is not one of the supported domains
//   status of the metrics or range query when either fails
//   AMDSMI_STATUS_SUCCESS otherwise
//
// NOTE(review): for an in-range but unsupported clk_type, info->max_clk has
// already been written when AMDSMI_STATUS_INVAL is returned. That ordering is
// kept as in the original.
amdsmi_status_t
amdsmi_get_clock_measure(amdsmi_device_handle dev, amdsmi_clk_type_t clk_type, amdsmi_clock_measure_t *info) {
    if (info == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }

    if (clk_type >= CLOCK_TYPE__MAX) {
        // NOTE(review): this diagnostic goes to stdout from library code;
        // kept as-is because callers may depend on the output.
        printf("Domain value greater or equals CLOCK_TYPE__MAX value. Return code: %d", AMDSMI_STATUS_INVAL);
        return AMDSMI_STATUS_INVAL;
    }

    amd::smi::AMDSmiGPUDevice* gpu_device =
        static_cast<amd::smi::AMDSmiGPUDevice*>(dev);

    amdsmi_gpu_metrics_t metrics = {};
    amdsmi_status_t status = amdsmi_dev_gpu_metrics_info_get(dev, &metrics);
    if (status != AMDSMI_STATUS_SUCCESS) {
        return status;
    }

    // Initialised so the value read below is well defined even if the helper
    // reports success without writing its output.
    int max_freq = 0;
    status = smi_amdgpu_get_ranges(gpu_device, clk_type,
            &max_freq, NULL, NULL);
    if (status != AMDSMI_STATUS_SUCCESS) {
        return status;
    }
    info->max_clk = max_freq;

    switch (clk_type) {
    case CLOCK_TYPE_GFX:
        info->avg_clk = metrics.average_gfxclk_frequency;
        info->cur_clk = metrics.current_gfxclk;
        break;
    case CLOCK_TYPE_MEM:
        info->avg_clk = metrics.average_uclk_frequency;
        info->cur_clk = metrics.current_uclk;
        break;
    case CLOCK_TYPE_VCLK0:
        info->avg_clk = metrics.average_vclk0_frequency;
        info->cur_clk = metrics.current_vclk0;
        break;
    case CLOCK_TYPE_VCLK1:
        info->avg_clk = metrics.average_vclk1_frequency;
        info->cur_clk = metrics.current_vclk1;
        break;
    default:
        return AMDSMI_STATUS_INVAL;
    }

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Report the critical temperature limit of one temperature sensor.
 *
 * Walks the temp<N>_label files of the device's hwmon directory until the
 * label matching the requested sensor ("edge", "junction" or "mem") is found,
 * then reads the matching temp<N>_crit file. The value read is divided by
 * 1000 before being stored in limit->limit.
 *
 * @param dev        device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param temp_type  sensor to query (EDGE, JUNCTION or VRAM)
 * @param limit      output structure, must not be null
 * @return AMDSMI_STATUS_INVAL for a null output or unsupported sensor type,
 *         the status of smi_amdgpu_find_hwmon_dir() when it fails,
 *         AMDSMI_STATUS_API_FAILED when no matching label exists or a file
 *         cannot be opened/parsed, AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t
amdsmi_get_temperature_limit(amdsmi_device_handle dev, amdsmi_temperature_type_t temp_type, amdsmi_temperature_limit_t *limit) {
    if (limit == nullptr || temp_type >= TEMPERATURE_TYPE__MAX) {
        return AMDSMI_STATUS_INVAL;
    }
    amd::smi::AMDSmiGPUDevice* gpu_device =
        static_cast<amd::smi::AMDSmiGPUDevice*>(dev);

    std::string name;
    switch (temp_type) {
    case TEMPERATURE_TYPE_EDGE:
        name = "edge";
        break;
    case TEMPERATURE_TYPE_JUNCTION:
        name = "junction";
        break;
    case TEMPERATURE_TYPE_VRAM:
        name = "mem";
        break;
    default:
        return AMDSMI_STATUS_INVAL;
    }

    std::string path;
    amdsmi_status_t status = smi_amdgpu_find_hwmon_dir(gpu_device, &path);
    if (status != AMDSMI_STATUS_SUCCESS) {
        return status;
    }
    SMIGPUDEVICE_MUTEX(gpu_device->get_mutex())

    for (int count = 1; ; count++) {
        const std::string sensor_base = path + "/temp" + std::to_string(count);

        const std::string label_path = sensor_base + "_label";
        std::ifstream label_file(label_path.c_str(), std::ifstream::in);
        if (!label_file.is_open()) {
            // Ran past the last sensor without finding the requested label.
            printf("Failed to open file: %s \n", label_path.c_str());
            return AMDSMI_STATUS_API_FAILED;
        }

        std::string label;
        std::getline(label_file, label);

        // Only the sensor whose label matches the request is of interest.
        // (The previous test was inverted and picked the first sensor that
        // did NOT match.)
        if (label != name) {
            continue;
        }

        const std::string crit_path = sensor_base + "_crit";
        std::ifstream crit_file(crit_path.c_str(), std::ifstream::in);
        if (!crit_file.is_open()) {
            printf("Failed to open file: %s \n", crit_path.c_str());
            return AMDSMI_STATUS_API_FAILED;
        }

        int readTemp = 0;
        // Stream extraction fails cleanly on an empty or non-numeric file,
        // unlike `!sscanf(...)`, which let EOF (-1) through as success.
        if (!(crit_file >> readTemp)) {
            return AMDSMI_STATUS_API_FAILED;
        }
        limit->limit = static_cast<uint16_t>(readTemp / 1000);
        return AMDSMI_STATUS_SUCCESS;
    }
}
|
||||
/**
 * Report the current temperature of one sensor from the GPU metrics table.
 *
 * @param dev        device handle passed on to amdsmi_dev_gpu_metrics_info_get()
 * @param temp_type  sensor to query (EDGE, JUNCTION, VRAM or PLX)
 * @param info       output structure, must not be null
 * @return AMDSMI_STATUS_INVAL for a null output or unsupported sensor type,
 *         the metrics query status when it fails, AMDSMI_STATUS_SUCCESS
 *         otherwise
 */
amdsmi_status_t
amdsmi_get_temperature_measure(amdsmi_device_handle dev, amdsmi_temperature_type_t temp_type, amdsmi_temperature_t *info) {
    // TEMPERATURE_TYPE__MAX itself is not a valid sensor, so reject it up
    // front (same bound as amdsmi_get_temperature_limit).
    if (info == nullptr || temp_type >= TEMPERATURE_TYPE__MAX) {
        return AMDSMI_STATUS_INVAL;
    }

    amdsmi_gpu_metrics_t metrics = {};
    amdsmi_status_t status = amdsmi_dev_gpu_metrics_info_get(dev, &metrics);
    if (status != AMDSMI_STATUS_SUCCESS) {
        return status;
    }

    switch (temp_type) {
    case TEMPERATURE_TYPE_EDGE:
        info->cur_temp = metrics.temperature_edge;
        break;
    case TEMPERATURE_TYPE_JUNCTION:
        info->cur_temp = metrics.temperature_hotspot;
        break;
    case TEMPERATURE_TYPE_VRAM:
        info->cur_temp = metrics.temperature_mem;
        break;
    case TEMPERATURE_TYPE_PLX:
        // NOTE(review): PLX is served from the vrsoc metric - confirm this
        // mapping is intended.
        info->cur_temp = metrics.temperature_vrsoc;
        break;
    default:
        return AMDSMI_STATUS_INVAL;
    }

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Tell whether RAS is enabled for a GPU block.
 *
 * The enabled-feature mask is read via smi_amdgpu_get_enabled_blocks() and
 * tested against the bits of `block`.
 *
 * @param device_handle  device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param block          GPU block to test
 * @param state          receives ENABLED or DISABLED; must not be null
 * @return AMDSMI_STATUS_INVAL for a null output or a block beyond
 *         AMDSMI_GPU_BLOCK_LAST, the helper's status on failure,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t
amdsmi_get_ras_features_enabled(amdsmi_device_handle device_handle, amdsmi_gpu_block block, amdsmi_ras_err_state_t *state) {
    if (!state || block > AMDSMI_GPU_BLOCK_LAST)
        return AMDSMI_STATUS_INVAL;

    auto* device = static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);

    uint64_t enabled_mask = 0;
    const amdsmi_status_t rc = smi_amdgpu_get_enabled_blocks(device, &enabled_mask);
    if (rc != AMDSMI_STATUS_SUCCESS)
        return rc;

    if (enabled_mask & block) {
        *state = AMDSMI_RAS_ERR_STATE_ENABLED;
    } else {
        *state = AMDSMI_RAS_ERR_STATE_DISABLED;
    }
    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Retrieve the retired (bad) page records of a GPU device.
 *
 * When DRM is supported the work is delegated to
 * smi_amdgpu_get_bad_page_info(); otherwise nothing is written and success is
 * returned (the ROCm path is not implemented here).
 *
 * @param device_handle  device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param num_pages      receives the number of records; must not be null
 * @param info           receives the records; must not be null
 * @return AMDSMI_STATUS_INVAL when an output pointer is null, the helper's
 *         status on failure, AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t
amdsmi_get_bad_page_info(amdsmi_device_handle device_handle, uint32_t *num_pages, amdsmi_retired_page_record_t *info) {
    // num_pages is dereferenced by the helper, so it has to be validated
    // here as well; previously only `info` was checked.
    // NOTE(review): the helper accepts info == nullptr as a "count only"
    // query, but this entry point has always rejected it - kept as is.
    if (info == nullptr || num_pages == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }

    amd::smi::AMDSmiGPUDevice* gpu_device =
        static_cast<amd::smi::AMDSmiGPUDevice*>(device_handle);

    if (gpu_device->check_if_drm_is_supported()) {
        amdsmi_status_t status = smi_amdgpu_get_bad_page_info(gpu_device, num_pages, info);
        if (status != AMDSMI_STATUS_SUCCESS) {
            return status;
        }
    } else {
        // rocm
    }

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Retrieve the ECC error counters of a GPU device.
 *
 * With DRM support the counters come from smi_amdgpu_get_ecc_error_count();
 * without it nothing is written and success is returned.
 *
 * @param dev  device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param ec   receives the counters; must not be null
 * @return AMDSMI_STATUS_INVAL when ec is null, the helper's status on
 *         failure, AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t
amdsmi_get_ecc_error_count(amdsmi_device_handle dev, amdsmi_error_count_t *ec) {
    if (!ec)
        return AMDSMI_STATUS_INVAL;

    auto* device = static_cast<amd::smi::AMDSmiGPUDevice*>(dev);

    if (!device->check_if_drm_is_supported()) {
        // rocm
        return AMDSMI_STATUS_SUCCESS;
    }

    const amdsmi_status_t rc = smi_amdgpu_get_ecc_error_count(device, ec);
    return rc;
}
|
||||
|
||||
/**
 * List the processes using a GPU device.
 *
 * Calling with *max_processes == 0 (or when no process is found) only
 * reports the number of processes through *max_processes. Otherwise the
 * process handles are copied into `list` and *max_processes is updated to the
 * number found. Without DRM support nothing is written.
 *
 * @param dev            device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param list           output array; may be null only for a count query
 * @param max_processes  in: capacity of `list`; out: number of processes
 * @return AMDSMI_STATUS_INVAL for a null max_processes or a null list when
 *         entries must be copied, AMDSMI_STATUS_OUT_OF_RESOURCES when `list`
 *         is too small, the status of gpuvsmi_get_pids() on failure,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t
amdsmi_get_process_list(amdsmi_device_handle dev, amdsmi_process_handle *list, uint32_t *max_processes) {
    if (!max_processes)
        return AMDSMI_STATUS_INVAL;

    auto* device = static_cast<amd::smi::AMDSmiGPUDevice*>(dev);
    if (!device->check_if_drm_is_supported()) {
        // rocm
        return AMDSMI_STATUS_SUCCESS;
    }

    std::vector<long int> found_pids;
    uint64_t reported_size = 0;
    amdsmi_bdf_t device_bdf = device->get_bdf();
    const amdsmi_status_t rc = gpuvsmi_get_pids(device_bdf, found_pids, &reported_size);
    if (rc != AMDSMI_STATUS_SUCCESS)
        return rc;

    // Count-only query, or nothing to copy.
    if (*max_processes == 0 || found_pids.empty()) {
        *max_processes = static_cast<uint32_t>(found_pids.size());
        return AMDSMI_STATUS_SUCCESS;
    }

    if (list == nullptr)
        return AMDSMI_STATUS_INVAL;

    if (*max_processes < found_pids.size())
        return AMDSMI_STATUS_OUT_OF_RESOURCES;

    for (uint32_t idx = 0; idx < found_pids.size() && idx < *max_processes; ++idx) {
        list[idx] = static_cast<uint32_t>(found_pids[idx]);
    }
    *max_processes = static_cast<uint32_t>(found_pids.size());

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Retrieve information about one process using a GPU device.
 *
 * With DRM support the data comes from gpuvsmi_get_pid_info() for the
 * device's BDF; without it nothing is written and success is returned.
 *
 * @param dev      device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param process  process handle to look up
 * @param info     receives the process information; must not be null
 * @return AMDSMI_STATUS_INVAL when info is null, the helper's status on
 *         failure, AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t
amdsmi_get_process_info(amdsmi_device_handle dev, amdsmi_process_handle process, amdsmi_proc_info_t *info) {
    if (!info)
        return AMDSMI_STATUS_INVAL;

    auto* device = static_cast<amd::smi::AMDSmiGPUDevice*>(dev);

    if (!device->check_if_drm_is_supported()) {
        // rocm
        return AMDSMI_STATUS_SUCCESS;
    }

    const amdsmi_status_t rc = gpuvsmi_get_pid_info(device->get_bdf(), process, *info);
    return rc;
}
|
||||
|
||||
/**
 * Report the supported and current frequency ranges of a clock domain.
 *
 * The supported range and the lower bound of the current range come from
 * smi_amdgpu_get_ranges(); the upper bound of the current range is the
 * current clock taken from the GPU metrics table.
 *
 * @param dev       device handle; used as an amd::smi::AMDSmiGPUDevice
 * @param clk_type  clock domain (GFX, MEM, VCLK0 or VCLK1)
 * @param range     output structure, must not be null
 * @return AMDSMI_STATUS_INVAL for a null output or unsupported domain, the
 *         failing helper's status, or AMDSMI_STATUS_SUCCESS
 */
amdsmi_status_t
amdsmi_get_target_frequency_range(amdsmi_device_handle dev, amdsmi_clk_type_t clk_type, amdsmi_frequency_range_t *range) {
    if (!range || clk_type > CLOCK_TYPE__MAX)
        return AMDSMI_STATUS_INVAL;

    auto* device = static_cast<amd::smi::AMDSmiGPUDevice*>(dev);

    amdsmi_gpu_metrics_t gpu_metrics = {};
    amdsmi_status_t rc = amdsmi_dev_gpu_metrics_info_get(dev, &gpu_metrics);
    if (rc != AMDSMI_STATUS_SUCCESS)
        return rc;

    int lowest = 0;
    int highest = 0;
    rc = smi_amdgpu_get_ranges(device, clk_type, &highest, &lowest, nullptr);
    if (rc != AMDSMI_STATUS_SUCCESS)
        return rc;

    range->supported_freq_range.lower_bound = (long)lowest;
    range->current_freq_range.lower_bound = (long)lowest;
    range->supported_freq_range.upper_bound = (long)highest;

    // The current upper bound is the clock the domain is running at now.
    int current_clk = 0;
    switch (clk_type) {
    case CLOCK_TYPE_GFX:   current_clk = gpu_metrics.current_gfxclk; break;
    case CLOCK_TYPE_MEM:   current_clk = gpu_metrics.current_uclk;   break;
    case CLOCK_TYPE_VCLK0: current_clk = gpu_metrics.current_vclk0;  break;
    case CLOCK_TYPE_VCLK1: current_clk = gpu_metrics.current_vclk1;  break;
    default:
        return AMDSMI_STATUS_INVAL;
    }
    range->current_freq_range.upper_bound = (long)current_clk;

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
@@ -55,27 +55,40 @@ namespace smi {
|
||||
|
||||
amdsmi_status_t AMDSmiDrm::init() {
|
||||
// A few RAII handler
|
||||
|
||||
using dir_ptr = std::unique_ptr<DIR, decltype(&closedir)>;
|
||||
using drm_version_ptr = std::unique_ptr<drmVersion,
|
||||
decltype(&drmFreeVersion)>;
|
||||
// using drm_device_ptr = std::unique_ptr(drmDevicePtr,
|
||||
// decltype(&drmFreeDevice));
|
||||
|
||||
struct dirent *dir = nullptr;
|
||||
int fd = -1;
|
||||
|
||||
|
||||
amdsmi_status_t status = lib_loader_.load("libdrm.so");
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
// load symbol from libdrm
|
||||
drm_cmd_write_ = nullptr;
|
||||
status = lib_loader_.load_symbol(&drm_cmd_write_, "drmCommandWrite");
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
using drmGetVersionType = drmVersionPtr (*)(int); // drmGetVersion
|
||||
using drmFreeVersionType = void (*)(drmVersionPtr); // drmFreeVersion
|
||||
|
||||
using drmGetVersionType = drmVersionPtr (*)(int); // drmGetVersion
|
||||
using drmFreeVersionType = void (*)(drmVersionPtr); // drmFreeVersion
|
||||
using drmGetDeviceType = int(*)(int, drmDevicePtr*); // drmGetDevice
|
||||
using drmFreeDeviceType = void(*)(drmDevicePtr*); // drmFreeDevice
|
||||
|
||||
drmGetVersionType drm_get_version = nullptr;
|
||||
drmFreeVersionType drm_free_version = nullptr;
|
||||
|
||||
drmGetDeviceType drm_get_device = nullptr;
|
||||
drmFreeDeviceType drm_free_device = nullptr;
|
||||
|
||||
status = lib_loader_.load_symbol(&drm_get_version, "drmGetVersion");
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
@@ -85,10 +98,20 @@ amdsmi_status_t AMDSmiDrm::init() {
|
||||
return status;
|
||||
}
|
||||
|
||||
status = lib_loader_.load_symbol(&drm_get_device, "drmGetDevice");
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
status = lib_loader_.load_symbol(&drm_free_device, "drmFreeDevice");
|
||||
if (status != AMDSMI_STATUS_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
auto d = dir_ptr(opendir("/dev/dri/"), &closedir);
|
||||
if (d == nullptr) return AMDSMI_STATUS_NOT_INIT;
|
||||
|
||||
drmDevicePtr device;
|
||||
|
||||
while ((dir = readdir(d.get())) != NULL) {
|
||||
char* name_cstr = new char[sizeof(dir->d_name) + 10];
|
||||
auto name = std::unique_ptr<char[]>(name_cstr);
|
||||
@@ -105,7 +128,22 @@ amdsmi_status_t AMDSmiDrm::init() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (drm_get_device(fd, &device) != 0) {
|
||||
drm_free_device(&device);
|
||||
return AMDSMI_STATUS_DRM_ERROR;
|
||||
}
|
||||
|
||||
drm_fds_.push_back(fd);
|
||||
drm_paths_.push_back(dir->d_name);
|
||||
|
||||
amdsmi_bdf_t bdf;
|
||||
bdf.function_number = device->businfo.pci->func;
|
||||
bdf.device_number = device->businfo.pci->dev;
|
||||
bdf.bus_number = device->businfo.pci->bus;
|
||||
bdf.domain_number = device->businfo.pci->domain;
|
||||
|
||||
drm_bdfs_.push_back(bdf);
|
||||
drm_free_device(&device);
|
||||
}
|
||||
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
@@ -115,7 +153,10 @@ amdsmi_status_t AMDSmiDrm::cleanup() {
|
||||
for (unsigned int i=0; i < drm_fds_.size(); i++) {
|
||||
close(drm_fds_[i]);
|
||||
}
|
||||
|
||||
drm_fds_.clear();
|
||||
drm_paths_.clear();
|
||||
drm_bdfs_.clear();
|
||||
lib_loader_.unload();
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -190,9 +231,34 @@ amdsmi_status_t AMDSmiDrm::amdgpu_query_vbios(int fd, void *info) {
|
||||
}
|
||||
|
||||
|
||||
int AMDSmiDrm::get_drm_fd_by_index(uint32_t gpu_index) const {
|
||||
if (gpu_index + 1 > drm_fds_.size()) return -1;
|
||||
return drm_fds_[gpu_index];
|
||||
amdsmi_status_t AMDSmiDrm::get_drm_fd_by_index(uint32_t gpu_index, uint32_t *fd_info) const {
|
||||
if (gpu_index + 1 > drm_fds_.size()) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
*fd_info = drm_fds_[gpu_index];
|
||||
return AMDSMI_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Look up the PCI BDF recorded for a GPU index.
 *
 * @param gpu_index  index into the list of discovered DRM devices
 * @param bdf_info   receives a copy of the BDF; must not be null
 * @return AMDSMI_STATUS_INVAL when bdf_info is null,
 *         AMDSMI_STATUS_NOT_SUPPORTED when the index is out of range,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t AMDSmiDrm::get_bdf_by_index(uint32_t gpu_index, amdsmi_bdf_t *bdf_info) const {
    if (bdf_info == nullptr) return AMDSMI_STATUS_INVAL;
    // Compare with >= rather than `gpu_index + 1 >`: the addition wraps to 0
    // for UINT32_MAX and would let an out-of-range index through.
    if (gpu_index >= drm_bdfs_.size()) return AMDSMI_STATUS_NOT_SUPPORTED;
    *bdf_info = drm_bdfs_[gpu_index];
    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Look up the DRM node name recorded for a GPU index.
 *
 * @param gpu_index  index into the list of discovered DRM devices
 * @param drm_path   receives a copy of the stored name; must not be null
 * @return AMDSMI_STATUS_INVAL when drm_path is null,
 *         AMDSMI_STATUS_NOT_SUPPORTED when the index is out of range,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t AMDSmiDrm::get_drm_path_by_index(uint32_t gpu_index, std::string *drm_path) const {
    if (drm_path == nullptr) return AMDSMI_STATUS_INVAL;
    // Compare with >= rather than `gpu_index + 1 >`: the addition wraps to 0
    // for UINT32_MAX and would let an out-of-range index through.
    if (gpu_index >= drm_paths_.size()) return AMDSMI_STATUS_NOT_SUPPORTED;
    *drm_path = drm_paths_[gpu_index];
    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
std::vector<std::string>& AMDSmiDrm::get_drm_paths() {
|
||||
return drm_paths_;
|
||||
}
|
||||
|
||||
bool AMDSmiDrm::check_if_drm_is_supported() {
|
||||
return drm_cmd_write_ != NULL ? true : false;
|
||||
}
|
||||
|
||||
std::vector<amdsmi_bdf_t> AMDSmiDrm::get_bdfs() {
|
||||
return drm_bdfs_;
|
||||
}
|
||||
|
||||
} // namespace smi
|
||||
|
||||
@@ -52,33 +52,78 @@ uint32_t AMDSmiGPUDevice::get_gpu_id() const {
|
||||
return gpu_id_;
|
||||
}
|
||||
|
||||
uint32_t AMDSmiGPUDevice::get_gpu_fd() const {
|
||||
return fd_;
|
||||
}
|
||||
|
||||
std::string& AMDSmiGPUDevice::get_gpu_path() {
|
||||
return path_;
|
||||
}
|
||||
|
||||
/// Returns a copy of the BDF cached in bdf_.
amdsmi_bdf_t AMDSmiGPUDevice::get_bdf() {
    return bdf_;
}
|
||||
/**
 * Cache the DRM data (fd, node name, BDF) of this GPU and create its mutex.
 *
 * The three values are looked up by gpu_id_ through the AMDSmiDrm helper and
 * only stored in the members once the shared mutex has been created.
 *
 * @return AMDSMI_STATUS_NOT_SUPPORTED when any lookup fails,
 *         AMDSMI_STATUS_INIT_ERROR when the mutex cannot be created,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t AMDSmiGPUDevice::get_drm_data() {
    uint32_t drm_fd = 0;
    if (drm_.get_drm_fd_by_index(gpu_id_, &drm_fd) != AMDSMI_STATUS_SUCCESS)
        return AMDSMI_STATUS_NOT_SUPPORTED;

    std::string drm_path;
    if (drm_.get_drm_path_by_index(gpu_id_, &drm_path) != AMDSMI_STATUS_SUCCESS)
        return AMDSMI_STATUS_NOT_SUPPORTED;

    amdsmi_bdf_t drm_bdf;
    if (drm_.get_bdf_by_index(gpu_id_, &drm_bdf) != AMDSMI_STATUS_SUCCESS)
        return AMDSMI_STATUS_NOT_SUPPORTED;

    // The mutex is keyed by the DRM node name.
    mutex_ = shared_mutex_init(drm_path.c_str(), 0777);
    if (mutex_.ptr == nullptr) {
        printf("Failed to create shared mem. mutex.");
        return AMDSMI_STATUS_INIT_ERROR;
    }

    bdf_ = drm_bdf;
    path_ = drm_path;
    fd_ = drm_fd;
    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
pthread_mutex_t* AMDSmiGPUDevice::get_mutex() {
|
||||
return mutex_.ptr;
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_info(unsigned info_id,
|
||||
unsigned size, void *value) const {
|
||||
int fd = drm_.get_drm_fd_by_index(gpu_id_);
|
||||
if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
amdsmi_status_t ret;
|
||||
uint32_t fd = 0;
|
||||
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
return drm_.amdgpu_query_info(fd, info_id, size, value);
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_hw_ip(unsigned info_id,
|
||||
unsigned hw_ip_type, unsigned size, void *value) const {
|
||||
int fd = drm_.get_drm_fd_by_index(gpu_id_);
|
||||
if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
amdsmi_status_t ret;
|
||||
uint32_t fd = 0;
|
||||
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
return drm_.amdgpu_query_hw_ip(fd, info_id, hw_ip_type, size, value);
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_fw(unsigned info_id,
|
||||
unsigned fw_type, unsigned size, void *value) const {
|
||||
int fd = drm_.get_drm_fd_by_index(gpu_id_);
|
||||
if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
amdsmi_status_t ret;
|
||||
uint32_t fd = 0;
|
||||
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
return drm_.amdgpu_query_fw(fd, info_id, fw_type, size, value);
|
||||
}
|
||||
|
||||
amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const {
|
||||
int fd = drm_.get_drm_fd_by_index(gpu_id_);
|
||||
if (fd == -1) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
amdsmi_status_t ret;
|
||||
uint32_t fd = 0;
|
||||
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;;
|
||||
|
||||
return drm_.amdgpu_query_vbios(fd, info);
|
||||
}
|
||||
|
||||
@@ -55,6 +55,11 @@ AMDSmiSocket::~AMDSmiSocket() {
|
||||
devices_.clear();
|
||||
}
|
||||
|
||||
/**
 * Report how many devices are attached to this socket.
 *
 * @param device_count  receives devices_.size(); must not be null
 * @return AMDSMI_STATUS_INVAL when device_count is null,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t AMDSmiSocket::get_device_count(uint32_t* device_count) const {
    // Guard the output pointer instead of dereferencing it blindly.
    if (device_count == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }
    *device_count = static_cast<uint32_t>(devices_.size());
    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
} // namespace smi
|
||||
} // namespace amd
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "amd_smi/impl/amd_smi_system.h"
|
||||
#include "amd_smi/impl/amd_smi_gpu_device.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
#include "rocm_smi/rocm_smi_main.h"
|
||||
|
||||
|
||||
namespace amd {
|
||||
@@ -54,55 +55,91 @@ namespace smi {
|
||||
|
||||
amdsmi_status_t AMDSmiSystem::init(uint64_t flags) {
|
||||
init_flag_ = flags;
|
||||
amdsmi_status_t amd_smi_status;
|
||||
// populate sockets and devices
|
||||
if (flags & AMDSMI_INIT_AMD_GPUS) {
|
||||
drm_.init();
|
||||
amd_smi_status = drm_.init();
|
||||
// init rsmi
|
||||
rsmi_status_t ret = rsmi_init(flags);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return static_cast<amdsmi_status_t>(ret);
|
||||
}
|
||||
|
||||
uint32_t device_count = 0;
|
||||
ret = rsmi_num_monitor_devices(&device_count);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return static_cast<amdsmi_status_t>(ret);
|
||||
}
|
||||
// libdrm is supported
|
||||
if (amd_smi_status == AMDSMI_STATUS_SUCCESS) {
|
||||
amd::smi::RocmSMI::getInstance().DiscoverAmdgpuDevices();
|
||||
uint32_t device_count = amd::smi::RocmSMI::getInstance().devices().size();
|
||||
for (uint32_t i=0; i < device_count; i++) {
|
||||
std::stringstream ss;
|
||||
//values for socket id are hardcoded
|
||||
ss << std::setfill('0') << std::uppercase << std::hex
|
||||
<< std::setw(4) << drm_.get_bdfs()[i].domain_number << ":"
|
||||
<< std::setw(2) << drm_.get_bdfs()[i].bus_number << ":"
|
||||
<< std::setw(2) << drm_.get_bdfs()[i].device_number << "."
|
||||
<< std::setw(2) << drm_.get_bdfs()[i].function_number;
|
||||
|
||||
for (uint32_t i=0; i < device_count; i++) {
|
||||
uint64_t bdfid = 0;
|
||||
ret = rsmi_dev_pci_id_get(i, &bdfid);
|
||||
// Multiple devices may share the same socket
|
||||
auto socket_id = ss.str();
|
||||
AMDSmiSocket* socket = nullptr;
|
||||
for (unsigned int j=0; j < sockets_.size(); j++) {
|
||||
if (sockets_[j]->get_socket_id() == socket_id) {
|
||||
socket = sockets_[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (socket == nullptr) {
|
||||
socket = new AMDSmiSocket(ss.str());
|
||||
sockets_.push_back(socket);
|
||||
}
|
||||
|
||||
AMDSmiDevice* device = new AMDSmiGPUDevice(i, drm_);
|
||||
socket->add_device(device);
|
||||
devices_.insert(device);
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
uint32_t device_count = 0;
|
||||
ret = rsmi_num_monitor_devices(&device_count);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return static_cast<amdsmi_status_t>(ret);
|
||||
}
|
||||
|
||||
uint64_t domain = (bdfid >> 32) & 0xffffffff;
|
||||
uint64_t bus = (bdfid >> 8) & 0xff;
|
||||
uint64_t device_id = (bdfid >> 3) & 0x1f;
|
||||
uint64_t function = bdfid & 0x7;
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::setfill('0') << std::uppercase << std::hex
|
||||
<< std::setw(4) << domain << ":" << std::setw(2) << bus << ":"
|
||||
<< std::setw(2) << device_id << "." << std::setw(2) << function;
|
||||
|
||||
// Multiple devices may share the same socket
|
||||
auto socket_id = ss.str();
|
||||
AMDSmiSocket* socket = nullptr;
|
||||
for (unsigned int j=0; j < sockets_.size(); j++) {
|
||||
if (sockets_[j]->get_socket_id() == socket_id) {
|
||||
socket = sockets_[j];
|
||||
break;
|
||||
for (uint32_t i=0; i < device_count; i++) {
|
||||
uint64_t bdfid = 0;
|
||||
ret = rsmi_dev_pci_id_get(i, &bdfid);
|
||||
if (ret != RSMI_STATUS_SUCCESS) {
|
||||
return static_cast<amdsmi_status_t>(ret);
|
||||
}
|
||||
}
|
||||
if (socket == nullptr) {
|
||||
socket = new AMDSmiSocket(ss.str());
|
||||
sockets_.push_back(socket);
|
||||
}
|
||||
|
||||
AMDSmiDevice* device = new AMDSmiGPUDevice(i, drm_);
|
||||
socket->add_device(device);
|
||||
devices_.insert(device);
|
||||
uint64_t domain = (bdfid >> 32) & 0xffffffff;
|
||||
uint64_t bus = (bdfid >> 8) & 0xff;
|
||||
uint64_t device_id = (bdfid >> 3) & 0x1f;
|
||||
uint64_t function = bdfid & 0x7;
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::setfill('0') << std::uppercase << std::hex
|
||||
<< std::setw(4) << domain << ":" << std::setw(2) << bus << ":"
|
||||
<< std::setw(2) << device_id << "." << std::setw(2) << function;
|
||||
|
||||
// Multiple devices may share the same socket
|
||||
auto socket_id = ss.str();
|
||||
AMDSmiSocket* socket = nullptr;
|
||||
for (unsigned int j=0; j < sockets_.size(); j++) {
|
||||
if (sockets_[j]->get_socket_id() == socket_id) {
|
||||
socket = sockets_[j];
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (socket == nullptr) {
|
||||
socket = new AMDSmiSocket(ss.str());
|
||||
sockets_.push_back(socket);
|
||||
}
|
||||
|
||||
AMDSmiDevice* device = new AMDSmiGPUDevice(i, drm_);
|
||||
socket->add_device(device);
|
||||
devices_.insert(device);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
@@ -0,0 +1,382 @@
|
||||
/* * Copyright (C) 2022 Advanced Micro Devices. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <xf86drm.h>
|
||||
#include <xf86drmMode.h>
|
||||
#include <dirent.h>
|
||||
#include <sys/types.h>
|
||||
#include <memory>
|
||||
#include <random>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <sys/ioctl.h>
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
#include "shared_mutex.h" // NOLINT
|
||||
|
||||
static const uint32_t kAmdGpuId = 0x1002;
|
||||
|
||||
static bool isAMDGPU(std::string dev_path) {
|
||||
std::string vend_path = dev_path + "/device/vendor";
|
||||
std::string vbios_v_path = dev_path + "/device/vbios_version";
|
||||
if (!amd::smi::FileExists(vend_path.c_str())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!amd::smi::FileExists(vbios_v_path.c_str())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ifstream fs;
|
||||
fs.open(vend_path);
|
||||
|
||||
if (!fs.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t vendor_id;
|
||||
|
||||
fs >> std::hex >> vendor_id;
|
||||
|
||||
fs.close();
|
||||
|
||||
if (vendor_id == kAmdGpuId) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Locate the hwmon directory of a GPU device in sysfs.
 *
 * Scans /sys/class/drm/<gpu path>/device/hwmon/ for an entry whose name
 * contains "hwmon" and stores its full path in *full_path. If several
 * entries match, the last one returned by readdir() is kept.
 *
 * @param device     GPU device to inspect
 * @param full_path  receives the hwmon directory path; must not be null
 * @return AMDSMI_STATUS_NOT_SUPPORTED when DRM is unavailable, the device is
 *         not an AMD GPU, the hwmon directory cannot be opened or contains no
 *         hwmon entry; AMDSMI_STATUS_API_FAILED when full_path is null;
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t smi_amdgpu_find_hwmon_dir(amd::smi::AMDSmiGPUDevice *device, std::string* full_path)
{
    if (!device->check_if_drm_is_supported()) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }
    if (full_path == nullptr) {
        return AMDSMI_STATUS_API_FAILED;
    }
    SMIGPUDEVICE_MUTEX(device->get_mutex())

    const std::string device_path = "/sys/class/drm/" + device->get_gpu_path();
    const std::string directory_path = device_path + "/device/hwmon/";

    if (!isAMDGPU(device_path)) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }

    DIR *dh = opendir(directory_path.c_str());
    if (!dh) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }

    // readdir() gives no ordering guarantee, so every entry is inspected and
    // matched by name; "." and ".." never contain "hwmon".
    bool found = false;
    struct dirent *contents = nullptr;
    while ((contents = readdir(dh)) != nullptr) {
        const std::string name = contents->d_name;
        if (name.find("hwmon", 0) != std::string::npos) {
            *full_path = directory_path + name;
            found = true;
        }
    }

    closedir(dh);

    // Without a hwmon entry *full_path was never written; report that rather
    // than handing the caller an unusable path with a success status.
    return found ? AMDSMI_STATUS_SUCCESS : AMDSMI_STATUS_NOT_SUPPORTED;
}
|
||||
|
||||
|
||||
/**
 * Read board identification strings of a GPU device from sysfs.
 *
 * Reads product_name, product_number and serial_number from
 * /sys/class/drm/<gpu path>/device/. Each file is optional: a file that
 * cannot be opened leaves the corresponding field of `info` untouched.
 *
 * @param device  GPU device to inspect
 * @param info    receives the board information; must not be null
 * @return AMDSMI_STATUS_NOT_SUPPORTED when DRM is unavailable,
 *         AMDSMI_STATUS_INVAL when info is null, AMDSMI_STATUS_SUCCESS
 *         otherwise
 */
amdsmi_status_t smi_amdgpu_get_board_info(amd::smi::AMDSmiGPUDevice* device, amdsmi_board_info_t *info) {
    if (!device->check_if_drm_is_supported()) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }
    if (info == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }
    SMIGPUDEVICE_MUTEX(device->get_mutex())
    const std::string device_dir = "/sys/class/drm/" + device->get_gpu_path();
    const std::string product_name_path = device_dir + std::string("/device/product_name");
    const std::string product_number_path = device_dir + std::string("/device/product_number");
    const std::string serial_number_path = device_dir + std::string("/device/serial_number");

    FILE *fp;

    // The checks below used to be `if (!fp)`, which read from a NULL stream
    // when the file was missing and leaked the handle when it was present.
    fp = fopen(product_name_path.c_str(), "rb");
    if (fp) {
        fgets(info->product_name, sizeof(info->product_name), fp);
        fclose(fp);
    }

    fp = fopen(product_number_path.c_str(), "rb");
    if (fp) {
        fgets(info->model_number, sizeof(info->model_number), fp);
        fclose(fp);
    }

    fp = fopen(serial_number_path.c_str(), "rb");
    if (fp) {
        fscanf(fp, "%lx", &info->serial_number);
        fclose(fp);
    }

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Read the maximum power cap of a GPU device from its hwmon directory.
 *
 * Parses <hwmon dir>/power1_cap_max as an integer and divides it by 1000000
 * before storing it in *cap.
 *
 * @param device  GPU device to inspect
 * @param cap     receives the power cap; must not be null
 * @return AMDSMI_STATUS_NOT_SUPPORTED when DRM is unavailable,
 *         AMDSMI_STATUS_INVAL when cap is null, the status of
 *         smi_amdgpu_find_hwmon_dir() when it fails,
 *         AMDSMI_STATUS_API_FAILED when the file cannot be opened or parsed,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t smi_amdgpu_get_power_cap(amd::smi::AMDSmiGPUDevice* device, int *cap)
{
    if (!device->check_if_drm_is_supported()) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }
    if (cap == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }
    // Large enough for any int plus terminator; the former 10-byte buffer
    // truncated values with 10 or more digits.
    constexpr int DATA_SIZE = 32;
    char val[DATA_SIZE];
    std::string fullpath;

    // The lookup takes the device mutex itself, so it runs before the mutex
    // is acquired here.
    amdsmi_status_t ret = smi_amdgpu_find_hwmon_dir(device, &fullpath);
    if (ret)
        return ret;

    SMIGPUDEVICE_MUTEX(device->get_mutex())

    fullpath += "/power1_cap_max";
    std::ifstream file(fullpath.c_str(), std::ifstream::in);
    if (!file.is_open()) {
        printf("Failed to open file: %s \n", fullpath.c_str());
        return AMDSMI_STATUS_API_FAILED;
    }

    file.getline(val, DATA_SIZE);

    // sscanf returns the number of conversions: 0 (no digits) must be treated
    // as a failure too, not only EOF.
    if (sscanf(val, "%d", cap) != 1) {
        return AMDSMI_STATUS_API_FAILED;
    }

    // Dividing by 1000000 to get measurement in Watts
    *cap /= 1000000;

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Determine the frequency range and highest DPM level of a clock domain.
 *
 * Parses every line of the domain's pp_dpm_* sysfs file (format
 * "<level>: <freq><unit>...") and reports the smallest and largest frequency
 * and the largest level index seen. Each output pointer is optional.
 *
 * @param device    GPU device to inspect
 * @param domain    clock domain (GFX, MEM, VCLK0 or VCLK1)
 * @param max_freq  receives the largest frequency, may be null
 * @param min_freq  receives the smallest frequency, may be null; stays at
 *                  UINT_MAX (i.e. -1 as int) when the file has no lines
 * @param num_dpm   receives the largest level index, may be null
 * @return AMDSMI_STATUS_NOT_SUPPORTED when DRM is unavailable,
 *         AMDSMI_STATUS_INVAL for an unsupported domain,
 *         AMDSMI_STATUS_API_FAILED when the file cannot be opened,
 *         AMDSMI_STATUS_IO when a line cannot be parsed,
 *         AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t smi_amdgpu_get_ranges(amd::smi::AMDSmiGPUDevice* device, amdsmi_clk_type_t domain,
        int *max_freq, int *min_freq, int *num_dpm)
{
    if (!device->check_if_drm_is_supported()) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }
    SMIGPUDEVICE_MUTEX(device->get_mutex())
    std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device";

    switch (domain) {
    case CLOCK_TYPE_GFX:
        fullpath += "/pp_dpm_sclk";
        break;
    case CLOCK_TYPE_MEM:
        fullpath += "/pp_dpm_mclk";
        break;
    case CLOCK_TYPE_VCLK0:
        fullpath += "/pp_dpm_vclk";
        break;
    case CLOCK_TYPE_VCLK1:
        fullpath += "/pp_dpm_vclk1";
        break;
    default:
        return AMDSMI_STATUS_INVAL;
    }

    std::ifstream ranges(fullpath.c_str());
    if (ranges.fail()) {
        printf("Failed to open file: %s \n", fullpath.c_str());
        return AMDSMI_STATUS_API_FAILED;
    }

    unsigned int max = 0;
    unsigned int min = UINT_MAX;  // any parsed frequency will be smaller
    unsigned int dpm = 0;
    char unit[10];
    for (std::string line; getline(ranges, line);) {
        unsigned int d, freq;

        // All three fields are required. "%u" matches the unsigned targets
        // and "%9s" bounds the unit token to the buffer size.
        if (sscanf(line.c_str(), "%u: %u%9s", &d, &freq, unit) != 3) {
            return AMDSMI_STATUS_IO;
        }

        max = freq > max ? freq : max;
        min = freq < min ? freq : min;
        dpm = d > dpm ? d : dpm;
    }

    if (num_dpm)
        *num_dpm = dpm;
    if (max_freq)
        *max_freq = max;
    if (min_freq)
        *min_freq = min;

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Read the mask of RAS-enabled blocks of a GPU device.
 *
 * Takes the first line of /sys/class/drm/<gpu path>/device/ras/features and
 * parses its third whitespace-separated token as a hexadecimal number.
 *
 * @param device          GPU device to inspect
 * @param enabled_blocks  receives the parsed mask
 * @return AMDSMI_STATUS_NOT_SUPPORTED when DRM is unavailable,
 *         AMDSMI_STATUS_API_FAILED when the file cannot be opened or the
 *         parsed value is 0 or ULONG_MAX, AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t smi_amdgpu_get_enabled_blocks(amd::smi::AMDSmiGPUDevice* device, uint64_t *enabled_blocks) {
    if (!device->check_if_drm_is_supported())
        return AMDSMI_STATUS_NOT_SUPPORTED;

    SMIGPUDEVICE_MUTEX(device->get_mutex())
    std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + "/device/ras/features";
    std::ifstream features_file(fullpath.c_str());

    if (features_file.fail()) {
        printf("Failed to open file: %s \n", fullpath.c_str());
        return AMDSMI_STATUS_API_FAILED;
    }

    std::string first_line;
    getline(features_file, first_line);

    // Skip two tokens; the third one carries the hexadecimal mask.
    std::istringstream tokens(first_line);
    std::string field;
    for (int k = 0; k < 3; ++k) {
        tokens >> field;
    }

    *enabled_blocks = strtoul(field.c_str(), nullptr, 16);
    features_file.close();

    const bool unusable = (*enabled_blocks == 0) || (*enabled_blocks == ULONG_MAX);
    return unusable ? AMDSMI_STATUS_API_FAILED : AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Read the retired (bad) page records of a GPU device from sysfs.
 *
 * Each non-trailing line of
 * /sys/class/drm/<gpu path>/device/ras/gpu_vram_bad_pages is parsed as
 * "<hex address> <sep> <hex size> <sep> <status char>", where the status
 * character is 'P' (pending), 'F' (unreservable) or 'R' (reserved).
 *
 * @param device     GPU device to inspect
 * @param num_pages  receives the number of records; must not be null
 * @param info       receives the records; pass null to query the count only.
 *                   NOTE(review): the capacity of `info` is not passed in, so
 *                   the caller must size it from a prior count query.
 * @return AMDSMI_STATUS_NOT_SUPPORTED when DRM is unavailable or the file
 *         cannot be opened, AMDSMI_STATUS_INVAL when num_pages is null,
 *         AMDSMI_STATUS_API_FAILED when a line carries an unknown or missing
 *         status character, AMDSMI_STATUS_SUCCESS otherwise
 */
amdsmi_status_t smi_amdgpu_get_bad_page_info(amd::smi::AMDSmiGPUDevice* device, uint32_t *num_pages, amdsmi_retired_page_record_t *info) {
    if (!device->check_if_drm_is_supported()) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }
    if (num_pages == nullptr) {
        return AMDSMI_STATUS_INVAL;
    }
    SMIGPUDEVICE_MUTEX(device->get_mutex())
    std::string line;
    std::vector<std::string> badPagesVec;

    std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/ras/gpu_vram_bad_pages");
    std::ifstream fs(fullpath.c_str());

    if (fs.fail()) {
        printf("Failed to open file: %s \n", fullpath.c_str());
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }

    while (std::getline(fs, line)) {
        badPagesVec.push_back(line);
    }

    // Remove any *trailing* empty (whitespace) lines
    while (badPagesVec.size() != 0 &&
           badPagesVec.back().find_first_not_of(" \t\n\v\f\r") == std::string::npos) {
        badPagesVec.pop_back();
    }

    // Write the count through the pointer. The former `num_pages = 0` only
    // nulled the local pointer and left the caller's value untouched.
    *num_pages = static_cast<uint32_t>(badPagesVec.size());

    if (info == nullptr || badPagesVec.empty()) {
        return AMDSMI_STATUS_SUCCESS;
    }

    std::string junk;
    for (uint32_t i = 0; i < *num_pages; ++i) {
        std::istringstream fs1(badPagesVec[i]);
        // Start from a value that hits the default case, so a truncated line
        // is rejected instead of switching on an uninitialised char.
        char status_code = '\0';
        amdsmi_memory_page_status_t tmp_stat;

        fs1 >> std::hex >> info[i].page_address;
        fs1 >> junk;
        fs1 >> std::hex >> info[i].page_size;
        fs1 >> junk;
        fs1 >> status_code;

        switch (status_code) {
        case 'P':
            tmp_stat = AMDSMI_MEM_PAGE_STATUS_PENDING;
            break;

        case 'F':
            tmp_stat = AMDSMI_MEM_PAGE_STATUS_UNRESERVABLE;
            break;

        case 'R':
            tmp_stat = AMDSMI_MEM_PAGE_STATUS_RESERVED;
            break;
        default:
            return AMDSMI_STATUS_API_FAILED;
        }
        info[i].status = tmp_stat;
    }

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/**
 * Read the UMC ECC error counters of a GPU from sysfs.
 *
 * The first two lines of /sys/class/drm/<gpu>/device/ras/umc_err_count are each
 * parsed as `<label> <count>`; the first line supplies the uncorrectable count
 * and the second line the correctable count.
 *
 * @param device   GPU device to query; DRM support is required.
 * @param err_cnt  Output: receives both counters. Only written when both lines
 *                 were parsed successfully.
 * @return AMDSMI_STATUS_NOT_SUPPORTED when the device reports no DRM support
 *         or the file cannot be opened, AMDSMI_STATUS_API_FAILED when either
 *         line does not match `<label> <count>`, AMDSMI_STATUS_SUCCESS
 *         otherwise.
 */
amdsmi_status_t smi_amdgpu_get_ecc_error_count(amd::smi::AMDSmiGPUDevice* device, amdsmi_error_count_t *err_cnt) {
    if (!device->check_if_drm_is_supported()) {
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }
    SMIGPUDEVICE_MUTEX(device->get_mutex())

    std::string fullpath = "/sys/class/drm/" + device->get_gpu_path() + std::string("/device/ras/umc_err_count");
    std::ifstream f(fullpath.c_str());

    if (f.fail()) {
        printf("Failed to open file: %s \n", fullpath.c_str());
        return AMDSMI_STATUS_NOT_SUPPORTED;
    }

    // The label is discarded; "%9s" bounds the write to the 10-byte buffer
    // (9 characters plus the terminating NUL).
    char label[10];
    // Parse into locals whose type matches the conversion specifier exactly,
    // then assign, so the scan does not depend on the struct's field types.
    unsigned long long uncorrectable = 0;
    unsigned long long correctable = 0;
    std::string line;

    getline(f, line);
    const bool have_uncorrectable =
        sscanf(line.c_str(), "%9s%llu", label, &uncorrectable) == 2;

    getline(f, line);
    const bool have_correctable =
        sscanf(line.c_str(), "%9s%llu", label, &correctable) == 2;

    f.close();

    // Do not report success with counters that were never filled in.
    if (!have_uncorrectable || !have_correctable) {
        return AMDSMI_STATUS_API_FAILED;
    }

    err_cnt->uncorrectable_count = uncorrectable;
    err_cnt->correctable_count = correctable;

    return AMDSMI_STATUS_SUCCESS;
}
|
||||
@@ -0,0 +1,264 @@
|
||||
/* * Copyright (C) 2022 Advanced Micro Devices. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
* this software and associated documentation files (the "Software"), to deal in
|
||||
* the Software without restriction, including without limitation the rights to
|
||||
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
* the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in all
|
||||
* copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <dirent.h>
|
||||
#include <unistd.h>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <string.h>
|
||||
|
||||
#include "amd_smi/amd_smi.h"
|
||||
#include "amd_smi/impl/amd_smi_utils.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
/*
 * Scan every entry of the directory `path` (expected to end with '/') and
 * look for a line containing the string `bdf`.
 *
 * Returns AMDSMI_STATUS_NO_PERM when the directory cannot be opened,
 * AMDSMI_STATUS_SUCCESS as soon as one entry contains `bdf`, and
 * AMDSMI_STATUS_NOT_FOUND when no entry does.
 */
amdsmi_status_t gpuvsmi_pid_is_gpu(const std::string &path, const char *bdf)
{
	DIR *fd_dir = opendir(path.c_str());
	if (fd_dir == NULL)
		return AMDSMI_STATUS_NO_PERM;

	bool matched = false;
	struct dirent *entry;

	/* Walk the entries until one of them mentions the GPU bdf */
	while (!matched && (entry = readdir(fd_dir)) != NULL) {
		std::string entry_path = path + entry->d_name;
		std::ifstream entry_stream(entry_path.c_str());
		std::string text;

		while (std::getline(entry_stream, text)) {
			if (text.find(bdf) != std::string::npos) {
				matched = true;
				break;
			}
		}
	}

	closedir(fd_dir);

	return matched ? AMDSMI_STATUS_SUCCESS : AMDSMI_STATUS_NOT_FOUND;
}
|
||||
|
||||
/*
 * Build the list of process ids under /proc whose fdinfo directory is
 * readable and contains a reference to the GPU identified by `bdf`.
 *
 * `pids` is cleared and then filled with the matching ids; `*size` receives
 * the number of ids stored. Returns AMDSMI_STATUS_NO_PERM when /proc cannot
 * be opened and AMDSMI_STATUS_SUCCESS otherwise.
 */
amdsmi_status_t gpuvsmi_get_pids(const amdsmi_bdf_t &bdf, std::vector<long int> &pids, uint64_t *size)
{
	/* Textual form of the bdf: 0000:00:00.0 plus the terminating NUL */
	char bdf_str[13];

	snprintf(bdf_str, 13, "%04x:%02x:%02x.%d", bdf.domain_number & 0xffff,
			bdf.bus_number & 0xff,
			bdf.device_number & 0x1f,
			bdf.function_number & 0x7);

	DIR *proc_dir = opendir("/proc");
	if (proc_dir == NULL)
		return AMDSMI_STATUS_NO_PERM;

	pids.clear();

	/* Visit the pid folders in /proc/ that we have access to */
	for (struct dirent *entry = readdir(proc_dir); entry != NULL;
			entry = readdir(proc_dir)) {
		if (entry->d_type != DT_DIR)
			continue;

		/* A pid folder has a purely numeric name; anything left
		 * over after the conversion means it is something else */
		char *rest;
		long int candidate = strtol(entry->d_name, &rest, 10);
		if (*rest != 0)
			continue;

		/* Skip processes whose fdinfo is not accessible */
		std::string fdinfo_dir = "/proc/" + std::string(entry->d_name) + "/fdinfo/";
		if (access(fdinfo_dir.c_str(), R_OK))
			continue;

		/* Keep the pid only when the GPU shows up in its fdinfo */
		if (gpuvsmi_pid_is_gpu(fdinfo_dir, bdf_str))
			continue;

		pids.push_back(candidate);
	}
	closedir(proc_dir);

	*size = pids.size();
	return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
/*
 * Collect usage information for process `pid` by parsing every file under
 * /proc/<pid>/fdinfo/.
 *
 * The process must reference the GPU identified by `bdf` in at least one
 * fdinfo entry. Once that is established, `info` is zeroed and the
 * "gtt mem:", "cpu mem:" and "vram mem:" values (multiplied by 1024 -
 * presumably converting KiB to bytes, TODO confirm) and the per-ring
 * gfx/compute/dma/enc/dec percentages (stored multiplied by 100) of all fdinfo
 * entries are accumulated. The process name is read from /proc/<pid>/comm.
 *
 * NOTE(review): the accumulation loop does not filter entries by `bdf`, so
 * values from fdinfo entries of other devices appear to be summed as well -
 * confirm whether that is intended.
 *
 * Returns:
 *   AMDSMI_STATUS_INVAL      - gpuvsmi_pid_is_gpu() did not report a match
 *                              (info is left untouched);
 *   AMDSMI_STATUS_NO_PERM    - the fdinfo directory cannot be opened
 *                              (info is left untouched);
 *   AMDSMI_STATUS_NOT_FOUND  - no "pasid:" line could be parsed;
 *   AMDSMI_STATUS_API_FAILED - the process name is empty or unreadable;
 *   AMDSMI_STATUS_SUCCESS    - otherwise.
 */
amdsmi_status_t gpuvsmi_get_pid_info(const amdsmi_bdf_t &bdf, long int pid,
		amdsmi_proc_info_t &info)
{
	char bdf_str[13];
	DIR *d;
	struct dirent *dir;

	/* 0000:00:00.0 */
	snprintf(bdf_str, 13, "%04x:%02x:%02x.%d", bdf.domain_number & 0xffff,
			bdf.bus_number & 0xff,
			bdf.device_number & 0x1f,
			bdf.function_number & 0x7);

	std::string path = "/proc/" + std::to_string(pid) + "/fdinfo/";
	std::string name_path = "/proc/" + std::to_string(pid) + "/comm";

	if (gpuvsmi_pid_is_gpu(path, bdf_str)) {
		return AMDSMI_STATUS_INVAL;
	}

	d = opendir(path.c_str());
	if (!d)
		return AMDSMI_STATUS_NO_PERM;

	/* Distinct pasid values seen so far, to detect repeated fd pasids */
	std::vector<int> pasids;

	memset(&info, 0, sizeof(info));

	/* Iterate through all fdinfos */
	while ((dir = readdir(d)) != NULL) {
		std::string file = path + dir->d_name;
		std::ifstream fdinfo(file.c_str());

		for (std::string line; getline(fdinfo, line);) {
			if (line.find("pasid:") != std::string::npos) {
				int pasid;

				if (sscanf(line.c_str(), "pasid: %d", &pasid) != 1)
					continue;

				auto it = std::find(pasids.begin(), pasids.end(), pasid);

				if (it == pasids.end())
					pasids.push_back(pasid);
			} else if (line.find("gtt mem:") != std::string::npos) {
				unsigned long mem;

				if (sscanf(line.c_str(), "gtt mem: %lu", &mem) != 1)
					continue;

				info.mem += mem * 1024;
				info.memory_usage.gtt_mem += mem * 1024;
			} else if (line.find("cpu mem:") != std::string::npos) {
				unsigned long mem;

				if (sscanf(line.c_str(), "cpu mem: %lu", &mem) != 1)
					continue;

				info.mem += mem * 1024;
				info.memory_usage.cpu_mem += mem * 1024;
			} else if (line.find("vram mem:") != std::string::npos) {
				unsigned long mem;

				if (sscanf(line.c_str(), "vram mem: %lu", &mem) != 1)
					continue;

				info.mem += mem * 1024;
				info.memory_usage.vram_mem += mem * 1024;
			} else if (line.find("gfx") != std::string::npos) {
				float usage;
				int ring;

				if (sscanf(line.c_str(), "gfx%d: %f%%", &ring, &usage) != 2)
					continue;

				/* %d also accepts a sign, so reject negative
				 * indices as well as too large ones */
				if (ring < 0 || ring >= AMDSMI_MAX_MM_IP_COUNT)
					continue;

				info.engine_usage.gfx[ring] += (uint16_t)(usage * 100);
			} else if (line.find("compute") != std::string::npos) {
				float usage;
				int ring;

				if (sscanf(line.c_str(), "compute%d: %f%%", &ring, &usage) != 2)
					continue;

				if (ring < 0 || ring >= AMDSMI_MAX_MM_IP_COUNT)
					continue;

				info.engine_usage.compute[ring] += (uint16_t)(usage * 100);
			} else if (line.find("dma") != std::string::npos) {
				float usage;
				int ring;

				if (sscanf(line.c_str(), "dma%d: %f%%", &ring, &usage) != 2)
					continue;

				if (ring < 0 || ring >= AMDSMI_MAX_MM_IP_COUNT)
					continue;

				info.engine_usage.sdma[ring] += (uint16_t)(usage * 100);
			} else if (line.find("enc") != std::string::npos) {
				float usage;
				int ring;

				if (sscanf(line.c_str(), "enc%d: %f%%", &ring, &usage) != 2)
					continue;

				if (ring < 0 || ring >= AMDSMI_MAX_MM_IP_COUNT)
					continue;

				info.engine_usage.enc[ring] += (uint16_t)(usage * 100);
			} else if (line.find("dec") != std::string::npos) {
				float usage;
				int ring;

				if (sscanf(line.c_str(), "dec%d: %f%%", &ring, &usage) != 2)
					continue;

				if (ring < 0 || ring >= AMDSMI_MAX_MM_IP_COUNT)
					continue;

				info.engine_usage.dec[ring] += (uint16_t)(usage * 100);
			}
		}
	}

	closedir(d);

	if (!pasids.size())
		return AMDSMI_STATUS_NOT_FOUND;

	std::ifstream filename(name_path.c_str());
	std::string name;

	getline(filename, name);

	if (name.empty())
		return AMDSMI_STATUS_API_FAILED;

	/* Copy at most AMDSMI_NORMAL_STRING_LENGTH - 1 characters. info was
	 * zeroed above, so the byte after the copied text stays NUL and the
	 * name is always terminated (assuming info.name holds
	 * AMDSMI_NORMAL_STRING_LENGTH bytes - TODO confirm). */
	const std::string::size_type max_name_chars =
			static_cast<std::string::size_type>(AMDSMI_NORMAL_STRING_LENGTH) - 1;
	strncpy(info.name, name.c_str(), std::min(max_name_chars, name.length()));

	info.pid = (uint32_t)pid;

	return AMDSMI_STATUS_SUCCESS;
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -160,7 +160,7 @@ void TestErrCntRead::Run(void) {
|
||||
<< std::endl;
|
||||
std::cout << "\t\tCorrectable errors: " << ec.correctable_err
|
||||
<< std::endl;
|
||||
std::cout << "\t\tUncorrectable errors: " << ec.uncorrectable_err
|
||||
std::cout << "\t\tUncorrectable errors: " << ec.uncorrectable_count
|
||||
<< std::endl;
|
||||
}
|
||||
// Verify api support checking functionality is working
|
||||
|
||||
Ссылка в новой задаче
Block a user