838b3dccf1
* Fix the amdgpu version string comparison The intention behind it was to avoid showing the string if it's not got information. Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> * Display the kernel version in amd-smi output This is an interesting debugging point, especially in the case of not having a DKMS package installed. Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org> * Moving os_kernel_version to static --driver Signed-off-by: Maisam Arif <Maisam.Arif@amd.com> --------- Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Mario Limonciello (AMD) <superm1@kernel.org> Signed-off-by: Maisam Arif <Maisam.Arif@amd.com> Co-authored-by: Maisam Arif <Maisam.Arif@amd.com>
5989 рядки
237 KiB
Python
5989 рядки
237 KiB
Python
# Copyright (C) Advanced Micro Devices. All rights reserved.
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
# this software and associated documentation files (the "Software"), to deal in
|
|
# the Software without restriction, including without limitation the rights to
|
|
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
|
# the Software, and to permit persons to whom the Software is furnished to do so,
|
|
# subject to the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be included in all
|
|
# copies or substantial portions of the Software.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
|
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
|
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
import ctypes
|
|
import math
|
|
import os
|
|
import re
|
|
import sys
|
|
from collections.abc import Iterable
|
|
from ctypes import POINTER, c_void_p
|
|
from enum import IntEnum, Enum
|
|
from pathlib import Path
|
|
from time import asctime, localtime, time
|
|
from typing import Any, Dict, List, Tuple, Union
|
|
|
|
from . import amdsmi_wrapper
|
|
from .amdsmi_exception import *
|
|
|
|
### Non Library Specific Constants ###
|
|
class MaxUIntegerTypes(IntEnum):
    """Largest value representable by each fixed-width unsigned integer type."""

    # Each value is 2**bits - 1, i.e. all bits of the given width set.
    UINT8_T = (1 << 8) - 1
    UINT16_T = (1 << 16) - 1
    UINT32_T = (1 << 32) - 1
    UINT64_T = (1 << 64) - 1
|
|
|
|
# Bit widths of the fixed-size C unsigned integer types.
# ctypes.sizeof() reports the C storage size in bytes. sys.getsizeof() must not
# be used here: on a ctypes type it returns the memory footprint of the Python
# type object itself, which is unrelated to the integer width.
NO_OF_32BITS = ctypes.sizeof(ctypes.c_uint32) * 8
NO_OF_64BITS = ctypes.sizeof(ctypes.c_uint64) * 8
# Decimal kilo multiplier; math.pow always returns a float (1000.0).
KILO = math.pow(10, 3)
|
|
|
|
# Opaque handle aliases: both socket and processor handles are plain void pointers.
socket_handle_t = c_void_p
processor_handle_t = c_void_p
###############################

MAX_NUM_PROCESSES = 1024

# gpu metrics macros defined in amdsmi.h
AMDSMI_NUM_HBM_INSTANCES = 4
AMDSMI_MAX_NUM_VCN = 4
AMDSMI_MAX_NUM_CLKS = 4
AMDSMI_MAX_NUM_XGMI_LINKS = 8
AMDSMI_MAX_NUM_GFX_CLKS = 8
AMDSMI_MAX_AID = 4
AMDSMI_MAX_ENGINES = 8
AMDSMI_MAX_NUM_JPEG = 32
AMDSMI_MAX_NUM_XCC = 8
AMDSMI_MAX_NUM_XCP = 8

# Max number of AFIDs per CPER record
MAX_NUMBER_OF_AFIDS_PER_RECORD = 12

# Max number of DPM policies
AMDSMI_MAX_NUM_PM_POLICIES = 32

# Max supported frequencies
AMDSMI_MAX_NUM_FREQUENCIES = 33

# Max fan speed
AMDSMI_MAX_FAN_SPEED = 255

# Max voltage curve points
AMDSMI_NUM_VOLTAGE_CURVE_POINTS = 3

# Max size definitions
AMDSMI_MAX_MM_IP_COUNT = 8
AMDSMI_MAX_STRING_LENGTH = 256
AMDSMI_MAX_DEVICES = 32
AMDSMI_MAX_CONTAINER_TYPE = 2
AMDSMI_MAX_CACHE_TYPES = 10
AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK = 64
AMDSMI_GPU_UUID_SIZE = 38
_AMDSMI_STRING_LENGTH = 80
|
|
|
|
class AmdSmiStatus(IntEnum):
    """Status codes, each bound to the matching amdsmi_wrapper.AMDSMI_STATUS_* constant."""

    SUCCESS = amdsmi_wrapper.AMDSMI_STATUS_SUCCESS
    INVAL = amdsmi_wrapper.AMDSMI_STATUS_INVAL
    NOT_SUPPORTED = amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED
    NOT_YET_IMPLEMENTED = amdsmi_wrapper.AMDSMI_STATUS_NOT_YET_IMPLEMENTED
    FAIL_LOAD_MODULE = amdsmi_wrapper.AMDSMI_STATUS_FAIL_LOAD_MODULE
    FAIL_LOAD_SYMBOL = amdsmi_wrapper.AMDSMI_STATUS_FAIL_LOAD_SYMBOL
    DRM_ERROR = amdsmi_wrapper.AMDSMI_STATUS_DRM_ERROR
    API_FAILED = amdsmi_wrapper.AMDSMI_STATUS_API_FAILED
    TIMEOUT = amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT
    RETRY = amdsmi_wrapper.AMDSMI_STATUS_RETRY
    NO_PERM = amdsmi_wrapper.AMDSMI_STATUS_NO_PERM
    INTERRUPT = amdsmi_wrapper.AMDSMI_STATUS_INTERRUPT
    IO = amdsmi_wrapper.AMDSMI_STATUS_IO
    ADDRESS_FAULT = amdsmi_wrapper.AMDSMI_STATUS_ADDRESS_FAULT
    FILE_ERROR = amdsmi_wrapper.AMDSMI_STATUS_FILE_ERROR
    OUT_OF_RESOURCES = amdsmi_wrapper.AMDSMI_STATUS_OUT_OF_RESOURCES
    INTERNAL_EXCEPTION = amdsmi_wrapper.AMDSMI_STATUS_INTERNAL_EXCEPTION
    INPUT_OUT_OF_BOUNDS = amdsmi_wrapper.AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS
    INIT_ERROR = amdsmi_wrapper.AMDSMI_STATUS_INIT_ERROR
    REFCOUNT_OVERFLOW = amdsmi_wrapper.AMDSMI_STATUS_REFCOUNT_OVERFLOW
    DIRECTORY_NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_DIRECTORY_NOT_FOUND
    BUSY = amdsmi_wrapper.AMDSMI_STATUS_BUSY
    NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_NOT_FOUND
    NOT_INIT = amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT
    NO_SLOT = amdsmi_wrapper.AMDSMI_STATUS_NO_SLOT
    DRIVER_NOT_LOADED = amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED
    MORE_DATA = amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA
    NO_DATA = amdsmi_wrapper.AMDSMI_STATUS_NO_DATA
    INSUFFICIENT_SIZE = amdsmi_wrapper.AMDSMI_STATUS_INSUFFICIENT_SIZE
    UNEXPECTED_SIZE = amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_SIZE
    UNEXPECTED_DATA = amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_DATA
    NON_AMD_CPU = amdsmi_wrapper.AMDSMI_STATUS_NON_AMD_CPU
    NO_ENERGY_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_ENERGY_DRV
    NO_MSR_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_MSR_DRV
    NO_HSMP_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_DRV
    NO_HSMP_SUP = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_SUP
    NO_HSMP_MSG_SUP = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_MSG_SUP
    HSMP_TIMEOUT = amdsmi_wrapper.AMDSMI_STATUS_HSMP_TIMEOUT
    NO_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_DRV
    FILE_NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_FILE_NOT_FOUND
    ARG_PTR_NULL = amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL
    AMDGPU_RESTART_ERR = amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR
    SETTING_UNAVAILABLE = amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE
    CORRUPTED_EEPROM = amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM
    MAP_ERROR = amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR
    UNKNOWN_ERROR = amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR
|
|
|
|
|
|
class AmdSmiInitFlags(IntEnum):
    """Initialization flags bound to the amdsmi_wrapper.AMDSMI_INIT_* constants."""

    INIT_ALL_PROCESSORS = amdsmi_wrapper.AMDSMI_INIT_ALL_PROCESSORS
    INIT_AMD_CPUS = amdsmi_wrapper.AMDSMI_INIT_AMD_CPUS
    INIT_AMD_GPUS = amdsmi_wrapper.AMDSMI_INIT_AMD_GPUS
    INIT_AMD_APUS = amdsmi_wrapper.AMDSMI_INIT_AMD_APUS
    INIT_NON_AMD_CPUS = amdsmi_wrapper.AMDSMI_INIT_NON_AMD_CPUS
    INIT_NON_AMD_GPUS = amdsmi_wrapper.AMDSMI_INIT_NON_AMD_GPUS
|
|
|
|
|
|
class AmdSmiContainerTypes(IntEnum):
    """Container kinds bound to the amdsmi_wrapper.AMDSMI_CONTAINER_* constants."""

    LXC = amdsmi_wrapper.AMDSMI_CONTAINER_LXC
    DOCKER = amdsmi_wrapper.AMDSMI_CONTAINER_DOCKER
|
|
|
|
|
|
class AmdSmiDeviceType(IntEnum):
    """Device kinds bound to the amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_* constants.

    Draws on the same wrapper constants as AmdSmiProcessorType, under
    ``*_DEVICE`` member names.
    """

    UNKNOWN_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_UNKNOWN
    AMD_GPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_GPU
    AMD_CPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU
    NON_AMD_GPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU
    NON_AMD_CPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU
|
|
|
|
|
|
class AmdSmiMmIp(IntEnum):
    """Multimedia IP identifiers bound to the amdsmi_wrapper.AMDSMI_MM_* constants."""

    UVD = amdsmi_wrapper.AMDSMI_MM_UVD
    VCE = amdsmi_wrapper.AMDSMI_MM_VCE
    VCN = amdsmi_wrapper.AMDSMI_MM_VCN
|
|
|
|
|
|
class AmdSmiFwBlock(IntEnum):
    """Firmware block identifiers.

    Every member keeps the full ``AMDSMI_FW_ID_*`` name of the amdsmi_wrapper
    constant it is bound to.
    """

    AMDSMI_FW_ID_SMU = amdsmi_wrapper.AMDSMI_FW_ID_SMU
    AMDSMI_FW_ID_CP_CE = amdsmi_wrapper.AMDSMI_FW_ID_CP_CE
    AMDSMI_FW_ID_CP_PFP = amdsmi_wrapper.AMDSMI_FW_ID_CP_PFP
    AMDSMI_FW_ID_CP_ME = amdsmi_wrapper.AMDSMI_FW_ID_CP_ME
    AMDSMI_FW_ID_CP_MEC_JT1 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC_JT1
    AMDSMI_FW_ID_CP_MEC_JT2 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC_JT2
    AMDSMI_FW_ID_CP_MEC1 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC1
    AMDSMI_FW_ID_CP_MEC2 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC2
    AMDSMI_FW_ID_RLC = amdsmi_wrapper.AMDSMI_FW_ID_RLC
    AMDSMI_FW_ID_SDMA0 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA0
    AMDSMI_FW_ID_SDMA1 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA1
    AMDSMI_FW_ID_SDMA2 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA2
    AMDSMI_FW_ID_SDMA3 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA3
    AMDSMI_FW_ID_SDMA4 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA4
    AMDSMI_FW_ID_SDMA5 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA5
    AMDSMI_FW_ID_SDMA6 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA6
    AMDSMI_FW_ID_SDMA7 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA7
    AMDSMI_FW_ID_VCN = amdsmi_wrapper.AMDSMI_FW_ID_VCN
    AMDSMI_FW_ID_UVD = amdsmi_wrapper.AMDSMI_FW_ID_UVD
    AMDSMI_FW_ID_VCE = amdsmi_wrapper.AMDSMI_FW_ID_VCE
    AMDSMI_FW_ID_ISP = amdsmi_wrapper.AMDSMI_FW_ID_ISP
    AMDSMI_FW_ID_DMCU_ERAM = amdsmi_wrapper.AMDSMI_FW_ID_DMCU_ERAM
    AMDSMI_FW_ID_DMCU_ISR = amdsmi_wrapper.AMDSMI_FW_ID_DMCU_ISR
    AMDSMI_FW_ID_RLC_RESTORE_LIST_GPM_MEM = amdsmi_wrapper.AMDSMI_FW_ID_RLC_RESTORE_LIST_GPM_MEM
    AMDSMI_FW_ID_RLC_RESTORE_LIST_SRM_MEM = amdsmi_wrapper.AMDSMI_FW_ID_RLC_RESTORE_LIST_SRM_MEM
    AMDSMI_FW_ID_RLC_RESTORE_LIST_CNTL = amdsmi_wrapper.AMDSMI_FW_ID_RLC_RESTORE_LIST_CNTL
    AMDSMI_FW_ID_RLC_V = amdsmi_wrapper.AMDSMI_FW_ID_RLC_V
    AMDSMI_FW_ID_MMSCH = amdsmi_wrapper.AMDSMI_FW_ID_MMSCH
    AMDSMI_FW_ID_PSP_SYSDRV = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SYSDRV
    AMDSMI_FW_ID_PSP_SOSDRV = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SOSDRV
    AMDSMI_FW_ID_PSP_TOC = amdsmi_wrapper.AMDSMI_FW_ID_PSP_TOC
    AMDSMI_FW_ID_PSP_KEYDB = amdsmi_wrapper.AMDSMI_FW_ID_PSP_KEYDB
    AMDSMI_FW_ID_DFC = amdsmi_wrapper.AMDSMI_FW_ID_DFC
    AMDSMI_FW_ID_PSP_SPL = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SPL
    AMDSMI_FW_ID_DRV_CAP = amdsmi_wrapper.AMDSMI_FW_ID_DRV_CAP
    AMDSMI_FW_ID_MC = amdsmi_wrapper.AMDSMI_FW_ID_MC
    AMDSMI_FW_ID_PSP_BL = amdsmi_wrapper.AMDSMI_FW_ID_PSP_BL
    AMDSMI_FW_ID_CP_PM4 = amdsmi_wrapper.AMDSMI_FW_ID_CP_PM4
    AMDSMI_FW_ID_RLC_P = amdsmi_wrapper.AMDSMI_FW_ID_RLC_P
    AMDSMI_FW_ID_SEC_POLICY_STAGE2 = amdsmi_wrapper.AMDSMI_FW_ID_SEC_POLICY_STAGE2
    AMDSMI_FW_ID_REG_ACCESS_WHITELIST = amdsmi_wrapper.AMDSMI_FW_ID_REG_ACCESS_WHITELIST
    AMDSMI_FW_ID_IMU_DRAM = amdsmi_wrapper.AMDSMI_FW_ID_IMU_DRAM
    AMDSMI_FW_ID_IMU_IRAM = amdsmi_wrapper.AMDSMI_FW_ID_IMU_IRAM
    AMDSMI_FW_ID_SDMA_TH0 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA_TH0
    AMDSMI_FW_ID_SDMA_TH1 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA_TH1
    AMDSMI_FW_ID_CP_MES = amdsmi_wrapper.AMDSMI_FW_ID_CP_MES
    AMDSMI_FW_ID_MES_STACK = amdsmi_wrapper.AMDSMI_FW_ID_MES_STACK
    AMDSMI_FW_ID_MES_THREAD1 = amdsmi_wrapper.AMDSMI_FW_ID_MES_THREAD1
    AMDSMI_FW_ID_MES_THREAD1_STACK = amdsmi_wrapper.AMDSMI_FW_ID_MES_THREAD1_STACK
    AMDSMI_FW_ID_RLX6 = amdsmi_wrapper.AMDSMI_FW_ID_RLX6
    AMDSMI_FW_ID_RLX6_DRAM_BOOT = amdsmi_wrapper.AMDSMI_FW_ID_RLX6_DRAM_BOOT
    AMDSMI_FW_ID_RS64_ME = amdsmi_wrapper.AMDSMI_FW_ID_RS64_ME
    AMDSMI_FW_ID_RS64_ME_P0_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_ME_P0_DATA
    AMDSMI_FW_ID_RS64_ME_P1_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_ME_P1_DATA
    AMDSMI_FW_ID_RS64_PFP = amdsmi_wrapper.AMDSMI_FW_ID_RS64_PFP
    AMDSMI_FW_ID_RS64_PFP_P0_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_PFP_P0_DATA
    AMDSMI_FW_ID_RS64_PFP_P1_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_PFP_P1_DATA
    AMDSMI_FW_ID_RS64_MEC = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC
    AMDSMI_FW_ID_RS64_MEC_P0_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P0_DATA
    AMDSMI_FW_ID_RS64_MEC_P1_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P1_DATA
    AMDSMI_FW_ID_RS64_MEC_P2_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P2_DATA
    AMDSMI_FW_ID_RS64_MEC_P3_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P3_DATA
    AMDSMI_FW_ID_PPTABLE = amdsmi_wrapper.AMDSMI_FW_ID_PPTABLE
    AMDSMI_FW_ID_PSP_SOC = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SOC
    AMDSMI_FW_ID_PSP_DBG = amdsmi_wrapper.AMDSMI_FW_ID_PSP_DBG
    AMDSMI_FW_ID_PSP_INTF = amdsmi_wrapper.AMDSMI_FW_ID_PSP_INTF
    AMDSMI_FW_ID_RLX6_CORE1 = amdsmi_wrapper.AMDSMI_FW_ID_RLX6_CORE1
    AMDSMI_FW_ID_RLX6_DRAM_BOOT_CORE1 = amdsmi_wrapper.AMDSMI_FW_ID_RLX6_DRAM_BOOT_CORE1
    AMDSMI_FW_ID_RLCV_LX7 = amdsmi_wrapper.AMDSMI_FW_ID_RLCV_LX7
    AMDSMI_FW_ID_RLC_SAVE_RESTORE_LIST = amdsmi_wrapper.AMDSMI_FW_ID_RLC_SAVE_RESTORE_LIST
    AMDSMI_FW_ID_ASD = amdsmi_wrapper.AMDSMI_FW_ID_ASD
    AMDSMI_FW_ID_TA_RAS = amdsmi_wrapper.AMDSMI_FW_ID_TA_RAS
    AMDSMI_FW_ID_TA_XGMI = amdsmi_wrapper.AMDSMI_FW_ID_TA_XGMI
    AMDSMI_FW_ID_RLC_SRLG = amdsmi_wrapper.AMDSMI_FW_ID_RLC_SRLG
    AMDSMI_FW_ID_RLC_SRLS = amdsmi_wrapper.AMDSMI_FW_ID_RLC_SRLS
    AMDSMI_FW_ID_PM = amdsmi_wrapper.AMDSMI_FW_ID_PM
    AMDSMI_FW_ID_DMCU = amdsmi_wrapper.AMDSMI_FW_ID_DMCU
    AMDSMI_FW_ID_PLDM_BUNDLE = amdsmi_wrapper.AMDSMI_FW_ID_PLDM_BUNDLE
|
|
|
|
|
|
class AmdSmiClkType(IntEnum):
    """Clock domains bound to the amdsmi_wrapper.AMDSMI_CLK_TYPE_* constants."""

    SYS = amdsmi_wrapper.AMDSMI_CLK_TYPE_SYS
    GFX = amdsmi_wrapper.AMDSMI_CLK_TYPE_GFX
    DF = amdsmi_wrapper.AMDSMI_CLK_TYPE_DF
    DCEF = amdsmi_wrapper.AMDSMI_CLK_TYPE_DCEF
    SOC = amdsmi_wrapper.AMDSMI_CLK_TYPE_SOC
    MEM = amdsmi_wrapper.AMDSMI_CLK_TYPE_MEM
    PCIE = amdsmi_wrapper.AMDSMI_CLK_TYPE_PCIE
    VCLK0 = amdsmi_wrapper.AMDSMI_CLK_TYPE_VCLK0
    VCLK1 = amdsmi_wrapper.AMDSMI_CLK_TYPE_VCLK1
    DCLK0 = amdsmi_wrapper.AMDSMI_CLK_TYPE_DCLK0
    DCLK1 = amdsmi_wrapper.AMDSMI_CLK_TYPE_DCLK1
|
|
|
|
class AmdSmiClkLimitType(IntEnum):
    """Clock limit selectors bound to amdsmi_wrapper.CLK_LIMIT_MIN / CLK_LIMIT_MAX."""

    MIN = amdsmi_wrapper.CLK_LIMIT_MIN
    MAX = amdsmi_wrapper.CLK_LIMIT_MAX
|
|
|
|
class AmdSmiTemperatureType(IntEnum):
    """Temperature sensor selectors bound to amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_*.

    Members are declared in four groups: per-GPU sensors, GPU board node
    sensors, GPU board voltage-regulator sensors, and baseboard sensors.
    """

    # Per-GPU sensors
    EDGE = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_EDGE
    HOTSPOT = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HOTSPOT
    JUNCTION = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_JUNCTION
    VRAM = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_VRAM
    HBM_0 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_0
    HBM_1 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_1
    HBM_2 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_2
    HBM_3 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_3
    PLX = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_PLX

    # GPU Board Node temperature
    GPUBOARD_NODE_RETIMER_X = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_RETIMER_X  # Retimer X temperature
    GPUBOARD_NODE_OAM_X_IBC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_IBC  # OAM X IBC temperature
    GPUBOARD_NODE_OAM_X_IBC_2 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_IBC_2  # OAM X IBC 2 temperature
    GPUBOARD_NODE_OAM_X_VDD18_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_VDD18_VR  # OAM X VDD 1.8V voltage regulator temperature
    GPUBOARD_NODE_OAM_X_04_HBM_B_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_04_HBM_B_VR  # OAM X 0.4V HBM B voltage regulator temperature
    GPUBOARD_NODE_OAM_X_04_HBM_D_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_04_HBM_D_VR  # OAM X 0.4V HBM D voltage regulator temperature
    GPUBOARD_NODE_LAST = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_LAST

    # GPU Board VR (Voltage Regulator) temperature
    GPUBOARD_VDDCR_VDD0 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD0  # VDDCR VDD0 voltage regulator temperature
    GPUBOARD_VDDCR_VDD1 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD1  # VDDCR VDD1 voltage regulator temperature
    GPUBOARD_VDDCR_VDD2 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD2  # VDDCR VDD2 voltage regulator temperature
    GPUBOARD_VDDCR_VDD3 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD3  # VDDCR VDD3 voltage regulator temperature
    GPUBOARD_VDDCR_SOC_A = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOC_A  # VDDCR SOC A voltage regulator temperature
    GPUBOARD_VDDCR_SOC_C = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOC_C  # VDDCR SOC C voltage regulator temperature
    GPUBOARD_VDDCR_SOCIO_A = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOCIO_A  # VDDCR SOCIO A voltage regulator temperature
    GPUBOARD_VDDCR_SOCIO_C = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOCIO_C  # VDDCR SOCIO C voltage regulator temperature
    GPUBOARD_VDD_085_HBM = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDD_085_HBM  # VDD 0.85V HBM voltage regulator temperature
    GPUBOARD_VDDCR_11_HBM_B = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_11_HBM_B  # VDDCR 1.1V HBM B voltage regulator temperature
    GPUBOARD_VDDCR_11_HBM_D = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_11_HBM_D  # VDDCR 1.1V HBM D voltage regulator temperature
    GPUBOARD_VDD_USR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDD_USR  # VDD USR voltage regulator temperature
    GPUBOARD_VDDIO_11_E32 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDIO_11_E32  # VDDIO 1.1V E32 voltage regulator temperature
    GPUBOARD_VR_LAST = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VR_LAST

    # Baseboard System temperature
    BASEBOARD_UBB_FPGA = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA  # UBB FPGA temperature
    BASEBOARD_UBB_FRONT = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FRONT  # UBB front temperature
    BASEBOARD_UBB_BACK = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_BACK  # UBB back temperature
    BASEBOARD_UBB_OAM7 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_OAM7  # UBB OAM7 temperature
    BASEBOARD_UBB_IBC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_IBC  # UBB IBC temperature
    BASEBOARD_UBB_UFPGA = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_UFPGA  # UBB UFPGA temperature
    BASEBOARD_UBB_OAM1 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_OAM1  # UBB OAM1 temperature
    BASEBOARD_OAM_0_1_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_0_1_HSC  # OAM 0-1 HSC temperature
    BASEBOARD_OAM_2_3_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_2_3_HSC  # OAM 2-3 HSC temperature
    BASEBOARD_OAM_4_5_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_4_5_HSC  # OAM 4-5 HSC temperature
    BASEBOARD_OAM_6_7_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_6_7_HSC  # OAM 6-7 HSC temperature
    BASEBOARD_UBB_FPGA_0V72_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA_0V72_VR  # UBB FPGA 0.72V voltage regulator temperature
    BASEBOARD_UBB_FPGA_3V3_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA_3V3_VR  # UBB FPGA 3.3V voltage regulator temperature
    BASEBOARD_RETIMER_0_1_2_3_1V2_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_0_1_2_3_1V2_VR  # Retimer 0-1-2-3 1.2V voltage regulator temperature
    BASEBOARD_RETIMER_4_5_6_7_1V2_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_4_5_6_7_1V2_VR  # Retimer 4-5-6-7 1.2V voltage regulator temperature
    BASEBOARD_RETIMER_0_1_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_0_1_0V9_VR  # Retimer 0-1 0.9V voltage regulator temperature
    BASEBOARD_RETIMER_4_5_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_4_5_0V9_VR  # Retimer 4-5 0.9V voltage regulator temperature
    BASEBOARD_RETIMER_2_3_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_2_3_0V9_VR  # Retimer 2-3 0.9V voltage regulator temperature
    BASEBOARD_RETIMER_6_7_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_6_7_0V9_VR  # Retimer 6-7 0.9V voltage regulator temperature
    BASEBOARD_OAM_0_1_2_3_3V3_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_0_1_2_3_3V3_VR  # OAM 0-1-2-3 3.3V voltage regulator temperature
    BASEBOARD_OAM_4_5_6_7_3V3_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_4_5_6_7_3V3_VR  # OAM 4-5-6-7 3.3V voltage regulator temperature
    BASEBOARD_IBC_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_IBC_HSC  # IBC HSC temperature
    BASEBOARD_IBC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_IBC  # IBC temperature
    BASEBOARD_LAST = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_LAST
    BASEBOARD__MAX = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE__MAX  # Maximum per GPU temperature type
|
|
|
|
|
|
class AmdSmiDevPerfLevel(IntEnum):
    """Performance levels bound to the amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_* constants."""

    AUTO = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_AUTO
    LOW = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_LOW
    HIGH = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_HIGH
    MANUAL = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_MANUAL
    STABLE_STD = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_STD
    STABLE_PEAK = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_PEAK
    STABLE_MIN_MCLK = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK
    STABLE_MIN_SCLK = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK
    DETERMINISM = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_DETERMINISM
    UNKNOWN = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_UNKNOWN
|
|
|
|
|
|
class AmdSmiEventGroup(IntEnum):
    """Event groups bound to the amdsmi_wrapper.AMDSMI_EVNT_GRP_* constants."""

    XGMI = amdsmi_wrapper.AMDSMI_EVNT_GRP_XGMI
    XGMI_DATA_OUT = amdsmi_wrapper.AMDSMI_EVNT_GRP_XGMI_DATA_OUT
    GRP_INVALID = amdsmi_wrapper.AMDSMI_EVNT_GRP_INVALID
|
|
|
|
|
|
class AmdSmiEventType(IntEnum):
    """Event identifiers bound to the amdsmi_wrapper.AMDSMI_EVNT_XGMI_* constants."""

    # XGMI_0_* / XGMI_1_* transmit events
    XGMI_0_NOP_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_NOP_TX
    XGMI_0_REQUEST_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_REQUEST_TX
    XGMI_0_RESPONSE_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_RESPONSE_TX
    XGMI_0_BEATS_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_BEATS_TX
    XGMI_1_NOP_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_NOP_TX
    XGMI_1_REQUEST_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_REQUEST_TX
    XGMI_1_RESPONSE_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_RESPONSE_TX
    XGMI_1_BEATS_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_BEATS_TX

    # XGMI_DATA_OUT_0 .. XGMI_DATA_OUT_5
    XGMI_DATA_OUT_0 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_0
    XGMI_DATA_OUT_1 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_1
    XGMI_DATA_OUT_2 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_2
    XGMI_DATA_OUT_3 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_3
    XGMI_DATA_OUT_4 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_4
    XGMI_DATA_OUT_5 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_5
|
|
|
|
|
|
class AmdSmiCounterCommand(IntEnum):
    """Counter commands bound to the amdsmi_wrapper.AMDSMI_CNTR_CMD_* constants."""

    CMD_START = amdsmi_wrapper.AMDSMI_CNTR_CMD_START
    CMD_STOP = amdsmi_wrapper.AMDSMI_CNTR_CMD_STOP
|
|
|
|
|
|
class AmdSmiEvtNotificationType(IntEnum):
    """Event notification kinds bound to the amdsmi_wrapper.AMDSMI_EVT_NOTIF_* constants.

    Each member is named after the suffix of the wrapper constant it is
    bound to.
    """

    NONE = amdsmi_wrapper.AMDSMI_EVT_NOTIF_NONE
    VMFAULT = amdsmi_wrapper.AMDSMI_EVT_NOTIF_VMFAULT
    THERMAL_THROTTLE = amdsmi_wrapper.AMDSMI_EVT_NOTIF_THERMAL_THROTTLE
    GPU_PRE_RESET = amdsmi_wrapper.AMDSMI_EVT_NOTIF_GPU_PRE_RESET
    GPU_POST_RESET = amdsmi_wrapper.AMDSMI_EVT_NOTIF_GPU_POST_RESET
    MIGRATE_START = amdsmi_wrapper.AMDSMI_EVT_NOTIF_MIGRATE_START
    MIGRATE_END = amdsmi_wrapper.AMDSMI_EVT_NOTIF_MIGRATE_END
    # PAGE_FAULT_START must use its own constant: binding it to
    # ..._PAGE_FAULT_END would leave the start event without a member and
    # turn PAGE_FAULT_END into a mere alias of PAGE_FAULT_START.
    PAGE_FAULT_START = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PAGE_FAULT_START
    PAGE_FAULT_END = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PAGE_FAULT_END
    QUEUE_EVICTION = amdsmi_wrapper.AMDSMI_EVT_NOTIF_QUEUE_EVICTION
    QUEUE_RESTORE = amdsmi_wrapper.AMDSMI_EVT_NOTIF_QUEUE_RESTORE
    UNMAP_FROM_GPU = amdsmi_wrapper.AMDSMI_EVT_NOTIF_UNMAP_FROM_GPU
    PROCESS_START = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PROCESS_START
    PROCESS_END = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PROCESS_END
|
|
|
|
|
|
class AmdSmiTemperatureMetric(IntEnum):
    """Temperature metric selectors bound to the amdsmi_wrapper.AMDSMI_TEMP_* constants."""

    CURRENT = amdsmi_wrapper.AMDSMI_TEMP_CURRENT
    MAX = amdsmi_wrapper.AMDSMI_TEMP_MAX
    MIN = amdsmi_wrapper.AMDSMI_TEMP_MIN
    MAX_HYST = amdsmi_wrapper.AMDSMI_TEMP_MAX_HYST
    MIN_HYST = amdsmi_wrapper.AMDSMI_TEMP_MIN_HYST
    CRITICAL = amdsmi_wrapper.AMDSMI_TEMP_CRITICAL
    CRITICAL_HYST = amdsmi_wrapper.AMDSMI_TEMP_CRITICAL_HYST
    EMERGENCY = amdsmi_wrapper.AMDSMI_TEMP_EMERGENCY
    EMERGENCY_HYST = amdsmi_wrapper.AMDSMI_TEMP_EMERGENCY_HYST
    CRIT_MIN = amdsmi_wrapper.AMDSMI_TEMP_CRIT_MIN
    CRIT_MIN_HYST = amdsmi_wrapper.AMDSMI_TEMP_CRIT_MIN_HYST
    OFFSET = amdsmi_wrapper.AMDSMI_TEMP_OFFSET
    LOWEST = amdsmi_wrapper.AMDSMI_TEMP_LOWEST
    HIGHEST = amdsmi_wrapper.AMDSMI_TEMP_HIGHEST
|
|
|
|
|
|
class AmdSmiVoltageMetric(IntEnum):
    """Voltage metric selectors bound to the amdsmi_wrapper.AMDSMI_VOLT_* constants."""

    CURRENT = amdsmi_wrapper.AMDSMI_VOLT_CURRENT
    MAX = amdsmi_wrapper.AMDSMI_VOLT_MAX
    MIN_CRIT = amdsmi_wrapper.AMDSMI_VOLT_MIN_CRIT
    MIN = amdsmi_wrapper.AMDSMI_VOLT_MIN
    MAX_CRIT = amdsmi_wrapper.AMDSMI_VOLT_MAX_CRIT
    AVERAGE = amdsmi_wrapper.AMDSMI_VOLT_AVERAGE
    LOWEST = amdsmi_wrapper.AMDSMI_VOLT_LOWEST
    HIGHEST = amdsmi_wrapper.AMDSMI_VOLT_HIGHEST
|
|
|
|
|
|
class AmdSmiVoltageType(IntEnum):
    """Voltage rail selectors bound to the amdsmi_wrapper.AMDSMI_VOLT_TYPE_* constants."""

    VDDGFX = amdsmi_wrapper.AMDSMI_VOLT_TYPE_VDDGFX
    VDDBOARD = amdsmi_wrapper.AMDSMI_VOLT_TYPE_VDDBOARD
    INVALID = amdsmi_wrapper.AMDSMI_VOLT_TYPE_INVALID
|
|
|
|
class AmdSmiAcceleratorPartitionResourceType(IntEnum):
    """Accelerator partition resource kinds bound to amdsmi_wrapper.AMDSMI_ACCELERATOR_*."""

    XCC = amdsmi_wrapper.AMDSMI_ACCELERATOR_XCC
    ENCODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_ENCODER
    DECODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_DECODER
    DMA = amdsmi_wrapper.AMDSMI_ACCELERATOR_DMA
    JPEG = amdsmi_wrapper.AMDSMI_ACCELERATOR_JPEG
    MAX = amdsmi_wrapper.AMDSMI_ACCELERATOR_MAX
|
|
|
|
|
|
class AmdSmiAcceleratorPartitionType(IntEnum):
    """Accelerator partition modes bound to amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_*."""

    SPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_SPX
    DPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_DPX
    TPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_TPX
    QPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_QPX
    CPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_CPX
    INVALID = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_INVALID
|
|
|
|
|
|
class AmdSmiComputePartitionType(IntEnum):
    """Compute partition modes bound to amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_*."""

    SPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_SPX
    DPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_DPX
    TPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_TPX
    QPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_QPX
    CPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_CPX
    INVALID = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_INVALID
|
|
|
|
|
|
class AmdSmiMemoryPartitionType(IntEnum):
    """Memory partition modes bound to amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_*."""

    NPS1 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS1
    NPS2 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS2
    NPS4 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS4
    NPS8 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS8
    UNKNOWN = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_UNKNOWN
|
|
|
|
|
|
class AmdSmiPowerProfilePresetMasks(IntEnum):
    """Power profile presets bound to the amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_* constants."""

    CUSTOM_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_CUSTOM_MASK
    VIDEO_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_VIDEO_MASK
    POWER_SAVING_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_POWER_SAVING_MASK
    COMPUTE_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_COMPUTE_MASK
    VR_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_VR_MASK
    # A Python identifier cannot start with a digit, hence THREE_D for "3D".
    THREE_D_FULL_SCR_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK
    BOOTUP_DEFAULT = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT
    INVALID = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_INVALID
|
|
|
|
|
|
class AmdSmiGpuBlock(IntEnum):
    """GPU block identifiers bound to the amdsmi_wrapper.AMDSMI_GPU_BLOCK_* constants."""

    INVALID = amdsmi_wrapper.AMDSMI_GPU_BLOCK_INVALID
    UMC = amdsmi_wrapper.AMDSMI_GPU_BLOCK_UMC
    SDMA = amdsmi_wrapper.AMDSMI_GPU_BLOCK_SDMA
    GFX = amdsmi_wrapper.AMDSMI_GPU_BLOCK_GFX
    MMHUB = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MMHUB
    ATHUB = amdsmi_wrapper.AMDSMI_GPU_BLOCK_ATHUB
    PCIE_BIF = amdsmi_wrapper.AMDSMI_GPU_BLOCK_PCIE_BIF
    HDP = amdsmi_wrapper.AMDSMI_GPU_BLOCK_HDP
    XGMI_WAFL = amdsmi_wrapper.AMDSMI_GPU_BLOCK_XGMI_WAFL
    DF = amdsmi_wrapper.AMDSMI_GPU_BLOCK_DF
    SMN = amdsmi_wrapper.AMDSMI_GPU_BLOCK_SMN
    SEM = amdsmi_wrapper.AMDSMI_GPU_BLOCK_SEM
    MP0 = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MP0
    MP1 = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MP1
    FUSE = amdsmi_wrapper.AMDSMI_GPU_BLOCK_FUSE
    MCA = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MCA
    VCN = amdsmi_wrapper.AMDSMI_GPU_BLOCK_VCN
    JPEG = amdsmi_wrapper.AMDSMI_GPU_BLOCK_JPEG
    IH = amdsmi_wrapper.AMDSMI_GPU_BLOCK_IH
    MPIO = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MPIO
    RESERVED = amdsmi_wrapper.AMDSMI_GPU_BLOCK_RESERVED
|
|
|
|
|
|
class AmdSmiRasErrState(IntEnum):
    """RAS error states bound to the amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_* constants."""

    NONE = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_NONE
    DISABLED = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_DISABLED
    PARITY = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_PARITY
    SING_C = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_SING_C
    MULT_UC = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_MULT_UC
    POISON = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_POISON
    ENABLED = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_ENABLED
    INVALID = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_INVALID
|
|
|
|
|
|
class AmdSmiCperNotifyType(Enum):
    """CPER notify types bound to the amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_* constants.

    This is a plain ``Enum`` rather than an ``IntEnum``, so members do not
    compare equal to their raw values; use ``.value`` to get the wrapper
    constant.
    """

    CMC = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_CMC
    CPE = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_CPE
    MCE = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_MCE
    PCIE = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_PCIE
    INIT = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_INIT
    NMI = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_NMI
    BOOT = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_BOOT
    # Member name is mixed-case while the wrapper constant is ..._DMAR.
    DMAr = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_DMAR
    SEA = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_SEA
    SEI = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_SEI
    PEI = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_PEI
    CXL_COMPONENT = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_CXL_COMPONENT
|
|
|
|
|
|
class AmdSmiMemoryType(IntEnum):
    """Memory pools bound to the amdsmi_wrapper.AMDSMI_MEM_TYPE_* constants."""

    VRAM = amdsmi_wrapper.AMDSMI_MEM_TYPE_VRAM
    VIS_VRAM = amdsmi_wrapper.AMDSMI_MEM_TYPE_VIS_VRAM
    GTT = amdsmi_wrapper.AMDSMI_MEM_TYPE_GTT
|
|
|
|
|
|
class AmdSmiFreqInd(IntEnum):
    """Frequency index selectors bound to the amdsmi_wrapper.AMDSMI_FREQ_IND_* constants."""

    MIN = amdsmi_wrapper.AMDSMI_FREQ_IND_MIN
    MAX = amdsmi_wrapper.AMDSMI_FREQ_IND_MAX
    INVALID = amdsmi_wrapper.AMDSMI_FREQ_IND_INVALID
|
|
|
|
|
|
class AmdSmiXgmiStatus(IntEnum):
    """XGMI status values bound to the amdsmi_wrapper.AMDSMI_XGMI_STATUS_* constants."""

    NO_ERRORS = amdsmi_wrapper.AMDSMI_XGMI_STATUS_NO_ERRORS
    ERROR = amdsmi_wrapper.AMDSMI_XGMI_STATUS_ERROR
    MULTIPLE_ERRORS = amdsmi_wrapper.AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS
|
|
|
|
|
|
class AmdSmiMemoryPageStatus(IntEnum):
    """Memory page states bound to the amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_* constants."""

    RESERVED = amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_RESERVED
    PENDING = amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_PENDING
    UNRESERVABLE = amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_UNRESERVABLE
|
|
|
|
|
|
class AmdSmiLinkType(IntEnum):
    """Link types; members keep the full amdsmi_wrapper ``AMDSMI_LINK_TYPE_*`` names."""

    AMDSMI_LINK_TYPE_INTERNAL = amdsmi_wrapper.AMDSMI_LINK_TYPE_INTERNAL
    AMDSMI_LINK_TYPE_XGMI = amdsmi_wrapper.AMDSMI_LINK_TYPE_XGMI
    AMDSMI_LINK_TYPE_PCIE = amdsmi_wrapper.AMDSMI_LINK_TYPE_PCIE
    AMDSMI_LINK_TYPE_NOT_APPLICABLE = amdsmi_wrapper.AMDSMI_LINK_TYPE_NOT_APPLICABLE
    AMDSMI_LINK_TYPE_UNKNOWN = amdsmi_wrapper.AMDSMI_LINK_TYPE_UNKNOWN
|
|
|
|
|
|
class AmdSmiUtilizationCounterType(IntEnum):
    """Utilization counter selectors bound to the matching amdsmi_wrapper constants."""

    COARSE_GRAIN_GFX_ACTIVITY = amdsmi_wrapper.AMDSMI_COARSE_GRAIN_GFX_ACTIVITY
    COARSE_GRAIN_MEM_ACTIVITY = amdsmi_wrapper.AMDSMI_COARSE_GRAIN_MEM_ACTIVITY
    COARSE_DECODER_ACTIVITY = amdsmi_wrapper.AMDSMI_COARSE_DECODER_ACTIVITY
    FINE_GRAIN_GFX_ACTIVITY = amdsmi_wrapper.AMDSMI_FINE_GRAIN_GFX_ACTIVITY
    FINE_GRAIN_MEM_ACTIVITY = amdsmi_wrapper.AMDSMI_FINE_GRAIN_MEM_ACTIVITY
    FINE_DECODER_ACTIVITY = amdsmi_wrapper.AMDSMI_FINE_DECODER_ACTIVITY
    # Range markers. NOTE(review): if the wrapper gives these the same values
    # as counters above, IntEnum will expose them as aliases -- confirm.
    UTILIZATION_COUNTER_FIRST = amdsmi_wrapper.AMDSMI_UTILIZATION_COUNTER_FIRST
    UTILIZATION_COUNTER_LAST = amdsmi_wrapper.AMDSMI_UTILIZATION_COUNTER_LAST
|
|
|
|
|
|
class AmdSmiProcessorType(IntEnum):
    """Processor kinds bound to the amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_* constants."""

    UNKNOWN = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_UNKNOWN
    AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_GPU
    AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU
    NON_AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU
    NON_AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU
    AMD_CPU_CORE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE
    AMD_APU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_APU
|
|
|
|
|
|
class AmdSmiRegType(IntEnum):
    """Register types bound to the amdsmi_wrapper.AMDSMI_REG_* constants."""

    XGMI = amdsmi_wrapper.AMDSMI_REG_XGMI
    WAFL = amdsmi_wrapper.AMDSMI_REG_WAFL
    PCIE = amdsmi_wrapper.AMDSMI_REG_PCIE
    USR = amdsmi_wrapper.AMDSMI_REG_USR
    USR1 = amdsmi_wrapper.AMDSMI_REG_USR1
|
|
|
|
|
|
class AmdSmiVirtualizationMode(IntEnum):
    """Virtualization modes bound to amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_*."""

    UNKNOWN = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_UNKNOWN
    BAREMETAL = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_BAREMETAL
    HOST = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_HOST
    GUEST = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_GUEST
    PASSTHROUGH = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_PASSTHROUGH
|
|
|
|
|
|
class AmdSmiVramType(IntEnum):
    """
    VRAM type identifiers.

    Every member takes its value from the matching
    `amdsmi_wrapper.AMDSMI_VRAM_TYPE_*` constant.
    """

    UNKNOWN = amdsmi_wrapper.AMDSMI_VRAM_TYPE_UNKNOWN

    # HBM family
    HBM = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM
    HBM2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2
    HBM2E = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2E
    HBM3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM3

    # DDR family
    DDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR2
    DDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR3
    DDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR4

    # GDDR family
    GDDR1 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR1
    GDDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR2
    GDDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR3
    GDDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR4
    GDDR5 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR5
    GDDR6 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR6
    GDDR7 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR7

    # Taken from the wrapper's `AMDSMI_VRAM_TYPE__MAX` constant
    MAX = amdsmi_wrapper.AMDSMI_VRAM_TYPE__MAX
|
|
|
|
|
|
class AmdSmiAffinityScope(IntEnum):
    """
    Affinity scope identifiers.

    Member values come from the `amdsmi_wrapper.AMDSMI_AFFINITY_SCOPE_*`
    constants; note that `NUMA_SCOPE` maps to the `..._NODE` constant.
    """

    NUMA_SCOPE = amdsmi_wrapper.AMDSMI_AFFINITY_SCOPE_NODE
    SOCKET_SCOPE = amdsmi_wrapper.AMDSMI_AFFINITY_SCOPE_SOCKET
|
|
|
|
class AmdSmiPtlData(IntEnum):
    """
    PTL data format identifiers.

    Every member takes its value from the matching
    `amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_*` constant.
    """

    I8 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_I8
    F16 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_F16
    BF16 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_BF16
    F32 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_F32
    F64 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_F64
    INVALID = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_INVALID
|
|
|
|
class AmdSmiPowerCapType(IntEnum):
    """
    Power cap type identifiers.

    Every member takes its value from the matching
    `amdsmi_wrapper.AMDSMI_POWER_CAP_TYPE_*` constant.
    """

    PPT0 = amdsmi_wrapper.AMDSMI_POWER_CAP_TYPE_PPT0
    PPT1 = amdsmi_wrapper.AMDSMI_POWER_CAP_TYPE_PPT1
|
|
|
|
|
|
class AmdSmiEventReader:
    """
    Reads GPU event notifications for a single processor handle.

    Creating a reader initializes event notification on the handle and sets
    the notification mask from `event_types`. The reader can be used as a
    context manager; leaving the `with` block calls `stop()`.
    """

    def __init__(
        self,
        processor_handle: processor_handle_t,
        event_types: List[AmdSmiEvtNotificationType]
    ):
        """
        Validate the arguments, then enable notification and set the mask.

        Parameters:
            processor_handle(`amdsmi_processor_handle`): Handle to listen on.
            event_types(`Iterable`): `AmdSmiEvtNotificationType` members to
                listen for. `NONE` entries are accepted but contribute no
                bit to the mask.

        Raises:
            `AmdSmiParameterException`: `processor_handle` is not an
                `amdsmi_processor_handle`, `event_types` is not iterable, or
                one of its items is not an `AmdSmiEvtNotificationType`.
            Exceptions raised by `_check_res` when a library call does not
                succeed.
        """
        if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
            raise AmdSmiParameterException(
                processor_handle, amdsmi_wrapper.amdsmi_processor_handle
            )
        if not isinstance(event_types, Iterable):
            raise AmdSmiParameterException(
                event_types, Iterable
            )

        # Materialize once: the items are walked twice (validation, then mask
        # construction), and a one-shot iterable such as a generator would be
        # exhausted by the first pass, silently yielding an empty mask.
        event_types = list(event_types)

        for event_type in event_types:
            if not isinstance(event_type, AmdSmiEvtNotificationType):
                raise AmdSmiParameterException(
                    event_type, AmdSmiEvtNotificationType
                )

        self.processor_handle = processor_handle

        # Each requested type other than NONE sets bit (value - 1) of the mask.
        mask = 0
        for event_type in event_types:
            if event_type != AmdSmiEvtNotificationType.NONE:
                mask |= (1 << (int(event_type) - 1))

        _check_res(amdsmi_wrapper.amdsmi_init_gpu_event_notification(processor_handle))
        _check_res(amdsmi_wrapper.amdsmi_set_gpu_event_notification_mask(
            processor_handle, ctypes.c_uint64(mask)))

    def read(self, timestamp, num_elem=10):
        """
        Fetch pending event notifications.

        Parameters:
            timestamp(`int`): Passed to the library call as a C `int` first
                argument. NOTE(review): presumably a timeout in milliseconds
                - confirm against the library documentation.
            num_elem(`int`): Capacity of the record buffer handed to the
                library. Defaults to 10.

        Returns:
            `list`: One dict per returned record whose event value is a known
                `AmdSmiEvtNotificationType` other than `NONE`, with keys
                `processor_handle`, `event` (enum member name) and `message`
                (UTF-8 decoded).

        Raises:
            Exceptions raised by `_check_res` when the library call does not
                succeed.
        """
        c_count = ctypes.c_uint32(num_elem)
        event_info = (amdsmi_wrapper.amdsmi_evt_notification_data_t * num_elem)()
        _check_res(
            amdsmi_wrapper.amdsmi_get_gpu_event_notification(
                ctypes.c_int(timestamp),
                ctypes.byref(c_count),
                event_info,
            )
        )

        # Loop-invariant: build the set of known enum values a single time.
        known_event_values = {event.value for event in AmdSmiEvtNotificationType}

        ret = []
        for i in range(c_count.value):
            record = event_info[i]
            if record.event not in known_event_values:
                continue
            event_name = AmdSmiEvtNotificationType(record.event).name
            if event_name == "NONE":
                continue
            ret.append(
                {
                    "processor_handle": amdsmi_wrapper.amdsmi_processor_handle(
                        record.processor_handle),
                    "event": event_name,
                    "message": record.message.decode("utf-8"),
                }
            )

        return ret

    def stop(self):
        """
        Stop event notification on the handle given at construction.

        Raises:
            Exceptions raised by `_check_res` when the library call does not
                succeed.
        """
        _check_res(amdsmi_wrapper.amdsmi_stop_gpu_event_notification(
            self.processor_handle))

    def __enter__(self):
        """Return the reader itself for use inside a `with` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop notification when the `with` block is left."""
        self.stop()
|
|
|
|
|
|
def _format_bad_page_info(bad_page_info, bad_page_count: ctypes.c_uint32) -> List[Dict]:
|
|
"""
|
|
Format bad page info data retrieved.
|
|
|
|
Parameters:
|
|
bad_page_info(`amdsmi_retired_page_record_t`): A populated list of amdsmi_retired_page_record_t(s)
|
|
retrieved. Ex: (amdsmi_wrapper.amdsmi_retired_page_record_t * #)()
|
|
bad_page_count(`c_uint32`): Bad page count.
|
|
|
|
Returns:
|
|
`list`: List containing formatted bad pages. Can be empty
|
|
"""
|
|
if bad_page_count == 0:
|
|
return []
|
|
|
|
# Check if each struct within bad_page_info is valid
|
|
for bad_page in bad_page_info:
|
|
if not isinstance(bad_page, amdsmi_wrapper.amdsmi_retired_page_record_t):
|
|
raise AmdSmiParameterException(
|
|
bad_page, amdsmi_wrapper.amdsmi_retired_page_record_t
|
|
)
|
|
|
|
table_records = []
|
|
for i in range(bad_page_count.value):
|
|
table_records.append(
|
|
{
|
|
"value": i,
|
|
"page_address": bad_page_info[i].page_address,
|
|
"page_size": bad_page_info[i].page_size,
|
|
"status": bad_page_info[i].status,
|
|
}
|
|
)
|
|
return table_records
|
|
|
|
|
|
def _format_bdf(amdsmi_bdf: amdsmi_wrapper.amdsmi_bdf_t) -> str:
|
|
"""
|
|
Format BDF struct to readable data.
|
|
|
|
Parameters:
|
|
amdsmi_bdf(`amdsmi_bdf_t`): Struct containing BDF data that
|
|
will be formatted.
|
|
|
|
Returns:
|
|
`str`: String containing BDF data in a readable format.
|
|
"""
|
|
domain = hex(amdsmi_bdf.struct_amdsmi_bdf_t.domain_number)[2:].zfill(4)
|
|
bus = hex(amdsmi_bdf.struct_amdsmi_bdf_t.bus_number)[2:].zfill(2)
|
|
device = hex(amdsmi_bdf.struct_amdsmi_bdf_t.device_number)[2:].zfill(2)
|
|
function = hex(amdsmi_bdf.struct_amdsmi_bdf_t.function_number)[2:]
|
|
|
|
return domain + ":" + bus + ":" + device + "." + function
|
|
|
|
|
|
def _check_res(ret_code) -> None:
    """
    Translate an amdsmi status code into an exception when it is not success.

    Parameters:
        ret_code(`amdsmi_status_t`): Status code returned by a library call.

    Returns:
        `None`.

    Raises:
        `AmdSmiRetryException`: `ret_code` is `AMDSMI_STATUS_RETRY`.
        `AmdSmiTimeoutException`: `ret_code` is `AMDSMI_STATUS_TIMEOUT`.
        `AmdSmiLibraryException`: Any other status that is not
            `AMDSMI_STATUS_SUCCESS`; constructed with `ret_code`.
    """
    # The two dedicated statuses are tested before the generic failure path.
    if ret_code == amdsmi_wrapper.AMDSMI_STATUS_RETRY:
        raise AmdSmiRetryException()
    elif ret_code == amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT:
        raise AmdSmiTimeoutException()
    elif ret_code != amdsmi_wrapper.AMDSMI_STATUS_SUCCESS:
        raise AmdSmiLibraryException(ret_code)
|
|
|
|
|
|
def _parse_bdf(bdf):
|
|
if bdf is None:
|
|
return None
|
|
extended_regex = re.compile(
|
|
r'^([0-9a-fA-F]{4}):([0-9a-fA-F]{2}):([0-1][0-9a-fA-F])\.([0-7])$')
|
|
if extended_regex.match(bdf) is None:
|
|
simple_regex = re.compile(
|
|
r'^([0-9a-fA-F]{2}):([0-1][0-9a-fA-F])\.([0-7])$')
|
|
if simple_regex.match(bdf) is None:
|
|
return None
|
|
else:
|
|
match = simple_regex.match(bdf)
|
|
if match:
|
|
return [0] + [int(x, 16) for x in match.groups()]
|
|
else:
|
|
return None
|
|
else:
|
|
match = extended_regex.match(bdf)
|
|
if match:
|
|
return [int(x, 16) for x in match.groups()]
|
|
return None
|
|
|
|
|
|
def _make_amdsmi_bdf_from_list(bdf):
|
|
if len(bdf) != 4:
|
|
return None
|
|
amdsmi_bdf = amdsmi_wrapper.amdsmi_bdf_t()
|
|
amdsmi_bdf.struct_amdsmi_bdf_t.function_number = bdf[3]
|
|
amdsmi_bdf.struct_amdsmi_bdf_t.device_number = bdf[2]
|
|
amdsmi_bdf.struct_amdsmi_bdf_t.bus_number = bdf[1]
|
|
amdsmi_bdf.struct_amdsmi_bdf_t.domain_number = bdf[0]
|
|
return amdsmi_bdf
|
|
|
|
|
|
def _pad_hex_value(value, length) -> str:
|
|
""" Pad a hexadecimal value with a given length of zeros
|
|
|
|
:param value: A hexadecimal value to be padded with zeros
|
|
:param length: Number of zeros to pad the hexadecimal value
|
|
:param return original string string or
|
|
padded hex of confirmed hex output (using length provided)
|
|
"""
|
|
# Ensure value entered meets the minimum length and is hexadecimal
|
|
if len(value) > 2 and length > 1 and value[:2].lower() == '0x' \
|
|
and all(c in '0123456789abcdefABCDEF' for c in value[2:]):
|
|
# Pad with zeros after '0x' prefix
|
|
return '0x' + value[2:].zfill(length)
|
|
return value
|
|
|
|
|
|
def _validate_if_max_uint(value, uint_type: MaxUIntegerTypes, isActivity=False, isBool=False) -> Union[str, bool, int, list]:
|
|
return_val = "N/A"
|
|
if not isinstance(value, list):
|
|
if (value == uint_type) or (isActivity and value > 100):
|
|
return return_val
|
|
if isBool:
|
|
return bool(value)
|
|
return value
|
|
else:
|
|
return_val = []
|
|
for _, v in enumerate(value):
|
|
if (v == uint_type) or (isActivity and v > 100):
|
|
return_val.append("N/A")
|
|
else:
|
|
return_val.append(v)
|
|
if isBool:
|
|
return bool(return_val)
|
|
return return_val
|
|
|
|
|
|
def _notifyTypeToString(notify_type_b):
    """
    Map the leading bytes of a notify type to an `AmdSmiCperNotifyType` name.

    Parameters:
        notify_type_b: Indexable sequence of integer byte values; only the
            first 8 entries are used.

    Returns:
        `str`: Name of the `AmdSmiCperNotifyType` member whose value equals
            the integer formed from those bytes, or "Unknown" when no member
            has that value.
    """
    # Walk the first 8 bytes from index 7 down to 0, so the byte at index 7
    # becomes the most significant pair of hex digits.
    hex_string = "".join(format(byte, '02x') for byte in notify_type_b[7::-1])
    notify_value = int(hex_string, 16)

    if notify_value not in AmdSmiCperNotifyType._value2member_map_:
        return "Unknown"
    # Convert to the corresponding enum name
    return AmdSmiCperNotifyType(notify_value).name
|
|
|
|
def _NA_amdsmi_get_gpu_metrics_info() -> Dict[str, str]:
|
|
"""
|
|
Get 'N/A' metric values for gpu_metric, used for exception handling.
|
|
|
|
Parameters:
|
|
None
|
|
|
|
Returns:
|
|
Dict[str, str]: A dictionary with keys as metric names and values as 'N/A'.
|
|
This is used to indicate that the metric is not available or applicable.
|
|
|
|
Raises:
|
|
N/A
|
|
"""
|
|
na_gpu_metrics_info = {
|
|
"common_header.structure_size": "N/A",
|
|
"common_header.format_revision": "N/A",
|
|
"common_header.content_revision": "N/A",
|
|
"temperature_edge": "N/A",
|
|
"temperature_hotspot": "N/A",
|
|
"temperature_mem": "N/A",
|
|
"temperature_vrgfx": "N/A",
|
|
"temperature_vrsoc": "N/A",
|
|
"temperature_vrmem": "N/A",
|
|
"average_gfx_activity": "N/A",
|
|
"average_umc_activity": "N/A",
|
|
"average_mm_activity": "N/A",
|
|
"average_socket_power": "N/A",
|
|
"energy_accumulator": "N/A",
|
|
"system_clock_counter": "N/A",
|
|
"average_gfxclk_frequency": "N/A",
|
|
"average_socclk_frequency": "N/A",
|
|
"average_uclk_frequency": "N/A",
|
|
"average_vclk0_frequency": "N/A",
|
|
"average_dclk0_frequency": "N/A",
|
|
"average_vclk1_frequency": "N/A",
|
|
"average_dclk1_frequency": "N/A",
|
|
"current_gfxclk": "N/A",
|
|
"current_socclk": "N/A",
|
|
"current_uclk": "N/A",
|
|
"current_vclk0": "N/A",
|
|
"current_dclk0": "N/A",
|
|
"current_vclk1": "N/A",
|
|
"current_dclk1": "N/A",
|
|
"throttle_status": "N/A",
|
|
"current_fan_speed": "N/A",
|
|
"pcie_link_width": "N/A",
|
|
"pcie_link_speed": "N/A",
|
|
"gfx_activity_acc": "N/A",
|
|
"mem_activity_acc": "N/A",
|
|
"temperature_hbm": "N/A",
|
|
"firmware_timestamp": "N/A",
|
|
"voltage_soc": "N/A",
|
|
"voltage_gfx": "N/A",
|
|
"voltage_mem": "N/A",
|
|
"indep_throttle_status": "N/A",
|
|
"current_socket_power": "N/A",
|
|
"vcn_activity": "N/A",
|
|
"gfxclk_lock_status": "N/A",
|
|
"xgmi_link_width": "N/A",
|
|
"xgmi_link_speed": "N/A",
|
|
"pcie_bandwidth_acc": "N/A",
|
|
"pcie_bandwidth_inst": "N/A",
|
|
"pcie_l0_to_recov_count_acc": "N/A",
|
|
"pcie_replay_count_acc": "N/A",
|
|
"pcie_replay_rover_count_acc": "N/A",
|
|
"xgmi_read_data_acc": "N/A",
|
|
"xgmi_write_data_acc": "N/A",
|
|
"current_gfxclks": "N/A",
|
|
"current_socclks": "N/A",
|
|
"current_vclk0s": "N/A",
|
|
"current_dclk0s": "N/A",
|
|
"jpeg_activity": "N/A",
|
|
"pcie_nak_sent_count_acc": "N/A",
|
|
"pcie_nak_rcvd_count_acc": "N/A",
|
|
"accumulation_counter": "N/A",
|
|
"prochot_residency_acc": "N/A",
|
|
"ppt_residency_acc": "N/A",
|
|
"socket_thm_residency_acc": "N/A",
|
|
"vr_thm_residency_acc": "N/A",
|
|
"hbm_thm_residency_acc": "N/A",
|
|
"num_partition": "N/A",
|
|
"xcp_stats.gfx_busy_inst": "N/A",
|
|
"xcp_stats.jpeg_busy": "N/A",
|
|
"xcp_stats.vcn_busy": "N/A",
|
|
"xcp_stats.gfx_busy_acc": "N/A",
|
|
"xcp_stats.gfx_below_host_limit_acc": "N/A",
|
|
"xcp_stats.gfx_below_host_limit_ppt_acc": "N/A",
|
|
"xcp_stats.gfx_below_host_limit_thm_acc": "N/A",
|
|
"xcp_stats.gfx_low_utilization_acc": "N/A",
|
|
"xcp_stats.gfx_below_host_limit_total_acc": "N/A",
|
|
"pcie_lc_perf_other_end_recovery": "N/A",
|
|
"vram_max_bandwidth": "N/A",
|
|
"xgmi_link_status": "N/A"
|
|
}
|
|
return na_gpu_metrics_info
|
|
|
|
|
|
def amdsmi_get_socket_handles() -> List[c_void_p]:
    """
    Function that gets socket handles. Wraps the same named function call.

    Parameters:
        `None`.

    Returns:
        `List`: List containing all of the found socket handles.

    Raises:
        Exceptions raised by `_check_res` when a library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_socket_handle
    count = ctypes.c_uint32(0)

    # First call passes a null buffer; the count it reports sizes the array
    # handed to the second call.
    _check_res(
        amdsmi_wrapper.amdsmi_get_socket_handles(
            ctypes.byref(count), POINTER(handle_type)())
    )

    handle_buffer = (handle_type * count.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_socket_handles(
            ctypes.byref(count), handle_buffer)
    )

    return [handle_type(handle_buffer[idx]) for idx in range(count.value)]
|
|
|
|
def amdsmi_get_cpusocket_handles() -> List[c_void_p]:
    """
    Function that gets cpu socket handles. Wraps the library call
    `amdsmi_get_cpu_handles`.

    Parameters:
        `None`.

    Returns:
        `List`: List containing all of the found cpu socket handles.

    Raises:
        Exceptions raised by `_check_res` when a library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    count = ctypes.c_uint32(0)

    # First call passes a null buffer; the count it reports sizes the array
    # handed to the second call.
    _check_res(
        amdsmi_wrapper.amdsmi_get_cpu_handles(
            ctypes.byref(count), POINTER(handle_type)())
    )

    handle_buffer = (handle_type * count.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_cpu_handles(
            ctypes.byref(count), handle_buffer)
    )

    return [handle_type(handle_buffer[idx]) for idx in range(count.value)]
|
|
|
|
def amdsmi_get_socket_info(socket_handle):
    """
    Function that gets socket info. Wraps the same named function call.

    Parameters:
        socket_handle(`amdsmi_socket_handle`): Socket to query.

    Returns:
        `str`: Contents of the 128-byte buffer filled by the library,
            decoded up to the first NUL byte.

    Raises:
        `AmdSmiParameterException`: `socket_handle` is not an
            `amdsmi_socket_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_socket_handle
    if not isinstance(socket_handle, handle_type):
        raise AmdSmiParameterException(socket_handle, handle_type)

    buffer_len = 128
    info_buffer = ctypes.create_string_buffer(buffer_len)
    status = amdsmi_wrapper.amdsmi_get_socket_info(
        socket_handle, ctypes.c_size_t(buffer_len), info_buffer)
    _check_res(status)

    return info_buffer.value.decode()
|
|
|
|
def amdsmi_get_processor_info(processor_handle):
    """
    Function that gets processor info. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: Contents of the 128-byte buffer filled by the library,
            decoded up to the first NUL byte.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    buffer_len = 128
    info_buffer = ctypes.create_string_buffer(buffer_len)
    status = amdsmi_wrapper.amdsmi_get_processor_info(
        processor_handle, ctypes.c_size_t(buffer_len), info_buffer)
    _check_res(status)

    return info_buffer.value.decode()
|
|
|
|
def amdsmi_get_processor_handles() -> List[c_void_p]:
    """
    Function that gets the processor handles of every socket.

    Calls `amdsmi_get_socket_handles()` and then the same named library call
    once per socket.

    Parameters:
        `None`.

    Returns:
        `List`: Processor handles of all sockets, in socket order.

    Raises:
        Exceptions raised by `_check_res` when a library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    devices = []

    for socket in amdsmi_get_socket_handles():
        count = ctypes.c_uint32()

        # First call passes a null buffer; the count it reports sizes the
        # array handed to the second call.
        _check_res(
            amdsmi_wrapper.amdsmi_get_processor_handles(
                socket, ctypes.byref(count), POINTER(handle_type)())
        )

        handle_buffer = (handle_type * count.value)()
        _check_res(
            amdsmi_wrapper.amdsmi_get_processor_handles(
                socket, ctypes.byref(count), handle_buffer)
        )

        devices.extend(
            [handle_type(handle_buffer[idx]) for idx in range(count.value)]
        )

    return devices
|
|
|
|
def amdsmi_get_cpucore_handles() -> List[c_void_p]:
    """
    Function that gets cpu core handles. Wraps the same named function call.

    Parameters:
        `None`.

    Returns:
        `List`: List containing all of the found cpu core handles.

    Raises:
        Exceptions raised by `_check_res` when a library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    count = ctypes.c_uint32(0)

    # First call passes a null buffer; the count it reports sizes the array
    # handed to the second call.
    _check_res(
        amdsmi_wrapper.amdsmi_get_cpucore_handles(
            ctypes.byref(count), POINTER(handle_type)())
    )

    handle_buffer = (handle_type * count.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_cpucore_handles(
            ctypes.byref(count), handle_buffer)
    )

    return [handle_type(handle_buffer[idx]) for idx in range(count.value)]
|
|
|
|
def amdsmi_get_cpu_hsmp_proto_ver(processor_handle: processor_handle_t) -> int:
    """
    Function that gets the cpu hsmp proto ver. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The `c_uint32` value written by the library call.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    version = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_hsmp_proto_ver(
        processor_handle, ctypes.byref(version))
    _check_res(status)

    return version.value
|
|
|
|
def amdsmi_get_cpu_smu_fw_version(
        processor_handle: processor_handle_t) -> Dict[str, int]:
    """
    Function that gets the cpu smu fw version. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: Keys `smu_fw_debug_ver_num`, `smu_fw_minor_ver_num` and
            `smu_fw_major_ver_num`, taken from the `debug`, `minor` and
            `major` fields of the `amdsmi_smu_fw_version_t` filled by the
            library.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    fw_version = amdsmi_wrapper.amdsmi_smu_fw_version_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_smu_fw_version(processor_handle, fw_version)
    _check_res(status)

    result = {}
    result["smu_fw_debug_ver_num"] = fw_version.debug
    result["smu_fw_minor_ver_num"] = fw_version.minor
    result["smu_fw_major_ver_num"] = fw_version.major
    return result
|
|
|
|
def amdsmi_get_cpu_hsmp_driver_version(
        processor_handle: processor_handle_t) -> Dict[str, int]:
    """
    Function that gets the cpu hsmp driver version. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: Keys `hsmp_driver_major_ver_num` and
            `hsmp_driver_minor_ver_num`, taken from the `major` and `minor`
            fields of the `amdsmi_hsmp_driver_version_t` filled by the
            library.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    driver_version = amdsmi_wrapper.amdsmi_hsmp_driver_version_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_hsmp_driver_version(
        processor_handle, driver_version)
    _check_res(status)

    result = {}
    result["hsmp_driver_major_ver_num"] = driver_version.major
    result["hsmp_driver_minor_ver_num"] = driver_version.minor
    return result
|
|
|
|
def amdsmi_get_cpu_core_energy(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu core energy. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint64` value written by the library, multiplied by
            10^-6, followed by " J".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_energy = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_cpu_core_energy(
        processor_handle, ctypes.byref(raw_energy))
    _check_res(status)

    # Scaling by 10^-6 already yields a float.
    joules = raw_energy.value * pow(10, -6)
    return f"{joules} J"
|
|
|
|
def amdsmi_get_cpu_socket_energy(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu socket energy. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint64` value written by the library, multiplied by
            10^-6, followed by " J".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_energy = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_energy(
        processor_handle, ctypes.byref(raw_energy))
    _check_res(status)

    # Scaling by 10^-6 already yields a float.
    joules = raw_energy.value * pow(10, -6)
    return f"{joules} J"
|
|
|
|
def amdsmi_get_threads_per_core():
    """
    Function that gets threads per core. Wraps the same named function call.

    Parameters:
        `None`.

    Returns:
        `int`: The `c_uint32` value written by the library call.

    Raises:
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    thread_count = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_threads_per_core(ctypes.byref(thread_count))
    _check_res(status)
    return thread_count.value
|
|
|
|
def amdsmi_get_cpu_prochot_status(processor_handle: processor_handle_t) -> int:
    """
    Function that gets the cpu prochot status. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The `c_uint32` value written by the library call.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status_word = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_prochot_status(
        processor_handle, ctypes.byref(status_word))
    _check_res(status)

    return status_word.value
|
|
|
|
def amdsmi_get_cpu_fclk_mclk(processor_handle: processor_handle_t):
    """
    Function that gets the cpu fclk and mclk. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: Keys `fclk` and `mclk`, each the `c_uint32` value written by
            the library followed by " MHz".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    fclk_out = ctypes.c_uint32()
    mclk_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_fclk_mclk(
        processor_handle, ctypes.byref(fclk_out), ctypes.byref(mclk_out))
    _check_res(status)

    clocks = {}
    clocks["fclk"] = f"{fclk_out.value} MHz"
    clocks["mclk"] = f"{mclk_out.value} MHz"
    return clocks
|
|
|
|
def amdsmi_get_cpu_cclk_limit(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu cclk limit. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " MHz".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    limit = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_cclk_limit(
        processor_handle, ctypes.byref(limit))
    _check_res(status)

    return f"{limit.value} MHz"
|
|
|
|
def amdsmi_get_cpu_socket_current_active_freq_limit(processor_handle: processor_handle_t):
    """
    Function that gets the cpu socket current active freq limit. Wraps the
    same named function call.

    The wrapper function's `argtypes` are reassigned on every call before
    the library is invoked.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: `freq` is the `c_uint16` value written by the library
            followed by " MHz"; `freq_src` is the string form of the list of
            non-null source names, each decoded as UTF-8.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # One `char *` slot per known frequency-limit source name.
    src_array_type = ctypes.c_char_p * len(amdsmi_wrapper.amdsmi_hsmp_freqlimit_src_names)
    amdsmi_wrapper.amdsmi_get_cpu_socket_current_active_freq_limit.argtypes = [
        amdsmi_wrapper.amdsmi_processor_handle,
        POINTER(ctypes.c_uint16),
        POINTER(src_array_type),
    ]

    freq = ctypes.c_uint16()
    src_type = src_array_type()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_current_active_freq_limit(
        processor_handle, ctypes.byref(freq), src_type)
    _check_res(status)

    # Slots left null by the library are skipped.
    freq_src = [name.decode('utf-8') for name in src_type if name is not None]

    return {
        "freq": f"{freq.value} MHz",
        "freq_src": f"{freq_src}"
    }
|
|
|
|
def amdsmi_get_cpu_socket_freq_range(processor_handle: processor_handle_t):
    """
    Function that gets the cpu socket freq range. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: Keys `max_socket_freq` and `min_socket_freq`, each the
            `c_uint16` value written by the library followed by " MHz".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    upper = ctypes.c_uint16()
    lower = ctypes.c_uint16()
    # The library takes the maximum first, then the minimum.
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_freq_range(
        processor_handle, ctypes.byref(upper), ctypes.byref(lower))
    _check_res(status)

    freq_range = {}
    freq_range["max_socket_freq"] = f"{upper.value} MHz"
    freq_range["min_socket_freq"] = f"{lower.value} MHz"
    return freq_range
|
|
|
|
def amdsmi_get_cpu_core_current_freq_limit(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu core current freq limit. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " MHz".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    limit = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_core_current_freq_limit(
        processor_handle, ctypes.byref(limit))
    _check_res(status)

    return f"{limit.value} MHz"
|
|
|
|
def amdsmi_get_cpu_socket_power(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu socket power. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " mW".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    reading = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_power(
        processor_handle, ctypes.byref(reading))
    _check_res(status)

    return f"{reading.value} mW"
|
|
|
|
def amdsmi_get_cpu_socket_power_cap(processor_handle: processor_handle_t) -> int:
    """
    Function that gets the cpu socket power cap. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The `c_uint32` value written by the library call, in mW.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    cap = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_power_cap(
        processor_handle, ctypes.byref(cap))
    _check_res(status)

    # in mW
    return cap.value
|
|
|
|
def amdsmi_get_cpu_socket_power_cap_max(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu socket power cap max. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " mW".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    cap_max = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_power_cap_max(
        processor_handle, ctypes.byref(cap_max))
    _check_res(status)

    return f"{cap_max.value} mW"
|
|
|
|
def amdsmi_get_cpu_pwr_svi_telemetry_all_rails(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu pwr svi telemetry for all rails. Wraps the
    same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " mW".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    reading = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_pwr_svi_telemetry_all_rails(
        processor_handle, ctypes.byref(reading))
    _check_res(status)

    return f"{reading.value} mW"
|
|
|
|
def amdsmi_set_cpu_socket_power_cap(processor_handle: processor_handle_t, power_cap: int):
    """
    Function that sets the cpu socket power cap. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to configure.
        power_cap(`int`): Value handed to the library as a `c_uint32`.

    Returns:
        `None`.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `power_cap` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(power_cap, int):
        raise AmdSmiParameterException(power_cap, int)

    status = amdsmi_wrapper.amdsmi_set_cpu_socket_power_cap(
        processor_handle, ctypes.c_uint32(power_cap))
    _check_res(status)
|
|
|
|
def amdsmi_set_cpu_pwr_efficiency_mode(processor_handle: processor_handle_t, mode: int):
    """
    Function that sets the cpu pwr efficiency mode. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to configure.
        mode(`int`): Value handed to the library as a `c_uint8`.

    Returns:
        `None`.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `mode` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(mode, int):
        raise AmdSmiParameterException(mode, int)

    status = amdsmi_wrapper.amdsmi_set_cpu_pwr_efficiency_mode(
        processor_handle, ctypes.c_uint8(mode))
    _check_res(status)
|
|
|
|
def amdsmi_get_cpu_core_boostlimit(processor_handle: processor_handle_t) -> int:
    """
    Function that gets the cpu core boostlimit. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The `c_uint32` value written by the library call, in MHz.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    limit = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_core_boostlimit(
        processor_handle, ctypes.byref(limit))
    _check_res(status)

    # In MHz
    return limit.value
|
|
|
|
def amdsmi_get_cpu_socket_c0_residency(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu socket c0 residency. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " %".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    residency = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_c0_residency(
        processor_handle, ctypes.byref(residency))
    _check_res(status)

    return f"{residency.value} %"
|
|
|
|
def amdsmi_set_cpu_core_boostlimit(processor_handle: processor_handle_t, boostlimit: int):
    """
    Function that sets the cpu core boostlimit. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to configure.
        boostlimit(`int`): Value handed to the library as a `c_uint32`.

    Returns:
        `None`.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `boostlimit` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(boostlimit, int):
        raise AmdSmiParameterException(boostlimit, int)

    status = amdsmi_wrapper.amdsmi_set_cpu_core_boostlimit(
        processor_handle, ctypes.c_uint32(boostlimit))
    _check_res(status)
|
|
|
|
def amdsmi_set_cpu_socket_boostlimit(processor_handle: processor_handle_t, boostlimit: int):
    """
    Function that sets the cpu socket boostlimit. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to configure.
        boostlimit(`int`): Value handed to the library as a `c_uint32`.

    Returns:
        `None`.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `boostlimit` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(boostlimit, int):
        raise AmdSmiParameterException(boostlimit, int)

    status = amdsmi_wrapper.amdsmi_set_cpu_socket_boostlimit(
        processor_handle, ctypes.c_uint32(boostlimit))
    _check_res(status)
|
|
|
|
def amdsmi_get_cpu_ddr_bw(processor_handle: processor_handle_t):
    """
    Function that gets the cpu ddr bw. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: `ddr_bw_max_bw` and `ddr_bw_utilized_bw` (values followed by
            " Gbps") and `ddr_bw_utilized_pct` (value followed by " %"),
            taken from the `amdsmi_ddr_bw_metrics_t` filled by the library.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    metrics = amdsmi_wrapper.amdsmi_ddr_bw_metrics_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_ddr_bw(processor_handle, metrics)
    _check_res(status)

    bandwidth = {}
    bandwidth["ddr_bw_max_bw"] = f"{metrics.max_bw} Gbps"
    bandwidth["ddr_bw_utilized_bw"] = f"{metrics.utilized_bw} Gbps"
    bandwidth["ddr_bw_utilized_pct"] = f"{metrics.utilized_pct} %"
    return bandwidth
|
|
|
|
def amdsmi_get_cpu_socket_temperature(processor_handle: processor_handle_t) -> str:
    """
    Function that gets the cpu socket temperature. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The `c_uint32` value written by the library followed by
            " Degrees C".

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    reading = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_temperature(
        processor_handle, ctypes.byref(reading))
    _check_res(status)

    return f"{reading.value} Degrees C"
|
|
|
|
def amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(
        processor_handle: processor_handle_t, dimm_addr: int):
    """
    Function that gets the cpu dimm temp range and refresh rate. Wraps the
    same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.
        dimm_addr(`int`): DIMM address, handed to the library as a `c_uint8`.

    Returns:
        `dict`: `dimm_temperature_range` and `dimm_refresh_rate`, taken from
            the `range` and `ref_rate` fields of the
            `amdsmi_temp_range_refresh_rate_t` filled by the library.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `dimm_addr` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(dimm_addr, int):
        raise AmdSmiParameterException(dimm_addr, int)

    dimm_info = amdsmi_wrapper.amdsmi_temp_range_refresh_rate_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(
        processor_handle, ctypes.c_uint8(dimm_addr), ctypes.byref(dimm_info))
    _check_res(status)

    result = {}
    result["dimm_temperature_range"] = dimm_info.range
    result["dimm_refresh_rate"] = dimm_info.ref_rate
    return result
|
|
|
|
def amdsmi_get_cpu_dimm_power_consumption(
        processor_handle: processor_handle_t, dimm_addr: int):
    """
    Function that gets the cpu dimm power consumption. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.
        dimm_addr(`int`): DIMM address, handed to the library as a `c_uint8`.

    Returns:
        `dict`: `dimm_power_consumed` (value followed by " mW"),
            `dimm_power_update_rate` (value followed by " ms") and
            `dimm_dimm_addr`, taken from the `amdsmi_dimm_power_t` filled by
            the library.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `dimm_addr` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(dimm_addr, int):
        raise AmdSmiParameterException(dimm_addr, int)

    dimm_power = amdsmi_wrapper.amdsmi_dimm_power_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_dimm_power_consumption(
        processor_handle, ctypes.c_uint8(dimm_addr), ctypes.byref(dimm_power))
    _check_res(status)

    result = {}
    result["dimm_power_consumed"] = f"{dimm_power.power} mW"
    result["dimm_power_update_rate"] = f"{dimm_power.update_rate} ms"
    result["dimm_dimm_addr"] = dimm_power.dimm_addr
    return result
|
|
|
|
def amdsmi_get_cpu_dimm_thermal_sensor(
        processor_handle: processor_handle_t, dimm_addr: int):
    """
    Function that gets the cpu dimm thermal sensor. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.
        dimm_addr(`int`): DIMM address, handed to the library as a `c_uint8`.

    Returns:
        `dict`: `dimm_thermal_sensor_value`, `dimm_thermal_update_rate`
            (value followed by " ms"), `dimm_thermal_dimm_addr` and
            `dimm_thermal_temperature` (value followed by " Degrees C"),
            taken from the `amdsmi_dimm_thermal_t` filled by the library.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or `dimm_addr` is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(dimm_addr, int):
        raise AmdSmiParameterException(dimm_addr, int)

    thermal = amdsmi_wrapper.amdsmi_dimm_thermal_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_dimm_thermal_sensor(
        processor_handle, ctypes.c_uint8(dimm_addr), ctypes.byref(thermal))
    _check_res(status)

    result = {}
    result["dimm_thermal_sensor_value"] = thermal.sensor
    result["dimm_thermal_update_rate"] = f"{thermal.update_rate} ms"
    result["dimm_thermal_dimm_addr"] = thermal.dimm_addr
    result["dimm_thermal_temperature"] = f"{thermal.temp} Degrees C"
    return result
|
|
|
|
def amdsmi_set_cpu_xgmi_width(processor_handle: processor_handle_t, min_width: int, max_width: int):
    """
    Function that sets the cpu xgmi width. Wraps the same named function call.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to configure.
        min_width(`int`): Minimum width, handed to the library as a `c_uint8`.
        max_width(`int`): Maximum width, handed to the library as a `c_uint8`.

    Returns:
        `None`.

    Raises:
        `AmdSmiParameterException`: `processor_handle` is not an
            `amdsmi_processor_handle`, or a width is not an `int`.
        Exceptions raised by `_check_res` when the library call does not
            succeed.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(min_width, int):
        raise AmdSmiParameterException(min_width, int)
    if not isinstance(max_width, int):
        raise AmdSmiParameterException(max_width, int)

    lower = ctypes.c_uint8(min_width)
    upper = ctypes.c_uint8(max_width)
    status = amdsmi_wrapper.amdsmi_set_cpu_xgmi_width(processor_handle, lower, upper)
    _check_res(status)
|
|
|
|
def amdsmi_set_cpu_gmi3_link_width_range(
    processor_handle: processor_handle_t,
    min_link_width: int, max_link_width: int
):
    """
    Forward a min/max link width pair to the wrapper's
    amdsmi_set_cpu_gmi3_link_width_range.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        min_link_width (int): Minimum link width, passed as ctypes.c_uint8.
        max_link_width (int): Maximum link width, passed as ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Checked in declaration order so the first offending argument is reported.
    for link_width in (min_link_width, max_link_width):
        if not isinstance(link_width, int):
            raise AmdSmiParameterException(link_width, int)

    low_u8 = ctypes.c_uint8(min_link_width)
    high_u8 = ctypes.c_uint8(max_link_width)

    status = amdsmi_wrapper.amdsmi_set_cpu_gmi3_link_width_range(
        processor_handle, low_u8, high_u8
    )
    _check_res(status)
|
|
|
|
def amdsmi_cpu_apb_enable(
    processor_handle: processor_handle_t
):
    """
    Call the wrapper's amdsmi_cpu_apb_enable for the given processor handle.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_cpu_apb_enable(processor_handle)
    _check_res(status)
|
|
|
|
def amdsmi_cpu_apb_disable(
    processor_handle: processor_handle_t,
    pstate: int
):
    """
    Call the wrapper's amdsmi_cpu_apb_disable with the given pstate.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        pstate (int): P-state value, passed as ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(pstate, int):
        raise AmdSmiParameterException(pstate, int)

    status = amdsmi_wrapper.amdsmi_cpu_apb_disable(
        processor_handle, ctypes.c_uint8(pstate)
    )
    _check_res(status)
|
|
|
|
def amdsmi_set_cpu_socket_lclk_dpm_level(
    processor_handle: processor_handle_t,
    nbio_id: int, min_val: int, max_val: int
):
    """
    Forward an NBIO id and a min/max pair to the wrapper's
    amdsmi_set_cpu_socket_lclk_dpm_level.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        nbio_id (int): NBIO id, passed as ctypes.c_uint8.
        min_val (int): Minimum value, passed as ctypes.c_uint8.
        max_val (int): Maximum value, passed as ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Checked in declaration order so the first offending argument is reported.
    for candidate in (nbio_id, min_val, max_val):
        if not isinstance(candidate, int):
            raise AmdSmiParameterException(candidate, int)

    nbio_u8 = ctypes.c_uint8(nbio_id)
    low_u8 = ctypes.c_uint8(min_val)
    high_u8 = ctypes.c_uint8(max_val)

    status = amdsmi_wrapper.amdsmi_set_cpu_socket_lclk_dpm_level(
        processor_handle, nbio_u8, low_u8, high_u8
    )
    _check_res(status)
|
|
|
|
def amdsmi_get_cpu_socket_lclk_dpm_level(
    processor_handle: processor_handle_t,
    nbio_id: int
):
    """
    Query the wrapper's amdsmi_get_cpu_socket_lclk_dpm_level for one NBIO id.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        nbio_id (int): NBIO id, passed as ctypes.c_uint8.

    Returns:
        dict: "nbio_max_dpm_level" and "nbio_min_dpm_level" taken from the
        amdsmi_dpm_level_t result struct.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(nbio_id, int):
        raise AmdSmiParameterException(nbio_id, int)

    nbio_u8 = ctypes.c_uint8(nbio_id)
    levels = amdsmi_wrapper.amdsmi_dpm_level_t()

    # The struct instance itself is handed over (no explicit ctypes.byref),
    # exactly as the wrapper call has always been made here.
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_lclk_dpm_level(
        processor_handle, nbio_u8, levels
    )
    _check_res(status)

    return dict(
        nbio_max_dpm_level=levels.max_dpm_level,
        nbio_min_dpm_level=levels.min_dpm_level,
    )
|
|
|
|
def amdsmi_set_cpu_pcie_link_rate(
    processor_handle: processor_handle_t,
    rate_ctrl: int
):
    """
    Call the wrapper's amdsmi_set_cpu_pcie_link_rate and report the previous
    mode the library writes back.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        rate_ctrl (int): Rate control value, passed as ctypes.c_uint8.

    Returns:
        str: Decimal string of the previous-mode byte filled in by the call.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(rate_ctrl, int):
        raise AmdSmiParameterException(rate_ctrl, int)

    requested = ctypes.c_uint8(rate_ctrl)
    previous = ctypes.c_uint8()  # output slot written by the library

    status = amdsmi_wrapper.amdsmi_set_cpu_pcie_link_rate(
        processor_handle, requested, ctypes.byref(previous)
    )
    _check_res(status)

    return str(previous.value)
|
|
|
|
def amdsmi_set_cpu_df_pstate_range(
    processor_handle: processor_handle_t,
    max_pstate: int, min_pstate: int
):
    """
    Forward a max/min pstate pair to the wrapper's
    amdsmi_set_cpu_df_pstate_range.

    Note that the maximum comes first, both in this signature and in the
    wrapper call.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        max_pstate (int): Maximum pstate, passed as ctypes.c_uint8.
        min_pstate (int): Minimum pstate, passed as ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Checked in declaration order so the first offending argument is reported.
    for pstate in (max_pstate, min_pstate):
        if not isinstance(pstate, int):
            raise AmdSmiParameterException(pstate, int)

    upper_u8 = ctypes.c_uint8(max_pstate)
    lower_u8 = ctypes.c_uint8(min_pstate)

    status = amdsmi_wrapper.amdsmi_set_cpu_df_pstate_range(
        processor_handle, upper_u8, lower_u8
    )
    _check_res(status)
|
|
|
|
def amdsmi_get_cpu_current_io_bandwidth(
    processor_handle: processor_handle_t,
    encoding: int,
    link_name: str
):
    """
    Query the wrapper's amdsmi_get_cpu_current_io_bandwidth for one link.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        encoding (int): Stored in the request's bw_type field as c_uint32.
        link_name (str): Link name; UTF-8 encoded into the request struct.

    Returns:
        str: The value written by the library followed by " Mbps".

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(encoding, int):
        raise AmdSmiParameterException(encoding, int)
    if not isinstance(link_name, str):
        raise AmdSmiParameterException(link_name, str)

    # Build the amdsmi_link_id_bw_type_t request describing what to measure.
    request = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
    request.bw_type = ctypes.c_uint32(encoding)
    request.link_name = ctypes.create_string_buffer(link_name.encode('utf-8'))
    bandwidth = ctypes.c_uint32()

    status = amdsmi_wrapper.amdsmi_get_cpu_current_io_bandwidth(
        processor_handle, request, ctypes.byref(bandwidth)
    )
    _check_res(status)

    return f"{bandwidth.value} Mbps"
|
|
|
|
def amdsmi_get_cpu_current_xgmi_bw(
    processor_handle: processor_handle_t,
    encoding: int,
    link_name: str
):
    """
    Query the wrapper's amdsmi_get_cpu_current_xgmi_bw for one link.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        encoding (int): Stored in the request's bw_type field as c_uint32.
        link_name (str): Link name; UTF-8 encoded into the request struct.

    Returns:
        str: The value written by the library followed by " Mbps".

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(encoding, int):
        raise AmdSmiParameterException(encoding, int)
    if not isinstance(link_name, str):
        raise AmdSmiParameterException(link_name, str)

    # Build the amdsmi_link_id_bw_type_t request describing what to measure.
    request = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
    request.bw_type = ctypes.c_uint32(encoding)
    request.link_name = ctypes.create_string_buffer(link_name.encode('utf-8'))
    bandwidth = ctypes.c_uint32()

    status = amdsmi_wrapper.amdsmi_get_cpu_current_xgmi_bw(
        processor_handle, request, ctypes.byref(bandwidth)
    )
    _check_res(status)

    return f"{bandwidth.value} Mbps"
|
|
|
|
def amdsmi_get_hsmp_metrics_table_version(
    processor_handle: processor_handle_t
):
    """
    Return the value written by the wrapper's
    amdsmi_get_hsmp_metrics_table_version.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        int: The 32-bit unsigned version value filled in by the library.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    version = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_hsmp_metrics_table_version(
        processor_handle, ctypes.byref(version)
    )
    _check_res(status)

    return version.value
|
|
|
|
def amdsmi_set_cpu_rail_isofreq_policy(
    processor_handle: processor_handle_t,
    value: int):
    """
    Pass value to the wrapper's amdsmi_set_cpu_rail_isofreq_policy.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        value (int): Forwarded to the wrapper unchanged; it is not
            type-checked here.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_set_cpu_rail_isofreq_policy(
        processor_handle, value
    )
    _check_res(status)
|
|
|
|
def amdsmi_get_cpu_rail_isofreq_policy(
    processor_handle: processor_handle_t,
) -> int:
    """
    Return the byte written by the wrapper's
    amdsmi_get_cpu_rail_isofreq_policy.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        int: The unsigned 8-bit value filled in by the library.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    policy = ctypes.c_uint8()
    status = amdsmi_wrapper.amdsmi_get_cpu_rail_isofreq_policy(
        processor_handle, ctypes.byref(policy)
    )
    _check_res(status)

    return policy.value
|
|
|
|
def amdsmi_get_dfc_ctrl(
    processor_handle: processor_handle_t,
) -> int:
    """
    Return the byte written by the wrapper's amdsmi_get_dfc_ctrl.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        int: The unsigned 8-bit value filled in by the library.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    control = ctypes.c_uint8()
    status = amdsmi_wrapper.amdsmi_get_dfc_ctrl(
        processor_handle, ctypes.byref(control)
    )
    _check_res(status)

    return control.value
|
|
|
|
def amdsmi_set_dfc_ctrl(
    processor_handle: processor_handle_t,
    value: int):
    """
    Pass value to the wrapper's amdsmi_set_dfc_ctrl.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        value (int): Forwarded to the wrapper unchanged; it is not
            type-checked here.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_set_dfc_ctrl(processor_handle, value)
    _check_res(status)
|
|
|
|
# Reinterpret a 32 bit unsigned integer as a signed two's complement value
def check_msb_32(num):
    """
    Return num reinterpreted as a signed NO_OF_32BITS-bit integer.

    Python integers are unbounded, so ``~num + 1`` is simply ``-num`` and
    does not produce the fixed-width two's complement value (0xFFFFFFFF
    would become -4294967295 instead of -1). The conversion is therefore
    done by subtracting 2**NO_OF_32BITS when the sign bit is set.

    Parameters:
        num (int): Raw register value. Only the low NO_OF_32BITS bits are
            considered, so a value that is already negative is handled too.

    Returns:
        int: num unchanged when the sign bit is clear, otherwise the
        corresponding negative value.
    """
    modulus = 1 << NO_OF_32BITS
    # Keep only the bits that belong to the fixed-width word
    num &= modulus - 1

    # If msb = 1, the word encodes a negative number
    if num & (modulus >> 1):
        num -= modulus
    return num
|
|
|
|
# Reinterpret a 64 bit unsigned integer as a signed two's complement value
def check_msb_64(num):
    """
    Return num reinterpreted as a signed NO_OF_64BITS-bit integer.

    Python integers are unbounded, so ``~num + 1`` is simply ``-num`` and
    does not produce the fixed-width two's complement value
    (0xFFFFFFFFFFFFFFFF would become -18446744073709551615 instead of -1).
    The conversion is therefore done by subtracting 2**NO_OF_64BITS when the
    sign bit is set.

    Parameters:
        num (int): Raw register value. Only the low NO_OF_64BITS bits are
            considered, so a value that is already negative is handled too.

    Returns:
        int: num unchanged when the sign bit is clear, otherwise the
        corresponding negative value.
    """
    modulus = 1 << NO_OF_64BITS
    # Keep only the bits that belong to the fixed-width word
    num &= modulus - 1

    # If msb = 1, the word encodes a negative number
    if num & (modulus >> 1):
        num -= modulus
    return num
|
|
|
|
def amdsmi_get_hsmp_metrics_table(
    processor_handle: processor_handle_t
):
    """
    Fetch the HSMP metrics table and return it as a dictionary.

    Raw fixed-point fields are scaled by 2**-10 (q10/uq10) or 2**-16 (uq16)
    and rounded to three decimals. Most scaled entries are strings with a
    unit suffix; a few are returned as plain numbers.

    Note: "mtbl_timestamp_readable" is the *current* host local time at the
    moment of the call, not a conversion of the table's timestamp. The
    table's own value is returned untouched in "mtbl_timestamp_raw".

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        dict: One "mtbl_*" entry per reported metric.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    mtbl = amdsmi_wrapper.amdsmi_hsmp_metrics_table_t()

    # Encodings for the metric table defined for hsmp
    fraction_q10 = 1 / math.pow(2, 10)
    fraction_uq10 = fraction_q10
    fraction_uq16 = 1 / math.pow(2, 16)

    def _q10_32(raw):
        # q10 value stored in a 32 bit field, passed through check_msb_32
        return round(check_msb_32(raw) * fraction_q10, 3)

    def _q10_64(raw):
        # q10 value stored in a 64 bit field, passed through check_msb_64
        return round(check_msb_64(raw) * fraction_q10, 3)

    def _uq10(raw):
        # unsigned q10 scalar
        return round(raw * fraction_uq10, 3)

    def _uq10_list(values):
        # unsigned q10 array, scaled element by element
        return [round(x * fraction_uq10, 3) for x in list(values)]

    def _energy(raw):
        # uq16 value additionally divided by KILO (reported with a kJ suffix)
        return round((raw * fraction_uq16) / KILO, 3)

    _check_res(
        amdsmi_wrapper.amdsmi_get_hsmp_metrics_table(
            processor_handle, mtbl
        )
    )

    # The readable timestamp is taken from the host clock; the previous
    # int(mtbl.timestamp) assignment was overwritten before use and is gone.
    timeinfo = localtime(time())

    return {
        "mtbl_accumulation_counter": mtbl.accumulation_counter,
        "mtbl_max_socket_temperature": f"{_q10_32(mtbl.max_socket_temperature)} °C",
        "mtbl_max_vr_temperature": f"{_q10_32(mtbl.max_vr_temperature)} °C",
        "mtbl_max_hbm_temperature": f"{_q10_32(mtbl.max_hbm_temperature)} °C",
        "mtbl_max_socket_temperature_acc": f"{_q10_64(mtbl.max_socket_temperature_acc)} °C",
        "mtbl_max_vr_temperature_acc": f"{_q10_64(mtbl.max_vr_temperature_acc)} °C",
        "mtbl_max_hbm_temperature_acc": f"{_q10_64(mtbl.max_hbm_temperature_acc)} °C",
        "mtbl_socket_power_limit": f"{_uq10(mtbl.socket_power_limit)} W",
        "mtbl_max_socket_power_limit": f"{_uq10(mtbl.max_socket_power_limit)} W",
        "mtbl_socket_power": f"{_uq10(mtbl.socket_power)} W",
        "mtbl_timestamp_raw": mtbl.timestamp,
        "mtbl_timestamp_readable": f"{asctime(timeinfo)}",
        "mtbl_socket_energy_acc": f"{_energy(mtbl.socket_energy_acc)} kJ",
        "mtbl_ccd_energy_acc": f"{_energy(mtbl.ccd_energy_acc)} kJ",
        "mtbl_xcd_energy_acc": f"{_energy(mtbl.xcd_energy_acc)} kJ",
        "mtbl_aid_energy_acc": f"{_energy(mtbl.aid_energy_acc)} kJ",
        "mtbl_hbm_energy_acc": f"{_energy(mtbl.hbm_energy_acc)} kJ",
        "mtbl_cclk_frequency_limit": f"{_uq10(mtbl.cclk_frequency_limit)} GHz",
        "mtbl_gfxclk_frequency_limit": f"{_uq10(mtbl.gfxclk_frequency_limit)} MHz",
        "mtbl_fclk_frequency": f"{_uq10(mtbl.fclk_frequency)} MHz",
        "mtbl_uclk_frequency": f"{_uq10(mtbl.uclk_frequency)} MHz",
        "mtbl_socclk_frequency": f"{_uq10_list(mtbl.socclk_frequency)} MHz",
        "mtbl_vclk_frequency": f"{_uq10_list(mtbl.vclk_frequency)} MHz",
        "mtbl_dclk_frequency": f"{_uq10_list(mtbl.dclk_frequency)} MHz",
        "mtbl_lclk_frequency": f"{_uq10_list(mtbl.lclk_frequency)} MHz",
        "mtbl_fclk_frequency_table": f"{_uq10_list(mtbl.fclk_frequency_table)} MHz",
        "mtbl_uclk_frequency_table": f"{_uq10_list(mtbl.uclk_frequency_table)} MHz",
        "mtbl_socclk_frequency_table": f"{_uq10_list(mtbl.socclk_frequency_table)} MHz",
        "mtbl_vclk_frequency_table": f"{_uq10_list(mtbl.vclk_frequency_table)} MHz",
        "mtbl_dclk_frequency_table": f"{_uq10_list(mtbl.dclk_frequency_table)} MHz",
        "mtbl_lclk_frequency_table": f"{_uq10_list(mtbl.lclk_frequency_table)} MHz",
        "mtbl_cclk_frequency_acc": f"{_uq10_list(mtbl.cclk_frequency_acc)} GHz",
        "mtbl_gfxclk_frequency_acc": f"{_uq10_list(mtbl.gfxclk_frequency_acc)} MHz",
        "mtbl_gfxclk_frequency": f"{_uq10_list(mtbl.gfxclk_frequency)} MHz",
        "mtbl_max_cclk_frequency": f"{_uq10(mtbl.max_cclk_frequency)} GHz",
        "mtbl_min_cclk_frequency": f"{_uq10(mtbl.min_cclk_frequency)} GHz",
        "mtbl_max_gfxclk_frequency": f"{_uq10(mtbl.max_gfxclk_frequency)} MHz",
        "mtbl_min_gfxclk_frequency": f"{_uq10(mtbl.min_gfxclk_frequency)} MHz",
        "mtbl_max_lclk_dpm_range": mtbl.max_lclk_dpm_range,
        "mtbl_min_lclk_dpm_range": mtbl.min_lclk_dpm_range,
        "mtbl_xgmi_width": _uq10(mtbl.xgmi_width),
        "mtbl_xgmi_bitrate": f"{_uq10(mtbl.xgmi_bitrate)} Gbps",
        "mtbl_xgmi_read_bandwidth_acc": f"{_uq10_list(mtbl.xgmi_read_bandwidth_acc)} Gbps",
        "mtbl_xgmi_write_bandwidth_acc": f"{_uq10_list(mtbl.xgmi_write_bandwidth_acc)} Gbps",
        "mtbl_socket_c0_residency": f"{_uq10(mtbl.socket_c0_residency)} %",
        "mtbl_socket_gfx_busy": f"{_uq10(mtbl.socket_gfx_busy)} %",
        # The hbm_* keys below are filled from the table's dram_* fields
        "mtbl_hbm_bandwidth_utilization": f"{_uq10(mtbl.dram_bandwidth_utilization)} %",
        "mtbl_socket_c0_residency_acc": _uq10(mtbl.socket_c0_residency_acc),
        "mtbl_socket_gfx_busy_acc": _uq10(mtbl.socket_gfx_busy_acc),
        "mtbl_hbm_bandwidth_acc": f"{_uq10(mtbl.dram_bandwidth_acc)} Gbps",
        "mtbl_max_hbm_bandwidth": f"{_uq10(mtbl.max_dram_bandwidth)} Gbps",
        "mtbl_dram_bandwidth_utilization_acc": _uq10(mtbl.dram_bandwidth_utilization_acc),
        "mtbl_pcie_bandwidth_acc": f"{_uq10_list(mtbl.pcie_bandwidth_acc)} Gbps",
        "mtbl_prochot_residency_acc": mtbl.prochot_residency_acc,
        "mtbl_ppt_residency_acc": mtbl.ppt_residency_acc,
        "mtbl_socket_thm_residency_acc": mtbl.socket_thm_residency_acc,
        "mtbl_vr_thm_residency_acc": mtbl.vr_thm_residency_acc,
        "mtbl_hbm_thm_residency_acc": mtbl.hbm_thm_residency_acc,
    }
|
|
|
|
def amdsmi_first_online_core_on_cpu_socket(
    processor_handle: processor_handle_t
):
    """
    Return the core index written by the wrapper's
    amdsmi_first_online_core_on_cpu_socket.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        int: The 32-bit unsigned core index filled in by the library.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    core_index = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_first_online_core_on_cpu_socket(
        processor_handle, ctypes.byref(core_index)
    )
    _check_res(status)

    return core_index.value
|
|
|
|
def amdsmi_get_cpu_family():
    """
    Return the value written by the wrapper's amdsmi_get_cpu_family.

    Returns:
        int: The 32-bit unsigned family value filled in by the library.
    """
    cpu_family = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_family(ctypes.byref(cpu_family))
    _check_res(status)
    return cpu_family.value
|
|
|
|
def amdsmi_get_cpu_model():
    """
    Return the value written by the wrapper's amdsmi_get_cpu_model.

    Returns:
        int: The 32-bit unsigned model value filled in by the library.
    """
    cpu_model = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_model(ctypes.byref(cpu_model))
    _check_res(status)
    return cpu_model.value
|
|
|
|
def amdsmi_get_cpu_model_name(
    processor_handle: processor_handle_t
):
    """
    Return the model_name field of the amdsmi_cpu_info_t filled by the
    wrapper's amdsmi_get_cpu_model_name, formatted as a string.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        str: The f-string rendering of cpu_info.model_name.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    info = amdsmi_wrapper.amdsmi_cpu_info_t()

    # The struct instance itself is handed over (no explicit ctypes.byref).
    status = amdsmi_wrapper.amdsmi_get_cpu_model_name(processor_handle, info)
    _check_res(status)

    # NOTE(review): if model_name is a bytes-valued field, this renders its
    # repr (b'...') rather than decoded text — confirm against the wrapper.
    return f"{info.model_name}"
|
|
|
|
def amdsmi_get_cpu_cores_per_socket(sock_count: ctypes.c_uint32):
    """
    Call the wrapper's amdsmi_get_cpu_cores_per_socket and return the
    resulting socket info.

    Parameters:
        sock_count: Socket count forwarded to the wrapper unchanged.

    Returns:
        dict: "socket_id" and "cores_per_socket" from the amdsmi_sock_info_t
        result struct.
    """
    sock_info = amdsmi_wrapper.amdsmi_sock_info_t()

    # The struct instance itself is handed over (no explicit ctypes.byref).
    status = amdsmi_wrapper.amdsmi_get_cpu_cores_per_socket(sock_count, sock_info)
    _check_res(status)

    return dict(
        socket_id=sock_info.socket_id,
        cores_per_socket=sock_info.cores_per_socket,
    )
|
|
|
|
def amdsmi_get_cpu_socket_count():
    """
    Return the value written by the wrapper's amdsmi_get_cpu_socket_count.

    Returns:
        int: The 32-bit unsigned socket count filled in by the library.
    """
    socket_total = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_count(ctypes.byref(socket_total))
    _check_res(status)
    return socket_total.value
|
|
|
|
def amdsmi_init(flag=AmdSmiInitFlags.INIT_AMD_GPUS):
    """
    Call the wrapper's amdsmi_init with the given init flag.

    Parameters:
        flag (AmdSmiInitFlags): Init flag; defaults to
            AmdSmiInitFlags.INIT_AMD_GPUS.

    Raises:
        AmdSmiParameterException: If flag is not an AmdSmiInitFlags member.
    """
    if not isinstance(flag, AmdSmiInitFlags):
        raise AmdSmiParameterException(flag, AmdSmiInitFlags)

    status = amdsmi_wrapper.amdsmi_init(flag)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_shut_down():
    """Call the wrapper's amdsmi_shut_down and validate its status."""
    status = amdsmi_wrapper.amdsmi_shut_down()
    _check_res(status)
|
|
|
|
|
|
def amdsmi_get_processor_type(
    processor_handle: processor_handle_t,
) -> Dict[str, str]:
    """
    Report the processor type of a handle as an enum member name.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        Dict[str, str]: {"processor_type": <AmdSmiProcessorType member name>}.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_type = amdsmi_wrapper.processor_type_t()
    status = amdsmi_wrapper.amdsmi_get_processor_type(
        processor_handle, ctypes.byref(raw_type)
    )
    _check_res(status)

    # Map the numeric value onto the Python enum and expose only its name.
    type_name = AmdSmiProcessorType(raw_type.value).name
    return {"processor_type": type_name}
|
|
|
|
|
|
def amdsmi_get_gpu_device_bdf(processor_handle: processor_handle_t) -> str:
    """
    Fetch the amdsmi_bdf_t of a device and format it with _format_bdf.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        str: The result of _format_bdf applied to the filled-in struct.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    bdf = amdsmi_wrapper.amdsmi_bdf_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_device_bdf(
        processor_handle, ctypes.byref(bdf)
    )
    _check_res(status)

    return _format_bdf(bdf)
|
|
|
|
|
|
def amdsmi_get_gpu_device_uuid(processor_handle: processor_handle_t) -> str:
    """
    Fetch the UUID string of a device.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        str: The NUL-terminated buffer content decoded as UTF-8.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output buffer plus an in/out length initialised to the buffer size.
    uuid_buffer = ctypes.create_string_buffer(AMDSMI_GPU_UUID_SIZE)
    buffer_length = ctypes.c_uint32(AMDSMI_GPU_UUID_SIZE)

    status = amdsmi_wrapper.amdsmi_get_gpu_device_uuid(
        processor_handle, ctypes.byref(buffer_length), uuid_buffer
    )
    _check_res(status)

    return uuid_buffer.value.decode("utf-8")
|
|
|
|
|
|
def amdsmi_get_gpu_enumeration_info(processor_handle: processor_handle_t) -> Dict[str, Any]:
    """
    Collect the enumeration identifiers of a GPU (DRM render, DRM card, HSA
    id, HIP id and HIP UUID).

    Parameters:
        processor_handle (amdsmi_processor_handle_t): The processor handle.

    Returns:
        Dict[str, Any]: "drm_render", "drm_card", "hsa_id" and "hip_id", each
        passed through _validate_if_max_uint for the 32-bit range, and
        "hip_uuid" decoded as UTF-8.

    Raises:
        AmdSmiParameterException: If the input parameters are invalid.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Let the library fill the struct, then validate the returned status.
    raw_info = amdsmi_wrapper.amdsmi_enumeration_info_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_enumeration_info(
            processor_handle, ctypes.byref(raw_info)
        )
    )

    uint32 = MaxUIntegerTypes.UINT32_T
    result = {}
    result["drm_render"] = _validate_if_max_uint(raw_info.drm_render, uint32)
    result["drm_card"] = _validate_if_max_uint(raw_info.drm_card, uint32)
    result["hsa_id"] = _validate_if_max_uint(raw_info.hsa_id, uint32)
    result["hip_id"] = _validate_if_max_uint(raw_info.hip_id, uint32)
    result["hip_uuid"] = raw_info.hip_uuid.decode('utf-8')
    return result
|
|
|
|
def amdsmi_get_cpu_affinity_with_scope(
    processor_handle: processor_handle_t,
    scope: AmdSmiAffinityScope
) -> List[int]:
    """
    Fetch the CPU affinity bitmask of a device for the given scope.

    The mask buffer is sized as ceil(socket_count * cores_per_socket / 64)
    64-bit words, using amdsmi_get_cpu_socket_count and
    amdsmi_get_cpu_cores_per_socket.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        scope (AmdSmiAffinityScope): Affinity scope forwarded to the library.

    Returns:
        The filled ctypes array of c_uint64 words. It is returned as-is (not
        converted to a Python list); it can be indexed and iterated to obtain
        ints.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    if not isinstance(scope, AmdSmiAffinityScope):
        raise AmdSmiParameterException(scope, AmdSmiAffinityScope)

    socket_count = amdsmi_get_cpu_socket_count()
    sock_info = amdsmi_get_cpu_cores_per_socket(socket_count)
    core_count = sock_info['cores_per_socket']

    # One bit per core: number of 64-bit words needed, rounded up.
    bits_per_word = ctypes.sizeof(ctypes.c_uint64) * 8
    word_count = int(math.ceil((socket_count * core_count) / bits_per_word))
    size = ctypes.c_uint32(word_count)
    cpu_set = (ctypes.c_uint64 * size.value)()

    _check_res(
        amdsmi_wrapper.amdsmi_get_cpu_affinity_with_scope(
            processor_handle, size, cpu_set, scope)
    )
    return cpu_set
|
|
|
|
|
|
def amdsmi_get_gpu_asic_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Fetch the amdsmi_asic_info_t of a GPU and convert it to a dictionary.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        Dict[str, Any]: Keys "market_name", "vendor_id", "vendor_name",
        "subvendor_id", "device_id", "rev_id", "asic_serial", "oam_id",
        "num_compute_units", "target_graphics_version", "subsystem_id" and
        "flags". Empty names, zero vendor/device ids and an empty serial are
        reported as "N/A"; ids are rendered as hex strings.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_asic_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_asic_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    uint32 = MaxUIntegerTypes.UINT32_T
    market_name = _pad_hex_value(raw.market_name.decode("utf-8"), 4)
    graphics_version = hex(raw.target_graphics_version)[2:]
    subsystem_id = _validate_if_max_uint(raw.subsystem_id, uint32)
    subvendor_id = _validate_if_max_uint(raw.subvendor_id, uint32)
    # Only integer results are hex-formatted; any other result of the
    # validation helper is passed through untouched.
    if isinstance(subsystem_id, int):
        subsystem_id = _pad_hex_value(hex(subsystem_id), 4)
    if isinstance(subvendor_id, int):
        subvendor_id = _pad_hex_value(hex(subvendor_id), 4)

    asic_info = {
        "market_name": market_name,
        "vendor_id": raw.vendor_id,
        "vendor_name": raw.vendor_name.decode("utf-8"),
        "subvendor_id": subvendor_id,
        "device_id": raw.device_id,
        "rev_id": _pad_hex_value(hex(raw.rev_id), 2),
        "asic_serial": raw.asic_serial.decode("utf-8"),
        "oam_id": _validate_if_max_uint(raw.oam_id, uint32),
        "num_compute_units": _validate_if_max_uint(raw.num_of_compute_units, uint32),
        "target_graphics_version": "gfx" + graphics_version,
        "subsystem_id": subsystem_id,
        "flags": raw.flags
    }

    # Empty strings become "N/A"
    for key in ("market_name", "vendor_name"):
        asic_info[key] = asic_info[key] or "N/A"

    # Non-zero ids become hex strings, zero becomes "N/A"
    for key in ("vendor_id", "device_id"):
        numeric_id = asic_info[key]
        asic_info[key] = hex(numeric_id) if numeric_id else "N/A"

    # Convert asic serial (hex string) to hex output format
    serial_text = asic_info["asic_serial"]
    if serial_text:
        asic_info["asic_serial"] = "0x{:016X}".format(int(serial_text, base=16))
    else:
        asic_info["asic_serial"] = "N/A"

    # Remove commas from vendor name for clean output
    asic_info["vendor_name"] = asic_info["vendor_name"].replace(',', '')

    return asic_info
|
|
|
|
|
|
def amdsmi_get_gpu_kfd_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Fetch the amdsmi_kfd_info_t of a GPU and convert it to a dictionary.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        Dict[str, Any]: "kfd_id" (validated against the 64-bit range),
        "node_id" and "current_partition_id" (validated against the 32-bit
        range), each passed through _validate_if_max_uint.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_kfd_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_kfd_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    result = {}
    result["kfd_id"] = _validate_if_max_uint(raw.kfd_id, MaxUIntegerTypes.UINT64_T)
    result["node_id"] = _validate_if_max_uint(raw.node_id, MaxUIntegerTypes.UINT32_T)
    result["current_partition_id"] = _validate_if_max_uint(
        raw.current_partition_id, MaxUIntegerTypes.UINT32_T
    )
    return result
|
|
|
|
def amdsmi_get_supported_power_cap(
        processor_handle: processor_handle_t) ->Dict[str, Any]:
    """
    List the power-cap sensors reported by the wrapper's
    amdsmi_get_supported_power_cap.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        Dict[str, Any]: "sensor_inds" (list of sensor indices) and
        "sensor_types" (list of AmdSmiPowerCapType), each with as many
        entries as the sensor count reported by the library.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Capacity of the two output arrays handed to the library.
    CONST_AMDSMI_MAX_POWER_SENSORS = 2

    reported_count = ctypes.c_uint32()
    index_buffer = (ctypes.c_uint32 * CONST_AMDSMI_MAX_POWER_SENSORS)()
    type_buffer = (amdsmi_wrapper.amdsmi_power_cap_type_t * CONST_AMDSMI_MAX_POWER_SENSORS)()

    status = amdsmi_wrapper.amdsmi_get_supported_power_cap(
        processor_handle, ctypes.byref(reported_count), index_buffer, type_buffer
    )
    _check_res(status)

    positions = range(reported_count.value)
    indices = [index_buffer[pos] for pos in positions]
    types = [AmdSmiPowerCapType(type_buffer[pos]) for pos in positions]
    return {"sensor_inds": indices, "sensor_types": types}
|
|
|
|
def amdsmi_get_power_cap_info(
    processor_handle: processor_handle_t,
    sensor_ind: int = AmdSmiPowerCapType.PPT0
) -> Dict[str, Any]:
    """
    Fetch the amdsmi_power_cap_info_t of one power sensor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        sensor_ind (int): Sensor index forwarded to the library; defaults to
            AmdSmiPowerCapType.PPT0.

    Returns:
        Dict[str, Any]: "power_cap", "default_power_cap", "dpm_cap",
        "min_power_cap" and "max_power_cap" copied from the result struct.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    cap = amdsmi_wrapper.amdsmi_power_cap_info_t()
    status = amdsmi_wrapper.amdsmi_get_power_cap_info(
        processor_handle, sensor_ind, ctypes.byref(cap)
    )
    _check_res(status)

    report = {}
    report["power_cap"] = cap.power_cap
    report["default_power_cap"] = cap.default_power_cap
    report["dpm_cap"] = cap.dpm_cap
    report["min_power_cap"] = cap.min_power_cap
    report["max_power_cap"] = cap.max_power_cap
    return report
|
|
|
|
def _get_name_value(num, data) -> List[Dict[str, int]]:
    """
    Decode an array of amdsmi_name_value_t records into name/value dicts.

    Field access through the ctypes array type is avoided (it is reported as
    broken); instead each record is located by address and its members are
    read with raw memory operations.

    Parameters:
        num (ctypes.c_uint32): Number of records in the buffer.
        data (ctypes.c_void_p): Pointer to the first record.

    Returns:
        List[Dict[str, int]]: One {'name': str, 'value': int} dict per record.
    """
    # Record layout used for stepping through the buffer:
    #   name  -> AMDSMI_MAX_STRING_LENGTH / 4 bytes (64 when the length is 256)
    #   value -> 8 bytes (uint64)
    name_field_bytes = int(AMDSMI_MAX_STRING_LENGTH / 4)
    value_field_bytes = 8
    stride = name_field_bytes + value_field_bytes

    # View the buffer as an array of opaque stride-sized records.
    records = ctypes.cast(data, ctypes.POINTER(ctypes.c_char * stride))
    name_view_type = ctypes.POINTER(ctypes.c_char * AMDSMI_MAX_STRING_LENGTH)
    value_view_type = ctypes.POINTER(ctypes.c_uint64)

    pairs = []
    for index in range(num.value):
        record = records[index]
        base_address = ctypes.addressof(record)

        # Name: read the NUL-terminated bytes at the start of the record.
        name_view = ctypes.cast(base_address, name_view_type)
        raw_name = ctypes.string_at(name_view.contents)
        name_text = raw_name.rstrip(b'\x00').decode('utf-8', errors='replace')

        # Value: uint64 read at base + stride.
        # NOTE(review): this is the stride offset, not the end of the name
        # field (base + name_field_bytes) — confirm against the C layout.
        value_view = ctypes.cast(base_address + stride, value_view_type)
        number = value_view.contents.value

        pairs.append({'name': name_text, 'value': number})

    return pairs
|
|
|
|
|
|
def amdsmi_get_gpu_pm_metrics_info(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """
    Fetch the PM metrics of a GPU as a list of name/value pairs.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        List[Dict[str, Any]]: The records decoded by _get_name_value, each a
        dict with 'name' and 'value'.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    pm_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)()
    num_mets = ctypes.c_uint32(0)

    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_pm_metrics_info(
            processor_handle, ctypes.byref(pm_metrics), ctypes.byref(num_mets)
        )
    )

    try:
        return _get_name_value(num_mets, pm_metrics)
    finally:
        # Free the allocated memory even if decoding the records fails
        amdsmi_wrapper.amdsmi_free_name_value_pairs(pm_metrics)
|
|
|
|
|
|
def amdsmi_get_gpu_reg_table_info(
    processor_handle: processor_handle_t, reg_type: AmdSmiRegType
) -> List[Dict[str, Any]]:
    """
    Fetch one register table of a GPU as a list of name/value pairs.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.
        reg_type (AmdSmiRegType): Register table to read.

    Returns:
        List[Dict[str, Any]]: The records decoded by _get_name_value, each a
        dict with 'name' and 'value'.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(reg_type, AmdSmiRegType):
        raise AmdSmiParameterException(reg_type, AmdSmiRegType)

    reg_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)()
    num_regs = ctypes.c_uint32(0)

    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_reg_table_info(
            processor_handle, reg_type, ctypes.byref(reg_metrics), ctypes.byref(num_regs)
        )
    )

    try:
        return _get_name_value(num_regs, reg_metrics)
    finally:
        # Free the allocated memory even if decoding the records fails
        amdsmi_wrapper.amdsmi_free_name_value_pairs(reg_metrics)
|
|
|
|
|
|
def amdsmi_get_gpu_vram_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Fetch the amdsmi_vram_info_t of a GPU and convert it to a dictionary.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        Dict[str, Any]: "vram_type", "vram_vendor" (decoded as UTF-8),
        "vram_size", "vram_bit_width" and "vram_max_bandwidth"; the last two
        are passed through _validate_if_max_uint for the 32-bit and 64-bit
        range respectively.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    vram = amdsmi_wrapper.amdsmi_vram_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_vram_info(
        processor_handle, ctypes.byref(vram)
    )
    _check_res(status)

    report = {}
    report["vram_type"] = vram.vram_type
    report["vram_vendor"] = vram.vram_vendor.decode("utf-8")
    report["vram_size"] = vram.vram_size
    report["vram_bit_width"] = _validate_if_max_uint(
        vram.vram_bit_width, MaxUIntegerTypes.UINT32_T
    )
    report["vram_max_bandwidth"] = _validate_if_max_uint(
        vram.vram_max_bandwidth, MaxUIntegerTypes.UINT64_T
    )
    return report
|
|
|
|
|
|
def amdsmi_get_gpu_xgmi_link_status(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Fetch the XGMI link status of a GPU as a list of one-letter codes.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle.

    Returns:
        Dict[str, Any]: "status" is a list with one entry per link, limited
        to the first total_links entries: "X" (disabled), "U" (up), "D"
        (down) or "N/A" (any other enum name). "total_links" is the count
        reported by the library.

    Raises:
        AmdSmiParameterException: If processor_handle has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status_info = amdsmi_wrapper.amdsmi_xgmi_link_status_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_xgmi_link_status(
            processor_handle, ctypes.byref(status_info))
    )

    enum_names = amdsmi_wrapper.amdsmi_xgmi_link_status_type_t__enumvalues
    code_for_state = {
        'AMDSMI_XGMI_LINK_DISABLE': "X",  # XGMI link is disabled
        'AMDSMI_XGMI_LINK_UP': "U",       # XGMI link is up
        'AMDSMI_XGMI_LINK_DOWN': "D",     # XGMI link is down
    }

    codes = []
    for position, link in enumerate(status_info.status):
        # Only the first total_links slots of the status array are meaningful.
        if position == status_info.total_links:
            break
        codes.append(code_for_state.get(enum_names[link], "N/A"))

    return {
        "status" : codes,
        "total_links": status_info.total_links,
    }
|
|
|
|
|
|
def amdsmi_get_gpu_cache_info(
    processor_handle: processor_handle_t,
) -> Dict[str, List]:
    """
    Describe every cache type reported for a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, List]: {"cache": [...]}, one dictionary per cache type with
        the keys "cache_properties" (list of property names with the
        "AMDSMI_CACHE_PROPERTY_" prefix removed), "cache_size",
        "cache_level", "max_num_cu_shared" and "num_cache_instance".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
        AmdSmiLibraryException: with AMDSMI_STATUS_NO_DATA when the library
            reports zero cache types.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    cache_info_struct = amdsmi_wrapper.amdsmi_gpu_cache_info_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_cache_info(
            processor_handle, ctypes.byref(cache_info_struct))
    )

    # Property bits tested against each cache's bitmask, in reporting order.
    property_flags = (
        amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_DATA_CACHE,
        amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_INST_CACHE,
        amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_CPU_CACHE,
        amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_SIMD_CACHE,
    )
    property_names = amdsmi_wrapper.amdsmi_cache_property_type_t__enumvalues

    caches = []
    for slot in range(cache_info_struct.num_cache_types):
        cache = cache_info_struct.cache[slot]
        bitmask = cache.cache_properties

        # A non-zero AND result is the flag value itself, which keys the
        # enum-name table.
        labels = []
        for flag in property_flags:
            masked = bitmask & flag
            if masked:
                labels.append(
                    property_names[masked].replace("AMDSMI_CACHE_PROPERTY_", "")
                )

        # cache_properties goes first in the dictionary for readability.
        caches.append({
            "cache_properties": labels,
            "cache_size": cache.cache_size,
            "cache_level": cache.cache_level,
            "max_num_cu_shared": cache.max_num_cu_shared,
            "num_cache_instance": cache.num_cache_instance
        })

    if not caches:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_NO_DATA)

    return {
        "cache": caches
    }
|
|
|
|
|
|
def amdsmi_get_gpu_vbios_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the VBIOS identification strings of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: the UTF-8 decoded fields "name", "build_date",
        "part_number", "version" and "boot_firmware"; an empty
        boot_firmware string is reported as "N/A".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    vbios = amdsmi_wrapper.amdsmi_vbios_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_vbios_info(
        processor_handle, ctypes.byref(vbios)
    )
    _check_res(status)

    # Only boot_firmware gets a placeholder when the library leaves it empty.
    boot_firmware = vbios.boot_firmware.decode("utf-8") or "N/A"

    return {
        "name": vbios.name.decode("utf-8"),
        "build_date": vbios.build_date.decode("utf-8"),
        "part_number": vbios.part_number.decode("utf-8"),
        "version": vbios.version.decode("utf-8"),
        "boot_firmware": boot_firmware,
    }
|
|
|
|
|
|
def amdsmi_get_gpu_activity(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the engine activity readings of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: "gfx_activity", "umc_activity" and "mm_activity";
        a reading equal to 0xFFFF is replaced with "N/A".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    usage = amdsmi_wrapper.amdsmi_engine_usage_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_activity(
        processor_handle, ctypes.byref(usage)
    )
    _check_res(status)

    readings = (
        ("gfx_activity", usage.gfx_activity),
        ("umc_activity", usage.umc_activity),
        ("mm_activity", usage.mm_activity),
    )
    # 0xFFFF is the sentinel this function maps to "N/A".
    return {
        name: ("N/A" if reading == 0xFFFF else reading)
        for name, reading in readings
    }
|
|
|
|
|
|
def amdsmi_get_clock_info(
    processor_handle: processor_handle_t,
    clock_type: AmdSmiClkType,
) -> Dict[str, Any]:
    """
    Return current, minimum and maximum values for one clock domain.

    Args:
        processor_handle: handle of the device to query.
        clock_type: the AmdSmiClkType member selecting the clock domain.

    Returns:
        Dict[str, Any]: "clk", "min_clk", "max_clk", "clk_locked" and
        "clk_deep_sleep", each passed through _validate_if_max_uint
        (clk_locked with isBool=True).

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or clock_type is not an
            AmdSmiClkType.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(clock_type, AmdSmiClkType):
        raise AmdSmiParameterException(clock_type, AmdSmiClkType)

    clk_info = amdsmi_wrapper.amdsmi_clk_info_t()
    status = amdsmi_wrapper.amdsmi_get_clock_info(
        processor_handle,
        clock_type,
        ctypes.byref(clk_info),
    )
    _check_res(status)

    return {
        "clk": _validate_if_max_uint(clk_info.clk, MaxUIntegerTypes.UINT32_T),
        "min_clk": _validate_if_max_uint(clk_info.min_clk, MaxUIntegerTypes.UINT32_T),
        "max_clk": _validate_if_max_uint(clk_info.max_clk, MaxUIntegerTypes.UINT32_T),
        "clk_locked": _validate_if_max_uint(
            clk_info.clk_locked, MaxUIntegerTypes.UINT8_T, isBool=True
        ),
        "clk_deep_sleep" : _validate_if_max_uint(
            clk_info.clk_deep_sleep, MaxUIntegerTypes.UINT8_T
        ),
    }
|
|
|
|
def amdsmi_get_gpu_bad_page_info(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """
    Return the retired (bad) page records of a GPU.

    The library is called twice: first with a null record pointer to learn
    how many records exist, then with an array of exactly that size.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        List[Dict[str, Any]]: an empty list when no pages are reported,
        otherwise whatever _format_bad_page_info builds from the records.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    record_type = amdsmi_wrapper.amdsmi_retired_page_record_t

    # Pass 1: null buffer, only the count is filled in.
    page_count = ctypes.c_uint32()
    no_buffer = ctypes.POINTER(record_type)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_bad_page_info(
            processor_handle, ctypes.byref(page_count), no_buffer
        )
    )

    if not page_count.value:
        return []

    # Pass 2: fetch the records into a correctly sized array.
    records = (record_type * page_count.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_bad_page_info(
            processor_handle, ctypes.byref(page_count), records
        )
    )

    return _format_bad_page_info(records, page_count)
|
|
|
|
def amdsmi_get_gpu_bad_page_threshold(
    processor_handle: processor_handle_t,
) -> int:
    """
    Return the bad page threshold reported for a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        int: the 32-bit unsigned value written by the library.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    limit = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_bad_page_threshold(
        processor_handle, ctypes.byref(limit)
    )
    _check_res(status)

    return limit.value
|
|
|
|
def amdsmi_get_violation_status(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the throttle/violation counters of a GPU.

    Three families of fields are reported, each normalized through
    _validate_if_max_uint:
        - "reference_timestamp", "violation_timestamp" and "acc_*":
          checked against MaxUIntegerTypes.UINT64_T.
        - "per_*": checked against UINT64_T with isActivity=True.
        - "active_*": checked against UINT8_T with isBool=True.

    The "*_gfx_clk_below_host_limit_pwr", "*_gfx_clk_below_host_limit_thm",
    "*_gfx_clk_below_host_limit_total" and "*_low_utilization" fields are
    two-dimensional: a list with one inner list per row of the library
    array (the original comment describes these as per-XCD stats), every
    element normalized like the scalar fields of the same family.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: the fields described above.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    violation_status = amdsmi_wrapper.amdsmi_violation_status_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_violation_status(
            processor_handle, ctypes.byref(violation_status))
    )

    # One converter per field family; shared by scalar and 2D fields.
    def _acc(value):
        return _validate_if_max_uint(value, MaxUIntegerTypes.UINT64_T)

    def _per(value):
        return _validate_if_max_uint(value, MaxUIntegerTypes.UINT64_T, isActivity=True)

    def _active(value):
        return _validate_if_max_uint(value, MaxUIntegerTypes.UINT8_T, isBool=True)

    def _table(rows, convert):
        # Turn a 2D ctypes array into a list of lists of converted values.
        return [[convert(val) for val in xcp_metrics] for xcp_metrics in rows]

    vs = violation_status
    return {
        "reference_timestamp": _acc(vs.reference_timestamp),
        "violation_timestamp": _acc(vs.violation_timestamp),
        "acc_counter": _acc(vs.acc_counter),
        "acc_prochot_thrm": _acc(vs.acc_prochot_thrm),
        "acc_ppt_pwr": _acc(vs.acc_ppt_pwr),  # PVIOL
        "acc_socket_thrm": _acc(vs.acc_socket_thrm),  # TVIOL
        "acc_vr_thrm": _acc(vs.acc_vr_thrm),
        "acc_hbm_thrm": _acc(vs.acc_hbm_thrm),
        "acc_gfx_clk_below_host_limit": _acc(vs.acc_gfx_clk_below_host_limit),
        "acc_gfx_clk_below_host_limit_pwr": _table(vs.acc_gfx_clk_below_host_limit_pwr, _acc),
        "acc_gfx_clk_below_host_limit_thm": _table(vs.acc_gfx_clk_below_host_limit_thm, _acc),
        "acc_gfx_clk_below_host_limit_total": _table(vs.acc_gfx_clk_below_host_limit_total, _acc),
        "acc_low_utilization": _table(vs.acc_low_utilization, _acc),
        "per_prochot_thrm": _per(vs.per_prochot_thrm),
        "per_ppt_pwr": _per(vs.per_ppt_pwr),  # PVIOL
        "per_socket_thrm": _per(vs.per_socket_thrm),  # TVIOL
        "per_vr_thrm": _per(vs.per_vr_thrm),
        "per_hbm_thrm": _per(vs.per_hbm_thrm),
        "per_gfx_clk_below_host_limit": _per(vs.per_gfx_clk_below_host_limit),
        "per_gfx_clk_below_host_limit_pwr": _table(vs.per_gfx_clk_below_host_limit_pwr, _per),
        "per_gfx_clk_below_host_limit_thm": _table(vs.per_gfx_clk_below_host_limit_thm, _per),
        "per_gfx_clk_below_host_limit_total": _table(vs.per_gfx_clk_below_host_limit_total, _per),
        "per_low_utilization": _table(vs.per_low_utilization, _per),
        "active_prochot_thrm": _active(vs.active_prochot_thrm),
        "active_ppt_pwr": _active(vs.active_ppt_pwr),  # PVIOL
        "active_socket_thrm": _active(vs.active_socket_thrm),  # TVIOL
        "active_vr_thrm": _active(vs.active_vr_thrm),
        "active_hbm_thrm": _active(vs.active_hbm_thrm),
        "active_gfx_clk_below_host_limit": _active(vs.active_gfx_clk_below_host_limit),
        "active_gfx_clk_below_host_limit_pwr": _table(vs.active_gfx_clk_below_host_limit_pwr, _active),
        "active_gfx_clk_below_host_limit_thm": _table(vs.active_gfx_clk_below_host_limit_thm, _active),
        "active_gfx_clk_below_host_limit_total": _table(vs.active_gfx_clk_below_host_limit_total, _active),
        "active_low_utilization": _table(vs.active_low_utilization, _active),
    }
|
|
|
|
def amdsmi_get_gpu_total_ecc_count(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the total ECC error counters of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: "correctable_count", "uncorrectable_count" and
        "deferred_count" exactly as filled in by the library.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    error_count = amdsmi_wrapper.amdsmi_error_count_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_total_ecc_count(
        processor_handle, ctypes.byref(error_count)
    )
    _check_res(status)

    totals = {}
    totals["correctable_count"] = error_count.correctable_count
    totals["uncorrectable_count"] = error_count.uncorrectable_count
    totals["deferred_count"] = error_count.deferred_count
    return totals
|
|
|
|
def amdsmi_get_gpu_cper_entries(
    processor_handle: processor_handle_t,
    severity_mask: int,
    buffer_size: int = 4 * 1048576,
    cursor: int = 0
) -> Tuple[Dict[str, Any], int, List[Dict[str, Any]], int]:
    """
    Fetch CPER (error record) entries for a GPU.

    Args:
        processor_handle: handle of the device to query.
        severity_mask: passed to the library as a 32-bit unsigned mask.
        buffer_size: size in bytes of the buffer that receives raw CPER data.
        cursor: starting cursor handed to the library.

    Returns:
        Tuple containing, in order:
            - A dictionary of decoded entry headers keyed by the entry's
              integer index (0, 1, ...).
            - The cursor value written back by the library.
            - A list with one {"bytes": [...], "size": n} dictionary per
              entry; "bytes" holds signed byte values (ctypes.c_byte).
            - The library status code (AMDSMI_STATUS_SUCCESS or
              AMDSMI_STATUS_MORE_DATA).

    Raises:
        AmdSmiParameterException: if an argument has the wrong type.
        AmdSmiLibraryException: if the library returns any status other than
            AMDSMI_STATUS_SUCCESS or AMDSMI_STATUS_MORE_DATA.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(severity_mask, int):
        raise AmdSmiParameterException(severity_mask, int)
    if not isinstance(buffer_size, int):
        raise AmdSmiParameterException(buffer_size, int)
    if not isinstance(cursor, int):
        raise AmdSmiParameterException(cursor, int)

    # Allocate a buffer for CPER data.
    buf = ctypes.create_string_buffer(buffer_size)
    buf_size = ctypes.c_uint64(buffer_size)
    num_cper_hdrs = 20
    entry_count = ctypes.c_uint64(num_cper_hdrs)
    cur = ctypes.c_uint64(cursor)

    # Allocate a pointer for the CPER header array.
    cper_hdrs_array = (ctypes.POINTER(amdsmi_wrapper.amdsmi_cper_hdr_t) * num_cper_hdrs)()
    cper_hdrs = ctypes.cast(cper_hdrs_array, ctypes.POINTER(ctypes.POINTER(amdsmi_wrapper.amdsmi_cper_hdr_t)))

    # Call the underlying AMD-SMI API.
    status_code = amdsmi_wrapper.amdsmi_get_gpu_cper_entries(
        processor_handle,
        ctypes.c_uint32(severity_mask),
        buf,
        ctypes.byref(buf_size),
        cper_hdrs,
        ctypes.byref(entry_count),
        ctypes.byref(cur)
    )
    if status_code not in {amdsmi_wrapper.AMDSMI_STATUS_SUCCESS, amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA}:
        raise AmdSmiLibraryException(status_code)

    # The serial number belongs to the device, not to an entry: look it up
    # once (and only if there is something to report) instead of once per
    # entry. The lookup stays best-effort; any failure yields "".
    serial_number = ""
    if entry_count.value:
        try:
            board_info = amdsmi_get_gpu_board_info(processor_handle)
            serial_number = board_info.get('product_serial', "")
        except Exception:
            serial_number = ""

    entries = {}
    cper_data = []
    offset = 0

    # Iterate over each entry using its variable record_length.
    for i in range(entry_count.value):
        entry_address = ctypes.addressof(buf) + offset
        entry_ptr = ctypes.cast(entry_address, ctypes.POINTER(amdsmi_wrapper.amdsmi_cper_hdr_t))
        header = entry_ptr.contents
        record_length = header.record_length

        # Extract the raw bytes and size of the entry.
        cper_data.append({
            "bytes": list((record_length * ctypes.c_byte).from_address(entry_address)),
            "size": record_length
        })

        # Extract the timestamp fields.
        stamp = header.timestamp
        year = stamp.year
        if year < 100:  # Adjust the year if it's less than 100.
            year += 2000
        formatted_timestamp = (
            f"{year:04d}/"
            f"{stamp.month:02d}/"
            f"{stamp.day:02d} "
            f"{stamp.hours:02d}:"
            f"{stamp.minutes:02d}:"
            f"{stamp.seconds:02d}"
        )

        # Create a dictionary for the CPER entry.
        entries[i] = {
            "error_severity": amdsmi_wrapper.amdsmi_cper_sev_t__enumvalues.get(
                header.error_severity, "AMDSMI_CPER_SEV_UNUSED"
            ).replace("AMDSMI_CPER_SEV_", "").lower(),
            "notify_type": _notifyTypeToString(header.notify_type.b),
            "timestamp": formatted_timestamp,
            "signature": header.signature,
            "revision": header.revision,
            "signature_end": hex(header.signature_end),
            "sec_cnt": header.sec_cnt,
            "record_length": record_length,
            "serial_number": serial_number,
            "platform_id": header.platform_id,
            "creator_id": header.creator_id,
            "record_id": header.record_id,
            "flags": header.flags,
            "persistence_info": header.persistence_info,
        }

        # Use the actual record length to advance to the next entry.
        offset += record_length

    return entries, cur.value, cper_data, status_code
|
|
|
|
|
|
def amdsmi_get_afids_from_cper(
    cper_afid_data: bytes
) -> Tuple[List[int], int]:
    """
    Extract AFIDs from one or more CPER records.

    Args:
        cper_afid_data: either the raw bytes (bytes or bytearray) of a single
            CPER record, or a list of dictionaries with the keys "bytes"
            (sequence of byte values) and "size", such as the record list
            returned by amdsmi_get_gpu_cper_entries. Signed byte values
            (-128..127) are accepted and reinterpreted as unsigned.

    Returns:
        Tuple[List[int], int]: A tuple containing:
            - A list of extracted AFIDs (all records concatenated).
            - The total count of AFIDs.

    Raises:
        AmdSmiParameterException: if cper_afid_data is neither bytes-like
            nor a list, or a list element lacks the "bytes"/"size" keys.
        AmdSmiLibraryException: if the library fails to decode a record.
    """
    # Normalize single blob into a list of records
    if isinstance(cper_afid_data, (bytes, bytearray)):
        cper_records = [{
            "bytes": list(cper_afid_data),
            "size": len(cper_afid_data)
        }]
    # isinstance() rejects subscripted generics such as List[Dict[str, Any]]
    # with a TypeError, so test against the plain list type; each element is
    # validated individually below.
    elif isinstance(cper_afid_data, list):
        cper_records = cper_afid_data
    else:
        raise AmdSmiParameterException(cper_afid_data, bytes)

    all_afids: List[int] = []

    for record in cper_records:
        if isinstance(record, dict) and "bytes" in record and "size" in record:
            # Mask to 0..255: records produced via ctypes.c_byte carry signed
            # values, which bytes() would reject.
            raw_bytes = bytes(value & 0xFF for value in record["bytes"])
            record_size = record["size"]
        else:
            raise AmdSmiParameterException(record,
                "dict with keys 'bytes' and 'size' or bytes/bytearray")

        # Wrap as char*
        buf = ctypes.create_string_buffer(raw_bytes, record_size)
        buf_ptr = ctypes.cast(buf, ctypes.POINTER(ctypes.c_char))

        afid_array = (ctypes.c_uint64 * MAX_NUMBER_OF_AFIDS_PER_RECORD)()
        num_afids_ct = ctypes.c_uint32(MAX_NUMBER_OF_AFIDS_PER_RECORD)

        # Call the wrapper function
        status = amdsmi_wrapper.amdsmi_get_afids_from_cper(
            buf_ptr,
            ctypes.c_uint32(record_size),
            afid_array,
            ctypes.byref(num_afids_ct)
        )
        if status != amdsmi_wrapper.AMDSMI_STATUS_SUCCESS:
            raise AmdSmiLibraryException(status)

        # Collect exactly the decoded AFIDs
        count = num_afids_ct.value
        all_afids.extend(afid_array[i] for i in range(count))

    return all_afids, len(all_afids)
|
|
|
|
|
|
def amdsmi_get_gpu_board_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the board identification strings of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: "model_number", "product_serial", "fru_id",
        "product_name" and "manufacturer_name". Each field is UTF-8 decoded
        and stripped; model_number and product_name are additionally passed
        through _pad_hex_value(..., 4). Empty strings become "N/A".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    board = amdsmi_wrapper.amdsmi_board_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_board_info(
        processor_handle, ctypes.byref(board)
    )
    _check_res(status)

    def _text(raw):
        return raw.decode("utf-8").strip()

    fields = (
        ("model_number", _pad_hex_value(_text(board.model_number), 4)),
        ("product_serial", _text(board.product_serial)),
        ("fru_id", _text(board.fru_id)),
        ("product_name", _pad_hex_value(_text(board.product_name), 4)),
        ("manufacturer_name", _text(board.manufacturer_name)),
    )

    # Only exact empty strings are replaced with the placeholder.
    return {key: ("N/A" if value == "" else value) for key, value in fields}
|
|
|
|
|
|
def amdsmi_get_gpu_ras_feature_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the RAS EEPROM version and ECC correction schema flags of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: "eeprom_version" as a hex string, plus the booleans
        "parity_schema", "single_bit_schema", "double_bit_schema" and
        "poison_schema" taken from bits 0..3 of ecc_correction_schema_flag.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    feature = amdsmi_wrapper.amdsmi_ras_feature_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_ras_feature_info(
        processor_handle, ctypes.byref(feature)
    )
    _check_res(status)

    summary = {"eeprom_version": hex(feature.ras_eeprom_version)}

    # Schema names in bit order: bit 0 -> parity ... bit 3 -> poison.
    schema_bits = feature.ecc_correction_schema_flag
    schema_names = (
        "parity_schema",
        "single_bit_schema",
        "double_bit_schema",
        "poison_schema",
    )
    for bit, schema_name in enumerate(schema_names):
        summary[schema_name] = bool(schema_bits & (1 << bit))

    return summary
|
|
|
|
|
|
def amdsmi_get_gpu_ras_block_features_enabled(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """
    Return the RAS error state of every GPU block.

    Every AmdSmiGpuBlock member is queried except those named "RESERVED"
    and "INVALID"; the member named "LAST" is reported under the block name
    "MPIO".

    Args:
        processor_handle: handle of the device to query.

    Returns:
        List[Dict[str, Any]]: one {"block": name, "status": state} entry per
        queried block, where state is the AmdSmiRasErrState member name.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # A single state object is reused as the output slot for every query.
    state_slot = amdsmi_wrapper.amdsmi_ras_err_state_t()
    block_states = []
    for block in AmdSmiGpuBlock:
        if block.name in ("RESERVED", "INVALID"):
            continue

        reported_name = "MPIO" if block.name == "LAST" else block.name

        status = amdsmi_wrapper.amdsmi_get_gpu_ras_block_features_enabled(
            processor_handle,
            amdsmi_wrapper.amdsmi_gpu_block_t(block.value),
            ctypes.byref(state_slot),
        )
        _check_res(status)

        block_states.append({
            "block": reported_name,
            "status": AmdSmiRasErrState(state_slot.value).name,
        })

    return block_states
|
|
|
|
|
|
def amdsmi_get_gpu_process_list(
    processor_handle: processor_handle_t,
) -> List[amdsmi_wrapper.amdsmi_proc_info_t]:
    """
    Return the processes reported for a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        A list with one dictionary per process (note: dictionaries, not the
        raw amdsmi_proc_info_t structures named in the annotation) holding
        "name" ("N/A" when empty), "pid", "mem", "engine_usage"
        ("gfx", "enc"), "memory_usage" ("gtt_mem", "cpu_mem", "vram_mem"),
        "cu_occupancy" and "evicted_time".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # In: capacity of the array. Out: number of processes found.
    proc_count = ctypes.c_uint32(MAX_NUM_PROCESSES)

    proc_array = (amdsmi_wrapper.amdsmi_proc_info_t * proc_count.value)()
    status = amdsmi_wrapper.amdsmi_get_gpu_process_list(
        processor_handle, ctypes.byref(proc_count), proc_array
    )
    _check_res(status)

    processes = []
    for slot in range(proc_count.value):
        proc = proc_array[slot]
        display_name = proc.name.decode("utf-8").strip() or "N/A"

        engine_usage = {
            "gfx": proc.engine_usage.gfx,
            "enc": proc.engine_usage.enc
        }
        memory_usage = {
            "gtt_mem": proc.memory_usage.gtt_mem,
            "cpu_mem": proc.memory_usage.cpu_mem,
            "vram_mem": proc.memory_usage.vram_mem,
        }
        processes.append({
            "name": display_name,
            "pid": proc.pid,
            "mem": proc.mem,
            "engine_usage": engine_usage,
            "memory_usage": memory_usage,
            "cu_occupancy": _validate_if_max_uint(proc.cu_occupancy, MaxUIntegerTypes.UINT32_T),
            "evicted_time": _validate_if_max_uint(proc.evicted_time, MaxUIntegerTypes.UINT32_T)
        })

    return processes
|
|
|
|
|
|
def amdsmi_get_gpu_driver_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the driver name, version and date for a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: "driver_name", "driver_version" and "driver_date"
        as UTF-8 decoded strings; empty strings are reported as "N/A".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    driver = amdsmi_wrapper.amdsmi_driver_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_driver_info(
        processor_handle, ctypes.byref(driver)
    )
    _check_res(status)

    # Not including os_kernel_version here due to it just being os.uname().release
    return {
        "driver_name": driver.driver_name.decode("utf-8") or "N/A",
        "driver_version": driver.driver_version.decode("utf-8") or "N/A",
        "driver_date": driver.driver_date.decode("utf-8") or "N/A"
    }
|
|
|
|
|
|
def amdsmi_get_power_info(
    processor_handle: processor_handle_t
) -> Dict[str, ctypes.c_uint32]:
    """
    Return the power and voltage readings of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        A dictionary with "socket_power", "current_socket_power",
        "average_socket_power", "gfx_voltage", "soc_voltage", "mem_voltage"
        and "power_limit". Any reading equal to one of the
        MaxUIntegerTypes UINT8_T/UINT16_T/UINT32_T/UINT64_T values is
        replaced with the string "N/A".

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    power = amdsmi_wrapper.amdsmi_power_info_t()
    status = amdsmi_wrapper.amdsmi_get_power_info(
        processor_handle, ctypes.byref(power)
    )
    _check_res(status)

    # Dictionary keys are identical to the structure's field names.
    field_names = (
        "socket_power",
        "current_socket_power",
        "average_socket_power",
        "gfx_voltage",
        "soc_voltage",
        "mem_voltage",
        "power_limit",
    )
    sentinels = (
        MaxUIntegerTypes.UINT8_T,
        MaxUIntegerTypes.UINT16_T,
        MaxUIntegerTypes.UINT32_T,
        MaxUIntegerTypes.UINT64_T,
    )

    readings = {}
    for field_name in field_names:
        reading = getattr(power, field_name)
        readings[field_name] = "N/A" if reading in sentinels else reading

    return readings
|
|
|
|
|
|
def amdsmi_is_gpu_power_management_enabled(
    processor_handle: processor_handle_t
) -> bool:
    """
    Tell whether power management is enabled on a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        bool: the flag written by the library.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    enabled_flag = ctypes.c_bool()
    status = amdsmi_wrapper.amdsmi_is_gpu_power_management_enabled(
        processor_handle, ctypes.byref(enabled_flag)
    )
    _check_res(status)

    return enabled_flag.value
|
|
|
|
|
|
def amdsmi_get_fw_info(
    processor_handle: processor_handle_t
) -> Dict[str, List[Dict[str, str]]]:
    """
    Return the firmware blocks of a GPU with formatted version strings.

    The library reports each version as an integer, which loses any
    intentional leading zeros, and provides no per-block format flag. The
    formats are therefore chosen by block id:
        - PSP_SOSDRV, TA_RAS, TA_XGMI, UVD, VCE, VCN: hex digits zero-padded
          to 8 and grouped in pairs, e.g. 0x1A2B3C -> "00.1A.2B.3C".
        - PM (AKA SMC) and PLDM_BUNDLE: each hex pair converted to decimal,
          e.g. 0x12345678 -> "18.52.86.120".
        - every other block: the plain decimal integer.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        {"fw_list": [...]}, one {"fw_name": AmdSmiFwBlock member,
        "fw_version": upper-cased version string} per firmware entry.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    fw_info = amdsmi_wrapper.amdsmi_fw_info_t()
    status = amdsmi_wrapper.amdsmi_get_fw_info(
        processor_handle, ctypes.byref(fw_info)
    )
    _check_res(status)

    dotted_hex_blocks = (
        AmdSmiFwBlock.AMDSMI_FW_ID_PSP_SOSDRV,
        AmdSmiFwBlock.AMDSMI_FW_ID_TA_RAS,
        AmdSmiFwBlock.AMDSMI_FW_ID_TA_XGMI,
        AmdSmiFwBlock.AMDSMI_FW_ID_UVD,
        AmdSmiFwBlock.AMDSMI_FW_ID_VCE,
        AmdSmiFwBlock.AMDSMI_FW_ID_VCN,
    )
    dotted_dec_blocks = (
        AmdSmiFwBlock.AMDSMI_FW_ID_PM,
        AmdSmiFwBlock.AMDSMI_FW_ID_PLDM_BUNDLE,
    )

    firmwares = []
    for slot in range(fw_info.num_fw_info):
        record = fw_info.fw_info_list[slot]
        block = AmdSmiFwBlock(record.fw_id)
        raw_version = record.fw_version  # integer as delivered by the library

        use_hex = block in dotted_hex_blocks
        if use_hex or block in dotted_dec_blocks:
            # Restore the leading zeros, then split into two-digit groups.
            padded = hex(raw_version)[2:].zfill(8)
            pairs = [padded[pos:pos + 2] for pos in range(0, len(padded), 2)]
            if use_hex:
                version_text = ".".join(pairs)
            else:
                version_text = ".".join(
                    str(int(pair, 16)).zfill(2) for pair in pairs
                )
        else:
            version_text = str(raw_version)

        firmwares.append({
            'fw_name': block,
            'fw_version': version_text.upper(),
        })

    return {'fw_list': firmwares}
|
|
|
|
|
|
def amdsmi_get_gpu_vram_usage(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the total and used VRAM figures of a GPU.

    Args:
        processor_handle: handle of the device to query.

    Returns:
        Dict[str, Any]: "vram_total" and "vram_used" exactly as filled in
        by the library.

    Raises:
        AmdSmiParameterException: if processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    usage = amdsmi_wrapper.amdsmi_vram_usage_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_vram_usage(
        processor_handle, ctypes.byref(usage)
    )
    _check_res(status)

    return {
        "vram_total": usage.vram_total,
        "vram_used": usage.vram_used,
    }
|
|
|
|
|
|
def amdsmi_get_pcie_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Collect static and metric PCIe information for one processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        A dict with two sub-dicts, ``pcie_static`` and ``pcie_metric``. Every
        numeric field is passed through ``_validate_if_max_uint`` together
        with the unsigned width it is checked against. ``slot_type`` is the
        form-factor enum name without its ``AMDSMI_CARD_FORM_FACTOR_`` prefix,
        ``"Unknown"`` for an integer that is not in the enum table, or
        ``"N/A"`` when the raw value is not an ``int``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    pcie_info = amdsmi_wrapper.amdsmi_pcie_info_t()
    status = amdsmi_wrapper.amdsmi_get_pcie_info(
        processor_handle, ctypes.byref(pcie_info)
    )
    _check_res(status)

    # (field name, unsigned type used for the "max value" check), in the
    # order the keys appear in the returned dicts.
    static_fields = (
        ("max_pcie_width", MaxUIntegerTypes.UINT16_T),
        ("max_pcie_speed", MaxUIntegerTypes.UINT32_T),
        ("pcie_interface_version", MaxUIntegerTypes.UINT32_T),
    )
    metric_fields = (
        ("pcie_width", MaxUIntegerTypes.UINT16_T),
        ("pcie_speed", MaxUIntegerTypes.UINT32_T),
        ("pcie_bandwidth", MaxUIntegerTypes.UINT32_T),
        ("pcie_replay_count", MaxUIntegerTypes.UINT64_T),
        ("pcie_l0_to_recovery_count", MaxUIntegerTypes.UINT64_T),
        ("pcie_replay_roll_over_count", MaxUIntegerTypes.UINT64_T),
        ("pcie_nak_sent_count", MaxUIntegerTypes.UINT64_T),
        ("pcie_nak_received_count", MaxUIntegerTypes.UINT64_T),
        ("pcie_lc_perf_other_end_recovery_count", MaxUIntegerTypes.UINT32_T),
    )

    static_raw = pcie_info.pcie_static
    metric_raw = pcie_info.pcie_metric

    static_dict = {
        field: _validate_if_max_uint(getattr(static_raw, field), uint_type)
        for field, uint_type in static_fields
    }
    metric_dict = {
        field: _validate_if_max_uint(getattr(metric_raw, field), uint_type)
        for field, uint_type in metric_fields
    }

    # Translate the raw slot type into a readable name.
    raw_slot = static_raw.slot_type
    if not isinstance(raw_slot, int):
        slot_name = "N/A"
    else:
        form_factors = amdsmi_wrapper.amdsmi_card_form_factor_t__enumvalues
        if raw_slot in form_factors:
            slot_name = form_factors[raw_slot].replace("AMDSMI_CARD_FORM_FACTOR_", "")
        else:
            slot_name = "Unknown"
    static_dict["slot_type"] = slot_name

    return {"pcie_static": static_dict, "pcie_metric": metric_dict}
|
|
|
|
def amdsmi_get_gpu_xcd_counter(processor_handle: processor_handle_t) -> int:
    """Return the XCD counter value reported by the library for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The value written by the library into a ``ctypes.c_uint16``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    counter = ctypes.c_uint16()
    status = amdsmi_wrapper.amdsmi_get_gpu_xcd_counter(
        processor_handle, ctypes.byref(counter)
    )
    _check_res(status)

    return counter.value
|
|
|
|
def amdsmi_get_processor_handle_from_bdf(bdf):
    """Look up the processor handle that corresponds to a BDF identifier.

    Args:
        bdf: BDF identifier in whatever form ``_parse_bdf`` accepts.

    Returns:
        An ``amdsmi_wrapper.amdsmi_processor_handle`` filled in by the
        library.

    Raises:
        AmdSmiBdfFormatException: If ``_parse_bdf`` returns ``None`` for
            ``bdf``. The exception carries the caller's original input.

    The library status code is handed to ``_check_res``.
    """
    # Keep the caller's value separate from the parse result so that a
    # failed parse can report what was actually passed in (previously the
    # name was rebound and the exception always received ``None``).
    parsed_bdf = _parse_bdf(bdf)
    if parsed_bdf is None:
        raise AmdSmiBdfFormatException(bdf)

    amdsmi_bdf = _make_amdsmi_bdf_from_list(parsed_bdf)
    processor_handle = amdsmi_wrapper.amdsmi_processor_handle()
    _check_res(
        amdsmi_wrapper.amdsmi_get_processor_handle_from_bdf(
            amdsmi_bdf, ctypes.byref(processor_handle)
        )
    )
    return processor_handle
|
|
|
|
def amdsmi_get_gpu_vendor_name(
    processor_handle: processor_handle_t,
) -> str:
    """Return the vendor name string for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The contents of the string buffer filled in by the library, decoded
        as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # The buffer and the length passed to the library use the same size.
    buffer_len = ctypes.c_uint64(_AMDSMI_STRING_LENGTH)
    name_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)

    status = amdsmi_wrapper.amdsmi_get_gpu_vendor_name(
        processor_handle, name_buffer, buffer_len
    )
    _check_res(status)

    return name_buffer.value.decode("utf-8")
|
|
|
|
|
|
def amdsmi_get_gpu_id(processor_handle: processor_handle_t):
    """Return the GPU id reported by the library for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The integer value written by the library into a ``ctypes.c_uint16``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    gpu_id = ctypes.c_uint16()
    status = amdsmi_wrapper.amdsmi_get_gpu_id(
        processor_handle, ctypes.byref(gpu_id)
    )
    _check_res(status)

    return gpu_id.value
|
|
|
|
|
|
def amdsmi_get_gpu_vram_vendor(processor_handle: processor_handle_t):
    """Return the VRAM vendor string for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The contents of the string buffer filled in by the library, decoded
        as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # This call takes its length as a 32-bit value.
    buffer_len = ctypes.c_uint32(_AMDSMI_STRING_LENGTH)
    vendor_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)

    status = amdsmi_wrapper.amdsmi_get_gpu_vram_vendor(
        processor_handle, vendor_buffer, buffer_len
    )
    _check_res(status)

    return vendor_buffer.value.decode("utf-8")
|
|
|
|
|
|
def amdsmi_get_gpu_subsystem_id(processor_handle: processor_handle_t):
    """Return the subsystem id of a processor as a padded hex string.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The result of ``_pad_hex_value(hex(value), 4)``, where ``value`` is
        the 16-bit id written by the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    subsystem_id = ctypes.c_uint16()
    status = amdsmi_wrapper.amdsmi_get_gpu_subsystem_id(
        processor_handle, ctypes.byref(subsystem_id)
    )
    _check_res(status)

    hex_text = hex(subsystem_id.value)
    return _pad_hex_value(hex_text, 4)
|
|
|
|
|
|
def amdsmi_get_gpu_subsystem_name(processor_handle: processor_handle_t):
    """Return the subsystem name string for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The contents of the string buffer filled in by the library, decoded
        as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    buffer_len = ctypes.c_uint64(_AMDSMI_STRING_LENGTH)
    name_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)

    status = amdsmi_wrapper.amdsmi_get_gpu_subsystem_name(
        processor_handle, name_buffer, buffer_len
    )
    _check_res(status)

    return name_buffer.value.decode("utf-8")
|
|
|
|
|
|
def amdsmi_get_lib_version():
    """Return the library version as a dict.

    Returns:
        A dict with the keys ``major``, ``minor``, ``release`` and ``build``.
        ``build`` is read through the structure's ``build`` pointer and
        decoded as UTF-8.

    The library status code is handed to ``_check_res``.
    """
    lib_version = amdsmi_wrapper.amdsmi_version_t()
    status = amdsmi_wrapper.amdsmi_get_lib_version(ctypes.byref(lib_version))
    _check_res(status)

    build_text = lib_version.build.contents.value.decode("utf-8")
    result = {
        "major": lib_version.major,
        "minor": lib_version.minor,
        "release": lib_version.release,
    }
    result["build"] = build_text
    return result
|
|
|
|
|
|
def amdsmi_topo_get_numa_node_number(
    processor_handle: processor_handle_t,
):
    """Return the NUMA node number reported by the library for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The integer value written by the library into a ``ctypes.c_uint32``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    numa_node = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_topo_get_numa_node_number(
        processor_handle, ctypes.byref(numa_node)
    )
    _check_res(status)

    return numa_node.value
|
|
|
|
|
|
def amdsmi_topo_get_link_weight(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t
):
    """Return the link weight between two processors.

    Args:
        processor_handle_src: Handle of the source device.
        processor_handle_dst: Handle of the destination device.

    Returns:
        The integer value written by the library into a ``ctypes.c_uint64``.

    Raises:
        AmdSmiParameterException: If either handle is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` (source is checked
            first).

    The library status code is handed to ``_check_res``.
    """
    # Validate source first, then destination.
    for handle in (processor_handle_src, processor_handle_dst):
        if not isinstance(handle, amdsmi_wrapper.amdsmi_processor_handle):
            raise AmdSmiParameterException(
                handle, amdsmi_wrapper.amdsmi_processor_handle
            )

    link_weight = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_topo_get_link_weight(
        processor_handle_src, processor_handle_dst, ctypes.byref(link_weight)
    )
    _check_res(status)

    return link_weight.value
|
|
|
|
|
|
def amdsmi_get_minmax_bandwidth_between_processors(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the minimum and maximum bandwidth between two processors.

    Args:
        processor_handle_src: Handle of the source device.
        processor_handle_dst: Handle of the destination device.

    Returns:
        A dict with the keys ``min_bandwidth`` and ``max_bandwidth``, each
        holding the value the library wrote into a ``ctypes.c_uint64``.

    Raises:
        AmdSmiParameterException: If either handle is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` (source is checked
            first).

    The library status code is handed to ``_check_res``.
    """
    # Validate source first, then destination.
    for handle in (processor_handle_src, processor_handle_dst):
        if not isinstance(handle, amdsmi_wrapper.amdsmi_processor_handle):
            raise AmdSmiParameterException(
                handle, amdsmi_wrapper.amdsmi_processor_handle
            )

    lowest = ctypes.c_uint64()
    highest = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_minmax_bandwidth_between_processors(
        processor_handle_src,
        processor_handle_dst,
        ctypes.byref(lowest),
        ctypes.byref(highest),
    )
    _check_res(status)

    return {"min_bandwidth": lowest.value, "max_bandwidth": highest.value}
|
|
|
|
|
|
def amdsmi_get_link_metrics(processor_handle: processor_handle_t):
    """Return per-link metrics for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        A dict with ``num_links`` (as reported by the library) and ``links``,
        a list with one entry for each of the ``AMDSMI_MAX_NUM_XGMI_LINKS``
        slots in the structure, not just the first ``num_links``. Each entry
        has the keys ``bdf`` (formatted by ``_format_bdf``), ``bit_rate``,
        ``max_bandwidth``, ``link_type``, ``read`` and ``write``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    metrics = amdsmi_wrapper.amdsmi_link_metrics_t()
    status = amdsmi_wrapper.amdsmi_get_link_metrics(
        processor_handle, ctypes.byref(metrics)
    )
    _check_res(status)

    def _link_to_dict(entry):
        # Convert one raw link structure into a plain dict.
        return {
            "bdf": _format_bdf(entry.bdf),
            "bit_rate": entry.bit_rate,
            "max_bandwidth": entry.max_bandwidth,
            "link_type": entry.link_type,
            "read": entry.read,
            "write": entry.write,
        }

    link_list = [
        _link_to_dict(metrics.links[slot])
        for slot in range(AMDSMI_MAX_NUM_XGMI_LINKS)
    ]

    return {"num_links": metrics.num_links, "links": link_list}
|
|
|
|
|
|
def amdsmi_topo_get_link_type(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the hop count and link type between two processors.

    Args:
        processor_handle_src: Handle of the source device.
        processor_handle_dst: Handle of the destination device.

    Returns:
        A dict with ``hops`` (from a ``ctypes.c_uint64``) and ``type`` (from
        a ``ctypes.c_uint32``), both as plain integers.

    Raises:
        AmdSmiParameterException: If either handle is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` (source is checked
            first).

    The library status code is handed to ``_check_res``.
    """
    # Validate source first, then destination.
    for handle in (processor_handle_src, processor_handle_dst):
        if not isinstance(handle, amdsmi_wrapper.amdsmi_processor_handle):
            raise AmdSmiParameterException(
                handle, amdsmi_wrapper.amdsmi_processor_handle
            )

    hop_count = ctypes.c_uint64()
    link_kind = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_topo_get_link_type(
        processor_handle_src,
        processor_handle_dst,
        ctypes.byref(hop_count),
        ctypes.byref(link_kind),
    )
    _check_res(status)

    return {"hops": hop_count.value, "type": link_kind.value}
|
|
|
|
|
|
def amdsmi_topo_get_p2p_status(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the link type and P2P capability flags between two processors.

    Args:
        processor_handle_src: Handle of the source device.
        processor_handle_dst: Handle of the destination device.

    Returns:
        A dict with:

        * ``type``: the integer link type written by the library.
        * ``cap``: a dict with the fields ``is_iolink_coherent``,
          ``is_iolink_atomics_32bit``, ``is_iolink_atomics_64bit``,
          ``is_iolink_dma`` and ``is_iolink_bi_directional`` copied from the
          capability structure.

    Raises:
        AmdSmiParameterException: If either handle is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` (source is checked
            first).

    The library status code is handed to ``_check_res``.
    """
    if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle
        )

    if not isinstance(processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle
        )

    type_32 = ctypes.c_uint32()
    cap = amdsmi_wrapper.struct_amdsmi_p2p_capability_t()

    _check_res(
        amdsmi_wrapper.amdsmi_topo_get_p2p_status(
            processor_handle_src,
            processor_handle_dst,
            ctypes.byref(type_32),
            ctypes.byref(cap),
        )
    )

    return {
        # Report the value the library wrote, not the builtin ``type``
        # (the previous code returned the builtin by mistake).
        'type': type_32.value,
        'cap': {
            'is_iolink_coherent': cap.is_iolink_coherent,
            'is_iolink_atomics_32bit': cap.is_iolink_atomics_32bit,
            'is_iolink_atomics_64bit': cap.is_iolink_atomics_64bit,
            'is_iolink_dma': cap.is_iolink_dma,
            'is_iolink_bi_directional': cap.is_iolink_bi_directional,
        },
    }
|
|
|
|
|
|
def amdsmi_is_P2P_accessible(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the P2P accessibility flag between two processors.

    Args:
        processor_handle_src: Handle of the source device.
        processor_handle_dst: Handle of the destination device.

    Returns:
        The boolean value written by the library into a ``ctypes.c_bool``.

    Raises:
        AmdSmiParameterException: If either handle is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` (source is checked
            first).

    The library status code is handed to ``_check_res``.
    """
    # Validate source first, then destination.
    for handle in (processor_handle_src, processor_handle_dst):
        if not isinstance(handle, amdsmi_wrapper.amdsmi_processor_handle):
            raise AmdSmiParameterException(
                handle, amdsmi_wrapper.amdsmi_processor_handle
            )

    is_accessible = ctypes.c_bool()
    status = amdsmi_wrapper.amdsmi_is_P2P_accessible(
        processor_handle_src, processor_handle_dst, ctypes.byref(is_accessible)
    )
    _check_res(status)

    return is_accessible.value
|
|
|
|
def amdsmi_get_gpu_compute_partition(processor_handle: processor_handle_t):
    """Return the compute partition string for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The contents of the string buffer filled in by the library, decoded
        as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    buffer_len = ctypes.c_uint32(_AMDSMI_STRING_LENGTH)
    partition_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)

    status = amdsmi_wrapper.amdsmi_get_gpu_compute_partition(
        processor_handle, partition_buffer, buffer_len
    )
    _check_res(status)

    return partition_buffer.value.decode("utf-8")
|
|
|
|
|
|
def amdsmi_set_gpu_compute_partition(processor_handle: processor_handle_t,
                                     compute_partition: AmdSmiComputePartitionType):
    """Request a compute partition type for a processor.

    Args:
        processor_handle: Handle of the device to configure.
        compute_partition: The partition type to pass to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or
            ``compute_partition`` is not an ``AmdSmiComputePartitionType``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(compute_partition, AmdSmiComputePartitionType):
        raise AmdSmiParameterException(compute_partition, AmdSmiComputePartitionType)

    status = amdsmi_wrapper.amdsmi_set_gpu_compute_partition(
        processor_handle, compute_partition
    )
    _check_res(status)
|
|
|
|
def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: processor_handle_t,
                                                 profile_index: int):
    """Select an accelerator partition profile by index.

    Args:
        processor_handle: Handle of the device to configure.
        profile_index: Index of the profile, passed unchanged to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``profile_index``
            is not an ``int``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(profile_index, int):
        raise AmdSmiParameterException(profile_index, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_accelerator_partition_profile(
        processor_handle, profile_index
    )
    _check_res(status)
|
|
|
|
def amdsmi_get_gpu_memory_partition(processor_handle: processor_handle_t):
    """Return the memory partition string for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        The contents of the string buffer filled in by the library, decoded
        as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    buffer_len = ctypes.c_uint32(_AMDSMI_STRING_LENGTH)
    partition_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)

    status = amdsmi_wrapper.amdsmi_get_gpu_memory_partition(
        processor_handle, partition_buffer, buffer_len
    )
    _check_res(status)

    return partition_buffer.value.decode("utf-8")
|
|
|
|
def amdsmi_get_gpu_memory_partition_config(processor_handle: processor_handle_t):
    """Return the memory partition configuration for a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        A dict with:

        * ``partition_caps``: list of the labels ``"NPS1"``, ``"NPS2"``,
          ``"NPS4"``, ``"NPS8"`` whose capability flag equals 1, or
          ``["N/A"]`` when all four flags equal 0.
        * ``mp_mode``: the enum name for ``config.mp_mode`` with the
          ``AMDSMI_MEMORY_PARTITION_`` prefix removed and ``UNKNOWN``
          replaced by ``N/A``.
        * ``num_numa_ranges`` and ``numa_range``: always ``"N/A"``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    config = amdsmi_wrapper.amdsmi_memory_partition_config_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_partition_config(
        processor_handle, config
    )
    _check_res(status)

    nps_flags = config.partition_caps.nps_flags
    labelled_flags = (
        ("NPS1", nps_flags.nps1_cap),
        ("NPS2", nps_flags.nps2_cap),
        ("NPS4", nps_flags.nps4_cap),
        ("NPS8", nps_flags.nps8_cap),
    )
    caps = [label for label, flag in labelled_flags if flag == 1]
    # "N/A" is only added when every flag is exactly 0.
    if all(flag == 0 for _, flag in labelled_flags):
        caps.append("N/A")

    mode_names = amdsmi_wrapper.amdsmi_memory_partition_type_t__enumvalues
    mode_text = mode_names[config.mp_mode]
    mode_text = mode_text.replace("AMDSMI_MEMORY_PARTITION_", "")
    mode_text = mode_text.replace("UNKNOWN", "N/A")

    return {
        "partition_caps": caps,
        "mp_mode": mode_text,
        "num_numa_ranges": "N/A",
        "numa_range": "N/A",
    }
|
|
|
|
|
|
def amdsmi_set_gpu_memory_partition(processor_handle: processor_handle_t,
                                    memory_partition: AmdSmiMemoryPartitionType):
    """Request a memory partition type for a processor.

    Args:
        processor_handle: Handle of the device to configure.
        memory_partition: The partition type to pass to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or
            ``memory_partition`` is not an ``AmdSmiMemoryPartitionType``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(memory_partition, AmdSmiMemoryPartitionType):
        raise AmdSmiParameterException(memory_partition, AmdSmiMemoryPartitionType)

    status = amdsmi_wrapper.amdsmi_set_gpu_memory_partition(
        processor_handle, memory_partition
    )
    _check_res(status)
|
|
|
|
def amdsmi_set_gpu_memory_partition_mode(processor_handle: processor_handle_t,
                                         memory_partition: AmdSmiMemoryPartitionType):
    """Request a memory partition mode for a processor.

    Args:
        processor_handle: Handle of the device to configure.
        memory_partition: The partition type to pass to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or
            ``memory_partition`` is not an ``AmdSmiMemoryPartitionType``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(memory_partition, AmdSmiMemoryPartitionType):
        raise AmdSmiParameterException(memory_partition, AmdSmiMemoryPartitionType)

    # NOTE(review): this forwards to the wrapper's
    # amdsmi_set_gpu_memory_partition, not to a *_mode entry point --
    # confirm that is intended.
    status = amdsmi_wrapper.amdsmi_set_gpu_memory_partition(
        processor_handle, memory_partition
    )
    _check_res(status)
|
|
|
|
def amdsmi_get_gpu_accelerator_partition_profile(
    processor_handle: processor_handle_t
) -> Dict[str, Any]:
    """Return the current accelerator partition profile and partition id.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        A dict with:

        * ``partition_id``: a one-element list holding the first entry of
          the partition id array filled in by the library.
        * ``partition_profile``: a dict with the keys ``profile_type``,
          ``num_partitions``, ``profile_index``, ``memory_caps``,
          ``num_resources`` and ``resources``. When the library reports
          ``AMDSMI_STATUS_NOT_SUPPORTED`` every value is ``"N/A"``, so the
          partition id is still available to the caller. ``resources`` is
          always ``"N/A"``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Any status other than ``AMDSMI_STATUS_NOT_SUPPORTED`` is handed to
    ``_check_res``.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    max_partition_ids = 8
    position_of_partition_id = 0
    # Zero-initialised array the library fills with partition ids.
    partition_id_list = (ctypes.c_uint32 * max_partition_ids)()
    profile = amdsmi_wrapper.amdsmi_accelerator_partition_profile_t()

    ret = amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile(
        processor_handle, ctypes.byref(profile), partition_id_list
    )

    if ret == amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED:
        # partition_id[0] will contain the partition id of each device.
        # BM/Guest will include this logic. Host will only display primary
        # partition ids. This ensures we can get the partition id even if
        # the profile is not supported.
        return {
            "partition_id": [partition_id_list[position_of_partition_id]],
            "partition_profile": {
                "profile_type": "N/A",
                "num_partitions": "N/A",
                "profile_index": "N/A",
                "memory_caps": "N/A",
                "num_resources": "N/A",
                "resources": "N/A",
            },
        }

    # Any other failure is reported before the profile is interpreted.
    # Previously the parsing below lived in a ``finally`` clause, so it also
    # ran while an error was propagating and could replace it with an
    # unrelated lookup error.
    _check_res(ret)

    profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[
        profile.profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "")
    profile_type_ret = profile_type_ret.replace("INVALID", "N/A")

    # partition_id[0] will contain the partition id of each device.
    partition_ids = [partition_id_list[position_of_partition_id]]

    nps_flags = profile.memory_caps.nps_flags
    labelled_flags = (
        ("NPS1", nps_flags.nps1_cap),
        ("NPS2", nps_flags.nps2_cap),
        ("NPS4", nps_flags.nps4_cap),
        ("NPS8", nps_flags.nps8_cap),
    )
    mem_caps_list = [label for label, flag in labelled_flags if flag == 1]
    # "N/A" is only added when every flag is exactly 0.
    if all(flag == 0 for _, flag in labelled_flags):
        mem_caps_list.append("N/A")

    return {
        "partition_id": partition_ids,
        "partition_profile": {
            "profile_type": profile_type_ret,
            "num_partitions": profile.num_partitions,
            "profile_index": profile.profile_index,
            "memory_caps": mem_caps_list,
            "num_resources": profile.num_resources,
            "resources": "N/A",
        },
    }
|
|
|
|
def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: processor_handle_t) -> Dict:
    """Return the accelerator partition profile configuration of a processor.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        A dict with the keys ``num_profiles``, ``num_resource_profiles``,
        ``resource_profiles``, ``default_profile_index`` and ``profiles``.
        ``profiles`` holds one dict per profile (``profile_type``,
        ``num_partitions``, ``profile_index``, ``memory_caps``,
        ``num_resources``, ``resources``). ``resource_profiles`` is the
        resource list built for the last profile visited, or an empty list
        when there are no profiles.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    config = amdsmi_wrapper.amdsmi_accelerator_partition_profile_config_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile_config(
        processor_handle, ctypes.byref(config)
    )
    _check_res(status)

    profile_entries = []
    resource_entries = []
    next_resource = 0
    for profile_pos in range(config.num_profiles):
        current = config.profiles[profile_pos]

        type_names = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues
        type_text = type_names[current.profile_type]
        type_text = type_text.replace("AMDSMI_ACCELERATOR_PARTITION_", "")
        type_text = type_text.replace("INVALID", "N/A")

        nps_flags = current.memory_caps.nps_flags
        labelled_flags = (
            ("NPS1", nps_flags.nps1_cap),
            ("NPS2", nps_flags.nps2_cap),
            ("NPS4", nps_flags.nps4_cap),
            ("NPS8", nps_flags.nps8_cap),
        )
        caps = [label for label, flag in labelled_flags if flag == 1]
        # "N/A" is only added when every flag is exactly 0.
        if all(flag == 0 for _, flag in labelled_flags):
            caps.append("N/A")

        # NOTE(review): next_resource keeps advancing across profiles while
        # this loop runs num_resource_profiles times for every profile --
        # confirm this indexing is what is intended.
        resource_entries = []
        for _ in range(config.num_resource_profiles):
            raw_resource = config.resource_profiles[next_resource]
            resource_names = amdsmi_wrapper.amdsmi_accelerator_partition_resource_type_t__enumvalues
            resource_text = resource_names[raw_resource.resource_type].replace(
                "AMDSMI_ACCELERATOR_", "")
            resource_entries.append({
                "profile_index": raw_resource.profile_index,
                "resource_type": resource_text,
                "partition_resource": raw_resource.partition_resource,
                "num_partitions_share_resource": raw_resource.num_partitions_share_resource,
            })
            next_resource += 1

        profile_entries.append({
            "profile_type": type_text,
            "num_partitions": current.num_partitions,
            "profile_index": current.profile_index,
            "memory_caps": caps,
            "num_resources": current.num_resources,
            "resources": resource_entries,
        })

    return {
        "num_profiles": config.num_profiles,
        "num_resource_profiles": config.num_resource_profiles,
        "resource_profiles": resource_entries,
        "default_profile_index": config.default_profile_index,
        "profiles": profile_entries,
    }
|
|
|
|
def amdsmi_get_xgmi_info(processor_handle: processor_handle_t):
    """Return the XGMI information structure of a processor as a dict.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        A dict with the keys ``xgmi_lanes``, ``xgmi_hive_id``,
        ``xgmi_node_id`` and ``index`` copied from the
        ``amdsmi_xgmi_info_t`` structure.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    info = amdsmi_wrapper.amdsmi_xgmi_info_t()
    status = amdsmi_wrapper.amdsmi_get_xgmi_info(processor_handle, info)
    _check_res(status)

    field_names = ("xgmi_lanes", "xgmi_hive_id", "xgmi_node_id", "index")
    return {field: getattr(info, field) for field in field_names}
|
|
|
|
|
|
def amdsmi_gpu_counter_group_supported(
    processor_handle: processor_handle_t,
    event_group: AmdSmiEventGroup,
):
    """Ask the library whether an event group is supported on a processor.

    Returns nothing; the outcome is conveyed solely by how ``_check_res``
    reacts to the library status code.

    Args:
        processor_handle: Handle of the device to query.
        event_group: The event group to check.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``event_group`` is
            not an ``AmdSmiEventGroup``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(event_group, AmdSmiEventGroup):
        raise AmdSmiParameterException(event_group, AmdSmiEventGroup)

    status = amdsmi_wrapper.amdsmi_gpu_counter_group_supported(
        processor_handle, event_group
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_gpu_create_counter(
    processor_handle: processor_handle_t,
    event_type: AmdSmiEventType,
) -> amdsmi_wrapper.amdsmi_event_handle_t:
    """Create a performance counter for an event type on a processor.

    Args:
        processor_handle: Handle of the device that owns the counter.
        event_type: The event type to count.

    Returns:
        The ``amdsmi_event_handle_t`` filled in by the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``event_type`` is
            not an ``AmdSmiEventType``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(event_type, AmdSmiEventType):
        raise AmdSmiParameterException(event_type, AmdSmiEventType)

    new_handle = amdsmi_wrapper.amdsmi_event_handle_t()
    status = amdsmi_wrapper.amdsmi_gpu_create_counter(
        processor_handle, event_type, ctypes.byref(new_handle)
    )
    _check_res(status)

    return new_handle
|
|
|
|
def amdsmi_gpu_destroy_counter(event_handle: amdsmi_wrapper.amdsmi_event_handle_t) -> None:
    """Destroy a previously created performance counter.

    Args:
        event_handle: Handle of the counter to destroy.

    Raises:
        AmdSmiParameterException: If ``event_handle`` is not an
            ``amdsmi_wrapper.amdsmi_event_handle_t``.

    The library status code is handed to ``_check_res``.
    """
    event_type = amdsmi_wrapper.amdsmi_event_handle_t
    if not isinstance(event_handle, event_type):
        raise AmdSmiParameterException(event_handle, event_type)

    status = amdsmi_wrapper.amdsmi_gpu_destroy_counter(event_handle)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_gpu_control_counter(
    event_handle: amdsmi_wrapper.amdsmi_event_handle_t,
    counter_command: AmdSmiCounterCommand,
):
    """Send a control command to a performance counter.

    Args:
        event_handle: Handle of the counter to control.
        counter_command: The command to issue.

    Raises:
        AmdSmiParameterException: If ``event_handle`` is not an
            ``amdsmi_wrapper.amdsmi_event_handle_t`` or ``counter_command``
            is not an ``AmdSmiCounterCommand``.

    The library status code is handed to ``_check_res``.
    """
    event_type = amdsmi_wrapper.amdsmi_event_handle_t
    if not isinstance(event_handle, event_type):
        raise AmdSmiParameterException(event_handle, event_type)
    if not isinstance(counter_command, AmdSmiCounterCommand):
        raise AmdSmiParameterException(counter_command, AmdSmiCounterCommand)

    # The library receives a fresh handle built from the caller's value and
    # an empty void pointer as the command arguments.
    handle_copy = amdsmi_wrapper.amdsmi_event_handle_t(event_handle.value)
    empty_args = ctypes.c_void_p()

    status = amdsmi_wrapper.amdsmi_gpu_control_counter(
        handle_copy, counter_command, empty_args
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_gpu_read_counter(
    event_handle: amdsmi_wrapper.amdsmi_event_handle_t,
) -> Dict[str, Any]:
    """Read the current value of a performance counter.

    Args:
        event_handle: Handle of the counter to read.

    Returns:
        A dict with the keys ``value``, ``time_enabled`` and
        ``time_running`` copied from the ``amdsmi_counter_value_t``
        structure.

    Raises:
        AmdSmiParameterException: If ``event_handle`` is not an
            ``amdsmi_wrapper.amdsmi_event_handle_t``.

    The library status code is handed to ``_check_res``.
    """
    event_type = amdsmi_wrapper.amdsmi_event_handle_t
    if not isinstance(event_handle, event_type):
        raise AmdSmiParameterException(event_handle, event_type)

    reading = amdsmi_wrapper.amdsmi_counter_value_t()
    status = amdsmi_wrapper.amdsmi_gpu_read_counter(
        event_handle, ctypes.byref(reading)
    )
    _check_res(status)

    field_names = ("value", "time_enabled", "time_running")
    return {field: getattr(reading, field) for field in field_names}
|
|
|
|
|
|
def amdsmi_get_gpu_available_counters(
    processor_handle: processor_handle_t,
    event_group: AmdSmiEventGroup,
) -> int:
    """Return the number of available counters for an event group.

    Args:
        processor_handle: Handle of the device to query.
        event_group: The event group to query.

    Returns:
        The integer value written by the library into a ``ctypes.c_uint32``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``event_group`` is
            not an ``AmdSmiEventGroup``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(event_group, AmdSmiEventGroup):
        raise AmdSmiParameterException(event_group, AmdSmiEventGroup)

    free_counters = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_available_counters(
        processor_handle, event_group, ctypes.byref(free_counters)
    )
    _check_res(status)

    return free_counters.value
|
|
|
|
|
|
def amdsmi_set_gpu_perf_level(
    processor_handle: processor_handle_t,
    perf_level: AmdSmiDevPerfLevel,
):
    """Request a performance level for a processor.

    Args:
        processor_handle: Handle of the device to configure.
        perf_level: The performance level to pass to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``perf_level`` is
            not an ``AmdSmiDevPerfLevel``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(perf_level, AmdSmiDevPerfLevel):
        raise AmdSmiParameterException(perf_level, AmdSmiDevPerfLevel)

    status = amdsmi_wrapper.amdsmi_set_gpu_perf_level(processor_handle, perf_level)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_reset_gpu(processor_handle: processor_handle_t):
    """Issue the library's GPU reset call for a processor.

    Args:
        processor_handle: Handle of the device to reset.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_reset_gpu(processor_handle)
    _check_res(status)
|
|
|
|
def amdsmi_gpu_driver_reload():
    """Issue the library's GPU driver reload call.

    The library status code is handed to ``_check_res``.
    """
    status = amdsmi_wrapper.amdsmi_gpu_driver_reload()
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_gpu_fan_speed(
    processor_handle: processor_handle_t, sensor_idx: int, fan_speed: int
):
    """Set the fan speed for one sensor of a processor.

    Args:
        processor_handle: Handle of the device to configure.
        sensor_idx: Sensor index, passed to the library as a 32-bit
            unsigned value.
        fan_speed: Fan speed, passed to the library as a 64-bit unsigned
            value.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or either of
            ``sensor_idx`` / ``fan_speed`` is not an ``int``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Integer arguments are checked in declaration order.
    for number in (sensor_idx, fan_speed):
        if not isinstance(number, int):
            raise AmdSmiParameterException(number, int)

    sensor_arg = ctypes.c_uint32(sensor_idx)
    speed_arg = ctypes.c_uint64(fan_speed)

    status = amdsmi_wrapper.amdsmi_set_gpu_fan_speed(
        processor_handle, sensor_arg, speed_arg
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_reset_gpu_fan(
    processor_handle: processor_handle_t, sensor_idx: int
):
    """Issue the library's fan reset call for one sensor of a processor.

    Args:
        processor_handle: Handle of the device to configure.
        sensor_idx: Sensor index, passed to the library as a 32-bit
            unsigned value.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``sensor_idx`` is
            not an ``int``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    sensor_arg = ctypes.c_uint32(sensor_idx)
    status = amdsmi_wrapper.amdsmi_reset_gpu_fan(processor_handle, sensor_arg)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_clk_freq(
    processor_handle: processor_handle_t,
    clk_type: str,
    freq_bitmask: int,
):
    """Apply a frequency bitmask to one clock of a processor.

    Args:
        processor_handle: Handle of the device to configure.
        clk_type: Clock name, matched case-insensitively against ``"sclk"``,
            ``"mclk"``, ``"fclk"`` and ``"socclk"``.
        freq_bitmask: Bitmask passed to the library as a 64-bit unsigned
            value.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``, ``clk_type`` is not
            a ``str``, ``freq_bitmask`` is not an ``int``, or ``clk_type``
            is not one of the recognised names.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(clk_type, str):
        raise AmdSmiParameterException(clk_type, str)
    if not isinstance(freq_bitmask, int):
        raise AmdSmiParameterException(freq_bitmask, int)

    clock_by_name = {
        "sclk": AmdSmiClkType.SYS,
        "mclk": AmdSmiClkType.MEM,
        "fclk": AmdSmiClkType.DF,
        "socclk": AmdSmiClkType.SOC,
    }
    # Unrecognised names map to the "N/A" placeholder, which then fails the
    # type check below.
    clock = clock_by_name.get(clk_type.lower(), "N/A")
    if not isinstance(clock, AmdSmiClkType):
        raise AmdSmiParameterException(clock, AmdSmiClkType)

    mask_arg = ctypes.c_uint64(freq_bitmask)
    status = amdsmi_wrapper.amdsmi_set_clk_freq(processor_handle, clock, mask_arg)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_soc_pstate(
    processor_handle: processor_handle_t,
    policy_id: int,
):
    """Select an SoC p-state policy by id.

    Args:
        processor_handle: Handle of the device to configure.
        policy_id: Policy id, passed unchanged to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``policy_id`` is
            not an ``int``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(policy_id, int):
        raise AmdSmiParameterException(policy_id, int)

    status = amdsmi_wrapper.amdsmi_set_soc_pstate(processor_handle, policy_id)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_xgmi_plpd(
    processor_handle: processor_handle_t,
    policy_id: int,
):
    """Select an XGMI PLPD policy by id.

    Args:
        processor_handle: Handle of the device to configure.
        policy_id: Policy id, passed unchanged to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``policy_id`` is
            not an ``int``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(policy_id, int):
        raise AmdSmiParameterException(policy_id, int)

    status = amdsmi_wrapper.amdsmi_set_xgmi_plpd(processor_handle, policy_id)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_gpu_process_isolation(
    processor_handle: processor_handle_t,
    pisolate: int,
):
    """Set the process isolation value for a processor.

    Args:
        processor_handle: Handle of the device to configure.
        pisolate: Isolation value, passed unchanged to the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``pisolate`` is
            not an ``int``.

    The library status code is handed to ``_check_res``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(pisolate, int):
        raise AmdSmiParameterException(pisolate, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_process_isolation(
        processor_handle, pisolate
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_clean_gpu_local_data(
    processor_handle: processor_handle_t,
):
    """Invoke ``amdsmi_wrapper.amdsmi_clean_gpu_local_data`` for a processor.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_clean_gpu_local_data(processor_handle)
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_gpu_overdrive_level(
    processor_handle: processor_handle_t, overdrive_value: int
):
    """Forward an overdrive value to ``amdsmi_wrapper.amdsmi_set_gpu_overdrive_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        overdrive_value: Integer; wrapped in ``ctypes.c_uint32`` before the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(overdrive_value, int):
        raise AmdSmiParameterException(overdrive_value, int)

    native_value = ctypes.c_uint32(overdrive_value)
    status = amdsmi_wrapper.amdsmi_set_gpu_overdrive_level(
        processor_handle, native_value
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_get_gpu_bdf_id(processor_handle: processor_handle_t):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_bdf_id``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The integer held in the ``ctypes.c_uint64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    bdf_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_bdf_id(
        processor_handle, ctypes.byref(bdf_out)
    )
    _check_res(status)
    return bdf_out.value
|
|
|
|
|
|
def amdsmi_set_gpu_pci_bandwidth(
    processor_handle: processor_handle_t, bitmask: int
) -> None:
    """Forward a bitmask to ``amdsmi_wrapper.amdsmi_set_gpu_pci_bandwidth``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        bitmask: Integer; wrapped in ``ctypes.c_uint64`` before the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(bitmask, int):
        raise AmdSmiParameterException(bitmask, int)

    native_mask = ctypes.c_uint64(bitmask)
    status = amdsmi_wrapper.amdsmi_set_gpu_pci_bandwidth(
        processor_handle, native_mask
    )
    _check_res(status)
|
|
|
|
|
|
def _format_transfer_rate(transfer_rate):
    """Copy a transfer-rate record into a plain dictionary.

    Args:
        transfer_rate: Any object exposing ``num_supported``, ``current`` and an
            iterable ``frequency`` attribute.

    Returns:
        A dict with keys ``'num_supported'``, ``'current'`` and ``'frequency'``;
        the last is a new ``list`` built from ``transfer_rate.frequency`` (the
        whole iterable, not truncated to ``num_supported``).
    """
    return dict(
        num_supported=transfer_rate.num_supported,
        current=transfer_rate.current,
        frequency=list(transfer_rate.frequency),
    )
|
|
|
|
|
|
def amdsmi_get_gpu_pci_bandwidth(processor_handle: processor_handle_t):
    """Query ``amdsmi_wrapper.amdsmi_get_gpu_pci_bandwidth`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with ``'transfer_rate'`` (the result of
        ``_format_transfer_rate`` on the struct's ``transfer_rate`` member) and
        ``'lanes'`` (a list copy of the struct's ``lanes`` member).

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    pcie_bw = amdsmi_wrapper.amdsmi_pcie_bandwidth_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_pci_bandwidth(
        processor_handle, ctypes.byref(pcie_bw)
    )
    _check_res(status)

    rate_info = _format_transfer_rate(pcie_bw.transfer_rate)
    lane_list = list(pcie_bw.lanes)
    return {'transfer_rate': rate_info, 'lanes': lane_list}
|
|
|
|
|
|
def amdsmi_get_gpu_pci_throughput(processor_handle: processor_handle_t):
    """Query ``amdsmi_wrapper.amdsmi_get_gpu_pci_throughput`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with keys ``'sent'``, ``'received'`` and ``'max_pkt_sz'`` holding
        the values of the three ``ctypes.c_uint64`` out-parameters.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    sent_out = ctypes.c_uint64()
    received_out = ctypes.c_uint64()
    max_pkt_out = ctypes.c_uint64()

    status = amdsmi_wrapper.amdsmi_get_gpu_pci_throughput(
        processor_handle,
        ctypes.byref(sent_out),
        ctypes.byref(received_out),
        ctypes.byref(max_pkt_out),
    )
    _check_res(status)

    return {
        'sent': sent_out.value,
        'received': received_out.value,
        'max_pkt_sz': max_pkt_out.value,
    }
|
|
|
|
|
|
def amdsmi_get_gpu_pci_replay_counter(processor_handle: processor_handle_t):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_pci_replay_counter``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The integer held in the ``ctypes.c_uint64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    replay_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_pci_replay_counter(
        processor_handle, ctypes.byref(replay_out)
    )
    _check_res(status)
    return replay_out.value
|
|
|
|
|
|
def amdsmi_get_gpu_topo_numa_affinity(processor_handle: processor_handle_t):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_topo_numa_affinity``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The integer held in the ``ctypes.c_int32`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    node_out = ctypes.c_int32()
    status = amdsmi_wrapper.amdsmi_get_gpu_topo_numa_affinity(
        processor_handle, ctypes.byref(node_out)
    )
    _check_res(status)
    return node_out.value
|
|
|
|
|
|
def amdsmi_set_power_cap(
    processor_handle: processor_handle_t, sensor_ind: int, cap: int
) -> None:
    """Forward a sensor index and cap to ``amdsmi_wrapper.amdsmi_set_power_cap``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_ind: Integer; wrapped in ``ctypes.c_uint32`` before the call.
        cap: Integer; wrapped in ``ctypes.c_uint64`` before the call.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_ind, int):
        raise AmdSmiParameterException(sensor_ind, int)
    if not isinstance(cap, int):
        raise AmdSmiParameterException(cap, int)

    native_sensor = ctypes.c_uint32(sensor_ind)
    native_cap = ctypes.c_uint64(cap)
    status = amdsmi_wrapper.amdsmi_set_power_cap(
        processor_handle, native_sensor, native_cap
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_set_gpu_power_profile(
    processor_handle: processor_handle_t,
    reserved: int,
    profile: AmdSmiPowerProfilePresetMasks,
) -> None:
    """Forward a profile preset to ``amdsmi_wrapper.amdsmi_set_gpu_power_profile``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        reserved: Integer; wrapped in ``ctypes.c_uint32`` before the call.
        profile: Must be an ``AmdSmiPowerProfilePresetMasks`` member; passed
            to the native call unchanged.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(reserved, int):
        raise AmdSmiParameterException(reserved, int)
    if not isinstance(profile, AmdSmiPowerProfilePresetMasks):
        raise AmdSmiParameterException(profile, AmdSmiPowerProfilePresetMasks)

    native_reserved = ctypes.c_uint32(reserved)
    status = amdsmi_wrapper.amdsmi_set_gpu_power_profile(
        processor_handle, native_reserved, profile
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_get_energy_count(processor_handle: processor_handle_t):
    """Query ``amdsmi_wrapper.amdsmi_get_energy_count`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with keys ``'energy_accumulator'`` (from a ``c_uint64``),
        ``'counter_resolution'`` (from a ``c_float``) and ``'timestamp'``
        (from a ``c_uint64``), each holding the out-parameter's value.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    accumulator_out = ctypes.c_uint64()
    resolution_out = ctypes.c_float()
    timestamp_out = ctypes.c_uint64()

    status = amdsmi_wrapper.amdsmi_get_energy_count(
        processor_handle,
        ctypes.byref(accumulator_out),
        ctypes.byref(resolution_out),
        ctypes.byref(timestamp_out),
    )
    _check_res(status)

    return {
        'energy_accumulator': accumulator_out.value,
        'counter_resolution': resolution_out.value,
        'timestamp': timestamp_out.value,
    }
|
|
|
|
|
|
def amdsmi_set_gpu_clk_range(
    processor_handle: processor_handle_t,
    min_clk_value: int,
    max_clk_value: int,
    clk_type: AmdSmiClkType,
) -> None:
    """Forward a min/max clock pair to ``amdsmi_wrapper.amdsmi_set_gpu_clk_range``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        min_clk_value: Integer; wrapped in ``ctypes.c_uint64`` before the call.
        max_clk_value: Integer; wrapped in ``ctypes.c_uint64`` before the call.
        clk_type: Must be an ``AmdSmiClkType`` member; passed unchanged.

    Raises:
        AmdSmiParameterException: If any argument fails its type check. The
            exception carries the argument that actually failed.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    if not isinstance(min_clk_value, int):
        raise AmdSmiParameterException(min_clk_value, int)

    if not isinstance(max_clk_value, int):
        # Report the offending argument; the previous code passed
        # min_clk_value here, which produced a misleading error.
        raise AmdSmiParameterException(max_clk_value, int)

    if not isinstance(clk_type, AmdSmiClkType):
        raise AmdSmiParameterException(clk_type, AmdSmiClkType)

    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_clk_range(
            processor_handle,
            ctypes.c_uint64(min_clk_value),
            ctypes.c_uint64(max_clk_value),
            clk_type,
        )
    )
|
|
|
|
|
|
def amdsmi_set_gpu_clk_limit(
    processor_handle: processor_handle_t,
    clk_type: str,
    limit_type: str,
    value: int
) -> None:
    """Forward a single clock limit to ``amdsmi_wrapper.amdsmi_set_gpu_clk_limit``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        clk_type: Case-insensitive ``"sclk"`` or ``"mclk"``; mapped to
            ``AMDSMI_CLK_TYPE_SYS`` / ``AMDSMI_CLK_TYPE_MEM`` respectively.
        limit_type: Case-insensitive ``"min"`` or ``"max"``; mapped to
            ``CLK_LIMIT_MIN`` / ``CLK_LIMIT_MAX`` respectively.
        value: Integer; wrapped in ``ctypes.c_uint64`` before the call.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.
        AmdSmiLibraryException: With ``AMDSMI_STATUS_INVAL`` when ``clk_type``
            or ``limit_type`` is a string outside the accepted set. Previously
            this case fell through and failed with ``UnboundLocalError``.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(clk_type, str):
        raise AmdSmiParameterException(clk_type, str)
    if not isinstance(limit_type, str):
        raise AmdSmiParameterException(limit_type, str)
    if not isinstance(value, int):
        raise AmdSmiParameterException(value, int)

    clk_type_name = clk_type.lower()
    if clk_type_name == "sclk":
        clk_type_conversion = amdsmi_wrapper.AMDSMI_CLK_TYPE_SYS
    elif clk_type_name == "mclk":
        clk_type_conversion = amdsmi_wrapper.AMDSMI_CLK_TYPE_MEM
    else:
        # The type is right (str) but the value is not one we can map.
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    limit_type_name = limit_type.lower()
    if limit_type_name == "min":
        limit_type_conversion = amdsmi_wrapper.CLK_LIMIT_MIN
    elif limit_type_name == "max":
        limit_type_conversion = amdsmi_wrapper.CLK_LIMIT_MAX
    else:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_clk_limit(
            processor_handle,
            clk_type_conversion,
            limit_type_conversion,
            ctypes.c_uint64(value),
        )
    )
|
|
|
|
|
|
def amdsmi_get_gpu_memory_total(processor_handle: processor_handle_t, mem_type: AmdSmiMemoryType):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_memory_total``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        mem_type: Must be an ``AmdSmiMemoryType`` member; passed unchanged.

    Returns:
        The integer held in the ``ctypes.c_uint64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(mem_type, AmdSmiMemoryType):
        raise AmdSmiParameterException(mem_type, AmdSmiMemoryType)

    total_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_total(
        processor_handle, mem_type, ctypes.byref(total_out)
    )
    _check_res(status)
    return total_out.value
|
|
|
|
|
|
def amdsmi_set_gpu_od_clk_info(
    processor_handle: processor_handle_t,
    level: AmdSmiFreqInd,
    value: int,
    clk_type: AmdSmiClkType,
) -> None:
    """Forward a level/value pair to ``amdsmi_wrapper.amdsmi_set_gpu_od_clk_info``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        level: Must be an ``AmdSmiFreqInd`` member; passed unchanged.
        value: Integer; wrapped in ``ctypes.c_uint64`` before the call.
        clk_type: Must be an ``AmdSmiClkType`` member; passed unchanged.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(level, AmdSmiFreqInd):
        raise AmdSmiParameterException(level, AmdSmiFreqInd)
    if not isinstance(value, int):
        raise AmdSmiParameterException(value, int)
    if not isinstance(clk_type, AmdSmiClkType):
        raise AmdSmiParameterException(clk_type, AmdSmiClkType)

    native_value = ctypes.c_uint64(value)
    status = amdsmi_wrapper.amdsmi_set_gpu_od_clk_info(
        processor_handle, level, native_value, clk_type
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_get_gpu_memory_usage(processor_handle: processor_handle_t, mem_type: AmdSmiMemoryType):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_memory_usage``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        mem_type: Must be an ``AmdSmiMemoryType`` member; passed unchanged.

    Returns:
        The integer held in the ``ctypes.c_uint64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(mem_type, AmdSmiMemoryType):
        raise AmdSmiParameterException(mem_type, AmdSmiMemoryType)

    used_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_usage(
        processor_handle, mem_type, ctypes.byref(used_out)
    )
    _check_res(status)
    return used_out.value
|
|
|
|
|
|
def amdsmi_set_gpu_od_volt_info(
    processor_handle: processor_handle_t,
    vpoint: int,
    clk_value: int,
    volt_value: int,
) -> None:
    """Forward a voltage-curve point to ``amdsmi_wrapper.amdsmi_set_gpu_od_volt_info``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        vpoint: Integer; wrapped in ``ctypes.c_uint32`` before the call.
        clk_value: Integer; wrapped in ``ctypes.c_uint64`` before the call.
        volt_value: Integer; wrapped in ``ctypes.c_uint64`` before the call.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(vpoint, int):
        raise AmdSmiParameterException(vpoint, int)
    if not isinstance(clk_value, int):
        raise AmdSmiParameterException(clk_value, int)
    if not isinstance(volt_value, int):
        raise AmdSmiParameterException(volt_value, int)

    native_point = ctypes.c_uint32(vpoint)
    native_clk = ctypes.c_uint64(clk_value)
    native_volt = ctypes.c_uint64(volt_value)
    status = amdsmi_wrapper.amdsmi_set_gpu_od_volt_info(
        processor_handle, native_point, native_clk, native_volt
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_get_gpu_fan_rpms(
    processor_handle: processor_handle_t, sensor_idx: int
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_fan_rpms``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Integer sensor index; passed to the native call unchanged.

    Returns:
        The integer held in the ``ctypes.c_int64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    rpm_out = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_gpu_fan_rpms(
        processor_handle, sensor_idx, ctypes.byref(rpm_out)
    )
    _check_res(status)
    return rpm_out.value
|
|
|
|
|
|
def amdsmi_get_gpu_fan_speed(
    processor_handle: processor_handle_t, sensor_idx: int
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_fan_speed``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Integer sensor index; passed to the native call unchanged.

    Returns:
        The integer held in the ``ctypes.c_int64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    speed_out = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_gpu_fan_speed(
        processor_handle, sensor_idx, ctypes.byref(speed_out)
    )
    _check_res(status)
    return speed_out.value
|
|
|
|
|
|
def amdsmi_get_gpu_fan_speed_max(
    processor_handle: processor_handle_t, sensor_idx: int
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_fan_speed_max``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Integer sensor index; passed to the native call unchanged.

    Returns:
        The integer held in the ``ctypes.c_uint64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    max_speed_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_fan_speed_max(
        processor_handle, sensor_idx, ctypes.byref(max_speed_out)
    )
    _check_res(status)
    return max_speed_out.value
|
|
|
|
|
|
def amdsmi_get_node_handle(processor_handle):
    """Obtain a node handle via ``amdsmi_wrapper.amdsmi_get_node_handle``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``amdsmi_wrapper.amdsmi_node_handle`` instance that was passed by
        reference to the native call (the ctypes object itself, not ``.value``).

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    node_out = amdsmi_wrapper.amdsmi_node_handle()
    status = amdsmi_wrapper.amdsmi_get_node_handle(
        processor_handle, ctypes.byref(node_out)
    )
    _check_res(status)
    return node_out
|
|
|
|
|
|
def amdsmi_get_npm_info(node_handle: processor_handle_t) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_npm_info`` and return a dict.

    Args:
        node_handle: Must be an ``amdsmi_wrapper.amdsmi_node_handle`` at
            runtime (note the annotation names ``processor_handle_t``, but the
            isinstance check below is against the node-handle type).

    Returns:
        A dict with keys ``"limit"`` and ``"status"`` copied from the
        ``amdsmi_npm_info_t`` struct filled by the native call.

    Raises:
        AmdSmiParameterException: If ``node_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    node_type = amdsmi_wrapper.amdsmi_node_handle
    if not isinstance(node_handle, node_type):
        raise AmdSmiParameterException(node_handle, node_type)

    info = amdsmi_wrapper.amdsmi_npm_info_t()
    status = amdsmi_wrapper.amdsmi_get_npm_info(node_handle, ctypes.byref(info))
    _check_res(status)

    return {"limit": info.limit, "status": info.status}
|
|
|
|
|
|
def amdsmi_get_temp_metric(
    processor_handle: processor_handle_t,
    sensor_type: AmdSmiTemperatureType,
    metric: AmdSmiTemperatureMetric,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_temp_metric``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_type: Must be an ``AmdSmiTemperatureType`` member.
        metric: Must be an ``AmdSmiTemperatureMetric`` member.

    Returns:
        The integer held in the ``ctypes.c_int64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_type, AmdSmiTemperatureType):
        raise AmdSmiParameterException(sensor_type, AmdSmiTemperatureType)
    if not isinstance(metric, AmdSmiTemperatureMetric):
        raise AmdSmiParameterException(metric, AmdSmiTemperatureMetric)

    reading = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_temp_metric(
        processor_handle, sensor_type, metric, ctypes.byref(reading)
    )
    _check_res(status)
    return reading.value
|
|
|
|
|
|
def amdsmi_get_gpu_volt_metric(
    processor_handle: processor_handle_t,
    sensor_type: AmdSmiVoltageType,
    metric: AmdSmiVoltageMetric,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_volt_metric``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_type: Must be an ``AmdSmiVoltageType`` member.
        metric: Must be an ``AmdSmiVoltageMetric`` member.

    Returns:
        The integer held in the ``ctypes.c_int64`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If any argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_type, AmdSmiVoltageType):
        raise AmdSmiParameterException(sensor_type, AmdSmiVoltageType)
    if not isinstance(metric, AmdSmiVoltageMetric):
        raise AmdSmiParameterException(metric, AmdSmiVoltageMetric)

    reading = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_gpu_volt_metric(
        processor_handle, sensor_type, metric, ctypes.byref(reading)
    )
    _check_res(status)
    return reading.value
|
|
|
|
|
|
def amdsmi_get_utilization_count(
    processor_handle: processor_handle_t,
    counter_types: List[AmdSmiUtilizationCounterType]
) -> List[Dict[str, Any]]:
    """Read utilization counters via ``amdsmi_wrapper.amdsmi_get_utilization_count``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        counter_types: A list of ``AmdSmiUtilizationCounterType`` members. A
            non-list argument is wrapped in a one-element list. Duplicates are
            removed through ``set``, so the caller's ordering is not kept.

    Returns:
        A list whose first element is ``{"timestamp": <int>}``, followed by one
        ``{"type": <name>, "value": <counter value>}`` dict per counter. The
        enum names ``AMDSMI_UTILIZATION_COUNTER_FIRST`` and
        ``AMDSMI_UTILIZATION_COUNTER_LAST`` are reported as
        ``AMDSMI_COARSE_GRAIN_GPU_ACTIVITY`` and
        ``AMDSMI_FINE_DECODER_ACTIVITY`` respectively.

    Raises:
        AmdSmiParameterException: If the handle or any counter type has the
            wrong type.
        AmdSmiLibraryException: ``AMDSMI_STATUS_INVAL`` for an empty request;
            ``AMDSMI_STATUS_API_FAILED`` if the count reported after the call
            differs from the number of counters requested.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Accept a bare value by promoting it to a list, then de-duplicate.
    requested = counter_types if isinstance(counter_types, list) else [counter_types]
    unique_types = list(set(requested))

    if len(unique_types) == 0:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    # Build one native counter struct per requested type, validating as we go.
    native_counters = []
    for requested_type in unique_types:
        if not isinstance(requested_type, AmdSmiUtilizationCounterType):
            raise AmdSmiParameterException(
                requested_type, AmdSmiUtilizationCounterType
            )
        entry = amdsmi_wrapper.amdsmi_utilization_counter_t()
        entry.type = requested_type
        native_counters.append(entry)

    requested_count = len(native_counters)
    count = ctypes.c_uint32(requested_count)
    timestamp = ctypes.c_uint64()
    array_type = amdsmi_wrapper.amdsmi_utilization_counter_t * requested_count
    counter_array = array_type(*native_counters)

    status = amdsmi_wrapper.amdsmi_get_utilization_count(
        processor_handle, counter_array, count, ctypes.byref(timestamp)
    )
    _check_res(status)

    if count.value != requested_count:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_API_FAILED)

    # Sentinel enum names are reported under the concrete counter names.
    renamed = {
        "AMDSMI_UTILIZATION_COUNTER_FIRST": "AMDSMI_COARSE_GRAIN_GPU_ACTIVITY",
        "AMDSMI_UTILIZATION_COUNTER_LAST": "AMDSMI_FINE_DECODER_ACTIVITY",
    }
    enum_names = amdsmi_wrapper.amdsmi_utilization_counter_type_t__enumvalues

    output = [{"timestamp": timestamp.value}]
    for slot in range(count.value):
        type_name = enum_names[counter_array[slot].type]
        type_name = renamed.get(type_name, type_name)
        output.append({"type": type_name, "value": counter_array[slot].value})

    return output
|
|
|
|
|
|
def amdsmi_get_gpu_perf_level(
    processor_handle: processor_handle_t,
) -> str:
    """Return the performance-level enum name from ``amdsmi_wrapper.amdsmi_get_gpu_perf_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The entry of ``amdsmi_dev_perf_level_t__enumvalues`` for the value the
        native call wrote. ``AMDSMI_DEV_PERF_LEVEL_FIRST`` is reported as
        ``AMDSMI_DEV_PERF_LEVEL_AUTO`` and ``AMDSMI_DEV_PERF_LEVEL_LAST`` as
        ``AMDSMI_DEV_PERF_LEVEL_DETERMINISM``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    level_out = amdsmi_wrapper.amdsmi_dev_perf_level_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_perf_level(
        processor_handle, ctypes.byref(level_out)
    )
    _check_res(status)

    level_name = amdsmi_wrapper.amdsmi_dev_perf_level_t__enumvalues[level_out.value]
    # Sentinel enum names are reported under the concrete level names.
    renamed = {
        "AMDSMI_DEV_PERF_LEVEL_FIRST": "AMDSMI_DEV_PERF_LEVEL_AUTO",
        "AMDSMI_DEV_PERF_LEVEL_LAST": "AMDSMI_DEV_PERF_LEVEL_DETERMINISM",
    }
    return renamed.get(level_name, level_name)
|
|
|
|
|
|
def amdsmi_set_gpu_perf_determinism_mode(
    processor_handle: processor_handle_t, clkvalue: int
) -> None:
    """Forward a clock value to ``amdsmi_wrapper.amdsmi_set_gpu_perf_determinism_mode``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        clkvalue: Integer handed to the native call unchanged.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(clkvalue, int):
        raise AmdSmiParameterException(clkvalue, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_perf_determinism_mode(
        processor_handle, clkvalue
    )
    _check_res(status)
|
|
|
|
|
|
def amdsmi_get_gpu_overdrive_level(
    processor_handle: processor_handle_t,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_overdrive_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The integer held in the ``ctypes.c_uint32`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    level_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_overdrive_level(
        processor_handle, ctypes.byref(level_out)
    )
    _check_res(status)
    return level_out.value
|
|
|
|
|
|
def amdsmi_get_gpu_mem_overdrive_level(
    processor_handle: processor_handle_t,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_mem_overdrive_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The integer held in the ``ctypes.c_uint32`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    level_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_mem_overdrive_level(
        processor_handle, ctypes.byref(level_out)
    )
    _check_res(status)
    return level_out.value
|
|
|
|
|
|
def amdsmi_get_clk_freq(
    processor_handle: processor_handle_t, clk_type: AmdSmiClkType
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_clk_freq`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        clk_type: Must be an ``AmdSmiClkType`` member; passed unchanged.

    Returns:
        A dict with ``"num_supported"``, ``"current"`` and ``"frequency"``;
        the last is the struct's ``frequency`` array copied to a list and
        truncated to the first ``num_supported`` entries.

    Raises:
        AmdSmiParameterException: If either argument fails its type check.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(clk_type, AmdSmiClkType):
        raise AmdSmiParameterException(clk_type, AmdSmiClkType)

    frequencies = amdsmi_wrapper.amdsmi_frequencies_t()
    status = amdsmi_wrapper.amdsmi_get_clk_freq(
        processor_handle, clk_type, ctypes.byref(frequencies)
    )
    _check_res(status)

    supported = frequencies.num_supported
    active = frequencies.current
    all_levels = list(frequencies.frequency)
    return {
        "num_supported": supported,
        "current": active,
        "frequency": all_levels[: frequencies.num_supported],
    }
|
|
|
|
|
|
def amdsmi_get_soc_pstate(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_soc_pstate`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with:
          * ``"num_supported"``: the struct's ``num_supported`` field.
          * ``"current_id"``: ``policy_id`` of the entry at index
            ``policy.current``, or ``0`` when that index is not below
            ``num_supported`` (same fallback as ``amdsmi_get_xgmi_plpd``).
          * ``"policies"``: one ``{'policy_id', 'policy_description'}`` dict
            for each of the first ``num_supported`` entries, the description
            being ``.decode()``-d from the struct's bytes field.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    policy = amdsmi_wrapper.amdsmi_dpm_policy_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_soc_pstate(
            processor_handle, ctypes.byref(policy)
        )
    )

    policies = [
        {
            'policy_id': policy.policies[i].policy_id,
            'policy_description': policy.policies[i].policy_description.decode(),
        }
        for i in range(policy.num_supported)
    ]

    # Guard the index: an out-of-range ``current`` would otherwise read an
    # unsupported slot or raise IndexError on the ctypes array.
    if policy.current < policy.num_supported:
        current_id = policy.policies[policy.current].policy_id
    else:
        current_id = 0  # Fallback, mirrors amdsmi_get_xgmi_plpd

    return {
        "num_supported": policy.num_supported,
        "current_id": current_id,
        "policies": policies,
    }
|
|
|
|
|
|
def amdsmi_get_xgmi_plpd(
    processor_handle: processor_handle_t
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_xgmi_plpd`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with:
          * ``"num_supported"``: the struct's ``num_supported`` field.
          * ``"current_id"``: ``policy_id`` of the entry at index
            ``policy.current``, or ``0`` when that index is not below
            ``num_supported``.
          * ``"plpds"`` and ``"policies"``: the same list object, one
            ``{'policy_id', 'policy_description'}`` dict per supported entry.
            ``"plpds"`` is the legacy key, marked for deprecation.
        An entry whose extraction raises ``UnicodeDecodeError``,
        ``AttributeError`` or ``ValueError`` is replaced by
        ``{'policy_id': 0, 'policy_description': ""}``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    policy = amdsmi_wrapper.amdsmi_dpm_policy_t()
    status = amdsmi_wrapper.amdsmi_get_xgmi_plpd(
        processor_handle, ctypes.byref(policy)
    )
    _check_res(status)

    entries = []
    for idx in range(policy.num_supported):
        try:
            item = policy.policies[idx]
            item_id = item.policy_id

            raw_desc = item.policy_description
            # An empty description skips decoding entirely.
            text = (
                ctypes.string_at(raw_desc).decode('utf-8').rstrip('\x00')
                if raw_desc
                else ""
            )

            entries.append({'policy_id': item_id, 'policy_description': text})
        except (UnicodeDecodeError, AttributeError, ValueError):
            # Best-effort: keep the list length aligned with num_supported.
            entries.append({'policy_id': 0, 'policy_description': ""})

    in_range = policy.current < policy.num_supported
    current_id = policy.policies[policy.current].policy_id if in_range else 0

    return {
        "num_supported": policy.num_supported,
        "current_id": current_id,
        "plpds": entries,  # Marked for deprecation
        "policies": entries,  # Correct field name
    }
|
|
|
|
def amdsmi_get_gpu_process_isolation(
    processor_handle: processor_handle_t,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_process_isolation``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The integer held in the ``ctypes.c_uint32`` out-parameter after the call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    isolation_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_process_isolation(
        processor_handle, ctypes.byref(isolation_out)
    )
    _check_res(status)
    return isolation_out.value
|
|
|
|
|
|
def amdsmi_get_gpu_od_volt_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_gpu_od_volt_info`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with:
          * ``"curr_sclk_range"`` / ``"curr_mclk_range"``: ``lower_bound`` and
            ``upper_bound``; a bound equal to ``MaxUIntegerTypes.UINT64_T`` is
            reported as the string ``"N/A"``.
          * ``"sclk_freq_limits"`` / ``"mclk_freq_limits"``: ``lower_bound``
            and ``upper_bound`` copied as-is.
          * ``"curve.vc_points"``: three ``{"frequency", "voltage"}`` dicts
            taken from ``curve.vc_points[0..2]``.
          * ``"num_regions"``: the struct's ``num_regions`` field.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.

    The wrapper's return value is passed to ``_check_res`` (defined elsewhere
    in this module; presumably raises on an error status).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    od_data = amdsmi_wrapper.amdsmi_od_volt_freq_data_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_od_volt_info(
        processor_handle, ctypes.byref(od_data)
    )
    _check_res(status)

    def _or_not_available(bound):
        # The all-ones 64-bit sentinel is surfaced to callers as "N/A".
        return "N/A" if bound == MaxUIntegerTypes.UINT64_T else bound

    sclk_now = od_data.curr_sclk_range
    mclk_now = od_data.curr_mclk_range
    sclk_limits = od_data.sclk_freq_limits
    mclk_limits = od_data.mclk_freq_limits

    curve_points = [
        {
            "frequency": od_data.curve.vc_points[point].frequency,
            "voltage": od_data.curve.vc_points[point].voltage,
        }
        for point in range(3)
    ]

    return {
        "curr_sclk_range": {
            "lower_bound": _or_not_available(sclk_now.lower_bound),
            "upper_bound": _or_not_available(sclk_now.upper_bound),
        },
        "curr_mclk_range": {
            "lower_bound": _or_not_available(mclk_now.lower_bound),
            "upper_bound": _or_not_available(mclk_now.upper_bound),
        },
        "sclk_freq_limits": {
            "lower_bound": sclk_limits.lower_bound,
            "upper_bound": sclk_limits.upper_bound,
        },
        "mclk_freq_limits": {
            "lower_bound": mclk_limits.lower_bound,
            "upper_bound": mclk_limits.upper_bound,
        },
        "curve.vc_points": curve_points,
        "num_regions": od_data.num_regions,
    }
|
|
|
|
|
|
def amdsmi_get_gpu_metrics_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the GPU metrics table of a processor as a flat dict.

    Fills an ``amdsmi_gpu_metrics_t`` through
    ``amdsmi_wrapper.amdsmi_get_gpu_metrics_info`` and converts every field
    with ``_validate_if_max_uint``, passing the ``MaxUIntegerTypes`` member
    that matches the field (how sentinel values are rendered is decided by
    that helper, defined elsewhere in this file).

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        A dict keyed by metric name. Header fields are prefixed with
        ``common_header.``; array fields are converted to lists before
        validation; each ``xcp_stats.<field>`` entry is a list with one inner
        list of validated values per element of ``gpu_metrics.xcp_stats``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    gpu_metrics = amdsmi_wrapper.amdsmi_gpu_metrics_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_metrics_info(
        processor_handle, ctypes.byref(gpu_metrics)
    )
    _check_res(status)

    u8, u16 = MaxUIntegerTypes.UINT8_T, MaxUIntegerTypes.UINT16_T
    u32, u64 = MaxUIntegerTypes.UINT32_T, MaxUIntegerTypes.UINT64_T
    # Extra keyword arguments forwarded to _validate_if_max_uint.
    plain, activity, boolean = {}, {"isActivity": True}, {"isBool": True}

    # Fields that are ctypes arrays and must be turned into a list first.
    array_fields = {
        "temperature_hbm", "vcn_activity", "xgmi_read_data_acc",
        "xgmi_write_data_acc", "current_gfxclks", "current_socclks",
        "current_vclk0s", "current_dclk0s", "jpeg_activity",
        "xgmi_link_status",
    }

    # (field name, max-uint sentinel, extra kwargs); order defines key order.
    fields_before_xcp = (
        ("temperature_edge", u16, plain),
        ("temperature_hotspot", u16, plain),
        ("temperature_mem", u16, plain),
        ("temperature_vrgfx", u16, plain),
        ("temperature_vrsoc", u16, plain),
        ("temperature_vrmem", u16, plain),
        ("average_gfx_activity", u16, activity),
        ("average_umc_activity", u16, activity),
        ("average_mm_activity", u16, activity),
        ("average_socket_power", u16, plain),
        ("energy_accumulator", u64, plain),
        ("system_clock_counter", u64, plain),
        ("average_gfxclk_frequency", u16, plain),
        ("average_socclk_frequency", u16, plain),
        ("average_uclk_frequency", u16, plain),
        ("average_vclk0_frequency", u16, plain),
        ("average_dclk0_frequency", u16, plain),
        ("average_vclk1_frequency", u16, plain),
        ("average_dclk1_frequency", u16, plain),
        ("current_gfxclk", u16, plain),
        ("current_socclk", u16, plain),
        ("current_uclk", u16, plain),
        ("current_vclk0", u16, plain),
        ("current_dclk0", u16, plain),
        ("current_vclk1", u16, plain),
        ("current_dclk1", u16, plain),
        ("throttle_status", u32, boolean),
        ("current_fan_speed", u16, plain),
        ("pcie_link_width", u16, plain),
        ("pcie_link_speed", u16, plain),
        ("gfx_activity_acc", u32, plain),
        ("mem_activity_acc", u32, plain),
        ("temperature_hbm", u16, plain),
        ("firmware_timestamp", u64, plain),
        ("voltage_soc", u16, plain),
        ("voltage_gfx", u16, plain),
        ("voltage_mem", u16, plain),
        ("indep_throttle_status", u64, boolean),
        ("current_socket_power", u16, plain),
        ("vcn_activity", u16, activity),
        ("gfxclk_lock_status", u32, plain),
        ("xgmi_link_width", u16, plain),
        ("xgmi_link_speed", u16, plain),
        ("pcie_bandwidth_acc", u64, plain),
        ("pcie_bandwidth_inst", u64, plain),
        ("pcie_l0_to_recov_count_acc", u64, plain),
        ("pcie_replay_count_acc", u64, plain),
        ("pcie_replay_rover_count_acc", u64, plain),
        ("xgmi_read_data_acc", u64, plain),
        ("xgmi_write_data_acc", u64, plain),
        ("current_gfxclks", u16, plain),
        ("current_socclks", u16, plain),
        ("current_vclk0s", u16, plain),
        ("current_dclk0s", u16, plain),
        ("jpeg_activity", u16, activity),
        ("pcie_nak_sent_count_acc", u32, plain),
        ("pcie_nak_rcvd_count_acc", u32, plain),
        ("accumulation_counter", u64, plain),
        ("prochot_residency_acc", u64, plain),
        ("ppt_residency_acc", u64, plain),
        ("socket_thm_residency_acc", u64, plain),
        ("vr_thm_residency_acc", u64, plain),
        ("hbm_thm_residency_acc", u64, plain),
        ("num_partition", u16, plain),
    )
    # Per-XCP array members of each gpu_metrics.xcp_stats element.
    xcp_fields = (
        ("gfx_busy_inst", u32, activity),
        ("jpeg_busy", u16, activity),
        ("vcn_busy", u16, activity),
        ("gfx_busy_acc", u64, plain),
        ("gfx_below_host_limit_acc", u64, plain),
        # new for gpu metrics v1.8
        ("gfx_below_host_limit_ppt_acc", u64, plain),
        ("gfx_below_host_limit_thm_acc", u64, plain),
        ("gfx_low_utilization_acc", u64, plain),
        ("gfx_below_host_limit_total_acc", u64, plain),
    )
    fields_after_xcp = (
        ("pcie_lc_perf_other_end_recovery", u32, plain),
        ("vram_max_bandwidth", u64, plain),
        ("xgmi_link_status", u16, plain),
    )

    def checked(raw, limit, flags):
        return _validate_if_max_uint(raw, limit, **flags)

    header = gpu_metrics.common_header
    gpu_metrics_output = {
        "common_header.structure_size": checked(header.structure_size, u16, plain),
        "common_header.format_revision": checked(header.format_revision, u8, plain),
        "common_header.content_revision": checked(header.content_revision, u8, plain),
    }

    def add_fields(table) -> None:
        for name, limit, flags in table:
            raw = getattr(gpu_metrics, name)
            if name in array_fields:
                raw = list(raw)
            gpu_metrics_output[name] = checked(raw, limit, flags)

    add_fields(fields_before_xcp)

    # Create 2d array with each XCD's stats
    for name, limit, flags in xcp_fields:
        gpu_metrics_output["xcp_stats." + name] = [
            [checked(val, limit, flags) for val in getattr(xcp_metrics, name)]
            for xcp_metrics in gpu_metrics.xcp_stats
        ]

    add_fields(fields_after_xcp)
    return gpu_metrics_output
|
|
|
|
def amdsmi_get_gpu_partition_metrics_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the partition GPU metrics table of a processor as a flat dict.

    Fills an ``amdsmi_gpu_metrics_t`` through
    ``amdsmi_wrapper.amdsmi_get_gpu_partition_metrics_info`` and converts
    every field with ``_validate_if_max_uint``, passing the
    ``MaxUIntegerTypes`` member that matches the field (how sentinel values
    are rendered is decided by that helper, defined elsewhere in this file).

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        A dict keyed by metric name. Header fields are prefixed with
        ``common_header.``; array fields are converted to lists before
        validation; each ``xcp_stats.<field>`` entry is a list with one inner
        list of validated values per element of ``gpu_metrics.xcp_stats``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    gpu_metrics = amdsmi_wrapper.amdsmi_gpu_metrics_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_partition_metrics_info(
        processor_handle, ctypes.byref(gpu_metrics)
    )
    _check_res(status)

    u8, u16 = MaxUIntegerTypes.UINT8_T, MaxUIntegerTypes.UINT16_T
    u32, u64 = MaxUIntegerTypes.UINT32_T, MaxUIntegerTypes.UINT64_T
    # Extra keyword arguments forwarded to _validate_if_max_uint.
    plain, activity, boolean = {}, {"isActivity": True}, {"isBool": True}

    # Fields that are ctypes arrays and must be turned into a list first.
    array_fields = {
        "temperature_hbm", "vcn_activity", "xgmi_read_data_acc",
        "xgmi_write_data_acc", "current_gfxclks", "current_socclks",
        "current_vclk0s", "current_dclk0s", "jpeg_activity",
        "xgmi_link_status",
    }

    # (field name, max-uint sentinel, extra kwargs); order defines key order.
    fields_before_xcp = (
        ("temperature_edge", u16, plain),
        ("temperature_hotspot", u16, plain),
        ("temperature_mem", u16, plain),
        ("temperature_vrgfx", u16, plain),
        ("temperature_vrsoc", u16, plain),
        ("temperature_vrmem", u16, plain),
        ("average_gfx_activity", u16, activity),
        ("average_umc_activity", u16, activity),
        ("average_mm_activity", u16, activity),
        ("average_socket_power", u16, plain),
        ("energy_accumulator", u64, plain),
        ("system_clock_counter", u64, plain),
        ("average_gfxclk_frequency", u16, plain),
        ("average_socclk_frequency", u16, plain),
        ("average_uclk_frequency", u16, plain),
        ("average_vclk0_frequency", u16, plain),
        ("average_dclk0_frequency", u16, plain),
        ("average_vclk1_frequency", u16, plain),
        ("average_dclk1_frequency", u16, plain),
        ("current_gfxclk", u16, plain),
        ("current_socclk", u16, plain),
        ("current_uclk", u16, plain),
        ("current_vclk0", u16, plain),
        ("current_dclk0", u16, plain),
        ("current_vclk1", u16, plain),
        ("current_dclk1", u16, plain),
        ("throttle_status", u32, boolean),
        ("current_fan_speed", u16, plain),
        ("pcie_link_width", u16, plain),
        ("pcie_link_speed", u16, plain),
        ("gfx_activity_acc", u32, plain),
        ("mem_activity_acc", u32, plain),
        ("temperature_hbm", u16, plain),
        ("firmware_timestamp", u64, plain),
        ("voltage_soc", u16, plain),
        ("voltage_gfx", u16, plain),
        ("voltage_mem", u16, plain),
        ("indep_throttle_status", u64, boolean),
        ("current_socket_power", u16, plain),
        ("vcn_activity", u16, activity),
        ("gfxclk_lock_status", u32, plain),
        ("xgmi_link_width", u16, plain),
        ("xgmi_link_speed", u16, plain),
        ("pcie_bandwidth_acc", u64, plain),
        ("pcie_bandwidth_inst", u64, plain),
        ("pcie_l0_to_recov_count_acc", u64, plain),
        ("pcie_replay_count_acc", u64, plain),
        ("pcie_replay_rover_count_acc", u64, plain),
        ("xgmi_read_data_acc", u64, plain),
        ("xgmi_write_data_acc", u64, plain),
        ("current_gfxclks", u16, plain),
        ("current_socclks", u16, plain),
        ("current_vclk0s", u16, plain),
        ("current_dclk0s", u16, plain),
        ("jpeg_activity", u16, activity),
        ("pcie_nak_sent_count_acc", u32, plain),
        ("pcie_nak_rcvd_count_acc", u32, plain),
        ("accumulation_counter", u64, plain),
        ("prochot_residency_acc", u64, plain),
        ("ppt_residency_acc", u64, plain),
        ("socket_thm_residency_acc", u64, plain),
        ("vr_thm_residency_acc", u64, plain),
        ("hbm_thm_residency_acc", u64, plain),
        ("num_partition", u16, plain),
    )
    # Per-XCP array members of each gpu_metrics.xcp_stats element.
    xcp_fields = (
        ("gfx_busy_inst", u32, activity),
        ("jpeg_busy", u16, activity),
        ("vcn_busy", u16, activity),
        ("gfx_busy_acc", u64, plain),
        ("gfx_below_host_limit_acc", u64, plain),
        # new for gpu metrics v1.8
        ("gfx_below_host_limit_ppt_acc", u64, plain),
        ("gfx_below_host_limit_thm_acc", u64, plain),
        ("gfx_low_utilization_acc", u64, plain),
        ("gfx_below_host_limit_total_acc", u64, plain),
    )
    fields_after_xcp = (
        ("pcie_lc_perf_other_end_recovery", u32, plain),
        ("vram_max_bandwidth", u64, plain),
        ("xgmi_link_status", u16, plain),
    )

    def checked(raw, limit, flags):
        return _validate_if_max_uint(raw, limit, **flags)

    header = gpu_metrics.common_header
    gpu_metrics_output = {
        "common_header.structure_size": checked(header.structure_size, u16, plain),
        "common_header.format_revision": checked(header.format_revision, u8, plain),
        "common_header.content_revision": checked(header.content_revision, u8, plain),
    }

    def add_fields(table) -> None:
        for name, limit, flags in table:
            raw = getattr(gpu_metrics, name)
            if name in array_fields:
                raw = list(raw)
            gpu_metrics_output[name] = checked(raw, limit, flags)

    add_fields(fields_before_xcp)

    # Create 2d array with each XCD's stats
    for name, limit, flags in xcp_fields:
        gpu_metrics_output["xcp_stats." + name] = [
            [checked(val, limit, flags) for val in getattr(xcp_metrics, name)]
            for xcp_metrics in gpu_metrics.xcp_stats
        ]

    add_fields(fields_after_xcp)
    return gpu_metrics_output
|
|
|
|
|
|
def amdsmi_get_gpu_od_volt_curve_regions(
    processor_handle: processor_handle_t, num_regions: int
) -> List[Dict[str, Any]]:
    """Return the frequency/voltage curve regions reported for a processor.

    Allocates a buffer of ``num_regions`` ``amdsmi_freq_volt_region_t``
    entries, hands it to ``amdsmi_wrapper.amdsmi_get_gpu_od_volt_curve_regions``
    together with the region count, and converts as many entries as the count
    holds after the call.

    Args:
        processor_handle: Handle of the processor to query.
        num_regions: Number of region entries to allocate and request.

    Returns:
        One dict per region with ``freq_range`` and ``volt_range`` keys, each
        a dict with ``lower_bound`` and ``upper_bound``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``num_regions`` is
            not an ``int``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(num_regions, int):
        raise AmdSmiParameterException(num_regions, int)

    count = ctypes.c_uint32(num_regions)
    regions = (amdsmi_wrapper.amdsmi_freq_volt_region_t * num_regions)()
    status = amdsmi_wrapper.amdsmi_get_gpu_od_volt_curve_regions(
        processor_handle, ctypes.byref(count), regions
    )
    _check_res(status)

    def as_bounds(value_range) -> Dict[str, Any]:
        return {
            "lower_bound": value_range.lower_bound,
            "upper_bound": value_range.upper_bound,
        }

    # The count is read back after the call and drives how many entries are
    # converted.
    return [
        {
            "freq_range": as_bounds(regions[i].freq_range),
            "volt_range": as_bounds(regions[i].volt_range),
        }
        for i in range(count.value)
    ]
|
|
|
|
|
|
def amdsmi_get_gpu_power_profile_presets(
    processor_handle: processor_handle_t, sensor_idx: int
) -> Dict[str, Any]:
    """Return the power profile status reported for a processor sensor.

    Fills an ``amdsmi_power_profile_status_t`` through
    ``amdsmi_wrapper.amdsmi_get_gpu_power_profile_presets``.

    Args:
        processor_handle: Handle of the processor to query.
        sensor_idx: Sensor index forwarded to the library call.

    Returns:
        A dict with the ``available_profiles``, ``current`` and
        ``num_profiles`` fields of the status structure.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``sensor_idx`` is
            not an ``int``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    profile_status = amdsmi_wrapper.amdsmi_power_profile_status_t()
    result = amdsmi_wrapper.amdsmi_get_gpu_power_profile_presets(
        processor_handle, sensor_idx, ctypes.byref(profile_status)
    )
    _check_res(result)

    presets = {}
    presets["available_profiles"] = profile_status.available_profiles
    presets["current"] = profile_status.current
    presets["num_profiles"] = profile_status.num_profiles
    return presets
|
|
|
|
|
|
def amdsmi_get_gpu_ecc_count(
    processor_handle: processor_handle_t, block: AmdSmiGpuBlock
) -> Dict[str, int]:
    """Return the ECC error counters of one GPU block.

    Fills an ``amdsmi_error_count_t`` through
    ``amdsmi_wrapper.amdsmi_get_gpu_ecc_count``.

    Args:
        processor_handle: Handle of the processor to query.
        block: GPU block whose counters are requested.

    Returns:
        A dict with ``correctable_count``, ``uncorrectable_count`` and
        ``deferred_count``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``block`` is not an
            ``AmdSmiGpuBlock``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(block, AmdSmiGpuBlock):
        raise AmdSmiParameterException(block, AmdSmiGpuBlock)

    counters = amdsmi_wrapper.amdsmi_error_count_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_ecc_count(
        processor_handle, block, ctypes.byref(counters)
    )
    _check_res(status)

    return {
        field: getattr(counters, field)
        for field in ("correctable_count", "uncorrectable_count", "deferred_count")
    }
|
|
|
|
|
|
def amdsmi_get_gpu_ecc_enabled(
    processor_handle: processor_handle_t,
) -> int:
    """Return the ECC-enabled value the library reports for a processor.

    The library writes a 64-bit unsigned value through
    ``amdsmi_wrapper.amdsmi_get_gpu_ecc_enabled``; it is returned unchanged
    (presumably a per-block bitmask -- confirm against the library docs).

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The integer value written by the library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    enabled_blocks = ctypes.c_uint64(0)
    status = amdsmi_wrapper.amdsmi_get_gpu_ecc_enabled(
        processor_handle, ctypes.byref(enabled_blocks)
    )
    _check_res(status)
    return enabled_blocks.value
|
|
|
|
|
|
def amdsmi_get_gpu_ecc_status(
    processor_handle: processor_handle_t, block: AmdSmiGpuBlock
) -> AmdSmiRasErrState:
    """Return the RAS error state of one GPU block.

    Fills an ``amdsmi_ras_err_state_t`` through
    ``amdsmi_wrapper.amdsmi_get_gpu_ecc_status`` and converts its value to
    ``AmdSmiRasErrState``.

    Args:
        processor_handle: Handle of the processor to query.
        block: GPU block whose state is requested.

    Returns:
        The ``AmdSmiRasErrState`` member matching the reported value.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``block`` is not an
            ``AmdSmiGpuBlock``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(block, AmdSmiGpuBlock):
        raise AmdSmiParameterException(block, AmdSmiGpuBlock)

    raw_state = amdsmi_wrapper.amdsmi_ras_err_state_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_ecc_status(
        processor_handle, block, ctypes.byref(raw_state)
    )
    _check_res(status)
    return AmdSmiRasErrState(raw_state.value)
|
|
|
|
|
|
def amdsmi_status_code_to_string(status: amdsmi_wrapper.amdsmi_status_t) -> Union[str, bytes, None]:
    """Return the library's textual description of a status code.

    Passes a pointer-to-``char``-pointer to
    ``amdsmi_wrapper.amdsmi_status_code_to_string`` and converts the pointer
    it leaves behind with ``amdsmi_wrapper.string_cast``.

    Args:
        status: Status code to describe.

    Returns:
        Whatever ``amdsmi_wrapper.string_cast`` produces for the string
        pointer.

    Raises:
        AmdSmiParameterException: If ``status`` is not an
            ``amdsmi_wrapper.amdsmi_status_t``.
    """
    status_type = amdsmi_wrapper.amdsmi_status_t
    if not isinstance(status, status_type):
        raise AmdSmiParameterException(status, status_type)

    # Out-parameter: the library receives a char** to store the string in.
    char_ptr = ctypes.pointer(ctypes.c_char())
    string_out = ctypes.pointer(char_ptr)

    result = amdsmi_wrapper.amdsmi_status_code_to_string(status, string_out)
    _check_res(result)

    return amdsmi_wrapper.string_cast(string_out.contents)
|
|
|
|
|
|
def amdsmi_get_gpu_compute_process_info() -> List[Dict[str, int]]:
    """Return information about the compute processes known to the library.

    ``amdsmi_wrapper.amdsmi_get_gpu_compute_process_info`` is called twice:
    first with a null buffer so the library writes the number of entries into
    ``num_items``, then with a buffer of that size to receive the records.

    Returns:
        One dict per process with ``process_id``, ``vram_usage``,
        ``sdma_usage``, ``cu_occupancy`` and ``evicted_time``.
    """
    num_items = ctypes.c_uint32(0)
    no_buffer = POINTER(amdsmi_wrapper.amdsmi_process_info_t)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_info(
            no_buffer, ctypes.byref(num_items)
        )
    )

    procs = (amdsmi_wrapper.amdsmi_process_info_t * num_items.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_info(
            procs, ctypes.byref(num_items)
        )
    )

    # num_items is passed by reference again, so after the second call it
    # holds the count for the filled buffer. Slicing by it keeps zero-filled
    # trailing records out of the result if fewer processes were written than
    # were allocated; the slice clamps if the count is larger than the buffer.
    return [
        {
            "process_id": proc.process_id,
            "vram_usage": proc.vram_usage,
            "sdma_usage": proc.sdma_usage,
            "cu_occupancy": proc.cu_occupancy,
            "evicted_time": proc.evicted_time,
        }
        for proc in procs[: num_items.value]
    ]
|
|
|
|
|
|
def amdsmi_get_gpu_compute_process_info_by_pid(pid: int) -> Dict[str, int]:
    """Return the compute process information for one process id.

    Fills an ``amdsmi_process_info_t`` through
    ``amdsmi_wrapper.amdsmi_get_gpu_compute_process_info_by_pid``; the pid is
    passed as a ``ctypes.c_uint32``.

    Args:
        pid: Process id to look up.

    Returns:
        A dict with ``process_id``, ``vram_usage``, ``sdma_usage``,
        ``cu_occupancy`` and ``evicted_time``.

    Raises:
        AmdSmiParameterException: If ``pid`` is not an ``int``.
    """
    if not isinstance(pid, int):
        raise AmdSmiParameterException(pid, int)

    info = amdsmi_wrapper.amdsmi_process_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_compute_process_info_by_pid(
        ctypes.c_uint32(pid), ctypes.byref(info)
    )
    _check_res(status)

    field_names = (
        "process_id",
        "vram_usage",
        "sdma_usage",
        "cu_occupancy",
        "evicted_time",
    )
    return {name: getattr(info, name) for name in field_names}
|
|
|
|
|
|
def amdsmi_get_gpu_compute_process_gpus(pid: int) -> List[int]:
    """Return the device indices the library reports for a process.

    ``amdsmi_wrapper.amdsmi_get_gpu_compute_process_gpus`` is called twice:
    first with a null buffer so the library writes the number of devices into
    ``num_devices``, then with a ``c_uint32`` buffer of that size to receive
    the indices.

    Args:
        pid: Process id to look up.

    Returns:
        The device indices as plain Python ints.

    Raises:
        AmdSmiParameterException: If ``pid`` is not an ``int``.
    """
    if not isinstance(pid, int):
        raise AmdSmiParameterException(pid, int)

    num_devices = ctypes.c_uint32(0)
    no_buffer = POINTER(ctypes.c_uint32)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_gpus(
            pid, no_buffer, ctypes.byref(num_devices)
        )
    )

    dv_indices = (ctypes.c_uint32 * num_devices.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_gpus(
            pid, dv_indices, ctypes.byref(num_devices)
        )
    )

    # Elements of a ctypes array of a fundamental type are already Python
    # ints, so there is no ``.value`` to read (accessing it raised
    # AttributeError). The slice returns a list and limits the result to the
    # count held in num_devices after the second call.
    return dv_indices[: num_devices.value]
|
|
|
|
|
|
def amdsmi_gpu_xgmi_error_status(
    processor_handle: processor_handle_t,
) -> AmdSmiXgmiStatus:
    """Return the XGMI error status of a processor.

    Fills an ``amdsmi_xgmi_status_t`` through
    ``amdsmi_wrapper.amdsmi_gpu_xgmi_error_status`` and converts its value to
    ``AmdSmiXgmiStatus``.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The ``AmdSmiXgmiStatus`` member matching the reported value.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    xgmi_status = amdsmi_wrapper.amdsmi_xgmi_status_t()
    result = amdsmi_wrapper.amdsmi_gpu_xgmi_error_status(
        processor_handle, ctypes.byref(xgmi_status)
    )
    _check_res(result)

    # The enum member itself is returned, not its underlying integer.
    return AmdSmiXgmiStatus(xgmi_status.value)
|
|
|
|
|
|
def amdsmi_reset_gpu_xgmi_error(
    processor_handle: processor_handle_t,
) -> None:
    """
    Invoke the library's `amdsmi_reset_gpu_xgmi_error` for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_reset_gpu_xgmi_error(processor_handle)
    _check_res(ret)
|
|
|
|
|
|
def amdsmi_get_gpu_memory_reserved_pages(
    processor_handle: processor_handle_t,
) -> Union[list, str]:
    """
    Retrieve the reserved (retired) memory page records for a processor.

    The library is called once with a NULL buffer to obtain the number of
    records, then again with a buffer of that size.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the processor to query.

    Returns:
        Union[list, str]: An empty list when zero pages are reported,
        otherwise whatever `_format_bad_page_info` produces from the records.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    Each library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    record_type = amdsmi_wrapper.amdsmi_retired_page_record_t
    query = amdsmi_wrapper.amdsmi_get_gpu_memory_reserved_pages

    # Pass 1: NULL buffer, only the page count is of interest.
    page_count = ctypes.c_uint32()
    no_buffer = POINTER(record_type)()
    _check_res(query(processor_handle, ctypes.byref(page_count), no_buffer))

    if not page_count.value:
        return []

    # Pass 2: buffer sized from the count reported by pass 1.
    page_records = (record_type * page_count.value)()
    _check_res(query(processor_handle, ctypes.byref(page_count), page_records))

    # The formatter receives the c_uint32 object itself, not its .value.
    return _format_bad_page_info(page_records, page_count)
|
|
|
|
|
|
def amdsmi_get_gpu_metrics_header_info(
    processor_handle: processor_handle_t,
) -> Dict[str, int]:
    """
    Read the GPU metrics table header for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the processor to query.

    Returns:
        Dict[str, int]: The `structure_size`, `format_revision` and
        `content_revision` fields of the filled `amd_metrics_table_header_t`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    header = amdsmi_wrapper.amd_metrics_table_header_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_metrics_header_info(
        processor_handle, ctypes.byref(header)
    )
    _check_res(ret)

    field_names = ("structure_size", "format_revision", "content_revision")
    return {field: getattr(header, field) for field in field_names}
|
|
|
|
|
|
def amdsmi_get_link_topology_nearest(
    processor_handle: processor_handle_t,
    link_type: AmdSmiLinkType,
) -> Dict[str, Any]:
    """
    List the processors the library reports as nearest over a link type.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the reference processor.
        link_type (AmdSmiLinkType): Link type to query.

    Returns:
        Dict[str, Any]: A dict with the single key `processor_list`, holding
        the first `count` entries of the library's `processor_list` array.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(link_type, AmdSmiLinkType):
        raise AmdSmiParameterException(link_type, AmdSmiLinkType)

    nearest = amdsmi_wrapper.amdsmi_topology_nearest_t()
    ret = amdsmi_wrapper.amdsmi_get_link_topology_nearest(
        processor_handle,
        link_type,
        ctypes.byref(nearest),
    )
    _check_res(ret)

    # Only the first `count` slots of the fixed-size array are used.
    neighbours = [nearest.processor_list[slot] for slot in range(nearest.count)]
    return {'processor_list': neighbours}
|
|
|
|
|
|
def amdsmi_get_gpu_virtualization_mode(
    processor_handle: processor_handle_t
) -> Dict[str, AmdSmiVirtualizationMode]:
    """
    Query the virtualization mode the library reports for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the processor to query.

    Returns:
        Dict[str, AmdSmiVirtualizationMode]: A dict with the single key
        `mode`, holding the enum member built from the raw value.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output slot filled in by the library call below.
    raw_mode = amdsmi_wrapper.amdsmi_virtualization_mode_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_virtualization_mode(
        processor_handle,
        ctypes.byref(raw_mode),
    )
    _check_res(ret)

    return {"mode": AmdSmiVirtualizationMode(raw_mode.value)}
|
|
|
|
def amdsmi_get_gpu_ptl_state(
    processor_handle: processor_handle_t
) -> bool:
    """
    Read the PTL state flag the library reports for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the processor to query.

    Returns:
        bool: Value of the boolean written by `amdsmi_get_gpu_ptl_state`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ptl_flag = ctypes.c_bool()
    ret = amdsmi_wrapper.amdsmi_get_gpu_ptl_state(
        processor_handle, ctypes.byref(ptl_flag)
    )
    _check_res(ret)

    return ptl_flag.value
|
|
|
|
def amdsmi_set_gpu_ptl_state(
    processor_handle: processor_handle_t,
    state: int
) -> None:
    """
    Pass a PTL state value to the library for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.
        state (int): Value forwarded unchanged to `amdsmi_set_gpu_ptl_state`
            (it is not validated here).

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_set_gpu_ptl_state(processor_handle, state)
    _check_res(ret)
|
|
|
|
def amdsmi_get_gpu_ptl_formats(
    processor_handle: processor_handle_t
) -> Tuple[int, int]:
    """
    Read the two PTL data formats the library reports for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the processor to query.

    Returns:
        Tuple[int, int]: The two format values, in the order the library
        fills them, converted to int.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    first_format = amdsmi_wrapper.amdsmi_ptl_data_format_t()
    second_format = amdsmi_wrapper.amdsmi_ptl_data_format_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_ptl_formats(
        processor_handle,
        ctypes.byref(first_format),
        ctypes.byref(second_format),
    )
    _check_res(ret)

    return int(first_format.value), int(second_format.value)
|
|
|
|
def amdsmi_set_gpu_ptl_formats(
    processor_handle: processor_handle_t,
    fmt1: AmdSmiPtlData,
    fmt2: AmdSmiPtlData,
) -> None:
    """
    Send a pair of PTL data formats to the library for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.
        fmt1 (AmdSmiPtlData): First format; must not be `AmdSmiPtlData.INVALID`.
        fmt2 (AmdSmiPtlData): Second format; must not be `AmdSmiPtlData.INVALID`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type,
            or if either format is not an `AmdSmiPtlData` or is `INVALID`.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    requested = (fmt1, fmt2)

    # fmt1 is fully validated (type, then INVALID) before fmt2 is looked at.
    for candidate in requested:
        if not isinstance(candidate, AmdSmiPtlData):
            raise AmdSmiParameterException(candidate, AmdSmiPtlData)
        if candidate is AmdSmiPtlData.INVALID:
            raise AmdSmiParameterException(candidate, "A valid PTL data format (not INVALID)")

    native_formats = [
        amdsmi_wrapper.amdsmi_ptl_data_format_t(int(candidate))
        for candidate in requested
    ]
    ret = amdsmi_wrapper.amdsmi_set_gpu_ptl_formats(processor_handle, *native_formats)
    _check_res(ret)
|
|
|
|
### Non C-Lib APIs ###
|
|
|
|
def amdsmi_get_rocm_version() -> Tuple[bool, str]:
    """
    Get the ROCm version reported by the rocm-core library.

    This function tries to load `librocm-core.so` from a list of candidate
    locations and calls its `getROCmVersion` function. The version is
    returned as a string in the format "major.minor.patch".

    Returns:
        Tuple[bool, str]: A tuple containing a boolean and a string.
            - The boolean indicates whether the operation was successful.
            - The string contains the ROCm version if successful, or an
              error message if not.

    This function does not raise: every failure, including unexpected
    exceptions, is reported through the returned tuple.

    Example:
        rocm_lib_status, version_message = amdsmi_get_rocm_version()
        if rocm_lib_status:
            print(f"ROCm version: {version_message}")
        else:
            print(f"Error: {version_message}")
    """
    # librocm-core.so can be found in several places. Candidates are tried
    # in this order, and the first one that loads and exposes
    # getROCmVersion wins:
    #   0. Relative to this file in TheRock layout: this file lives in
    #      `_rocm_sdk_core/share/amd_smi/amdsmi`, libraries are in
    #      `_rocm_sdk_core/lib`.
    #   1. ROCM_HOME/lib, then ROCM_PATH/lib (usually /opt/rocm/lib)
    #   2. /opt/rocm/lib, only if the file exists there
    #   3. Decided by the dynamic linker (LD_LIBRARY_PATH, /etc/ld.so.conf.d/)
    #   4. `lib` directory three levels above this file
    try:
        this_file = Path(__file__).resolve()
        possible_locations = [
            this_file.parent.parent.parent.parent / "lib/librocm-core.so.1"
        ]

        # Consult both variables: ROCM_PATH must not be masked just because
        # ROCM_HOME is set (possibly empty or pointing somewhere else).
        for env_var in ("ROCM_HOME", "ROCM_PATH"):
            rocm_path = os.getenv(env_var)
            if rocm_path:
                possible_locations.append(os.path.join(rocm_path, "lib/librocm-core.so"))

        if os.path.exists("/opt/rocm/lib/librocm-core.so"):
            possible_locations.append("/opt/rocm/lib/librocm-core.so")

        possible_locations.append("librocm-core.so")
        possible_locations.append(
            this_file.parent.parent.parent / "lib" / "librocm-core.so"
        )

        uint32_p = ctypes.POINTER(ctypes.c_uint32)
        for librocm_core_file_path in possible_locations:
            try:
                # str() keeps Path candidates usable on every Python version.
                librocm_core = ctypes.CDLL(str(librocm_core_file_path))
                get_rocm_core_version = librocm_core.getROCmVersion
                get_rocm_core_version.restype = ctypes.c_uint32
                get_rocm_core_version.argtypes = [uint32_p, uint32_p, uint32_p]

                major = ctypes.c_uint32()
                minor = ctypes.c_uint32()
                patch = ctypes.c_uint32()

                # A non-zero return from getROCmVersion is treated as failure.
                if get_rocm_core_version(
                    ctypes.byref(major), ctypes.byref(minor), ctypes.byref(patch)
                ) != 0:
                    return False, "Failed to unpack ROCm version"
                return True, f"{major.value}.{minor.value}.{patch.value}"
            except (OSError, AttributeError):
                # OSError: the library could not be loaded from this location.
                # AttributeError: it loaded but lacks getROCmVersion.
                # Either way, move on to the next candidate.
                continue

        # If we hit here, no candidate provided a usable librocm-core.so
        return False, "Could not find librocm-core.so"
    except Exception as e:
        return False, f"Unable to detect ROCm installation, Unknown Error: {e}"
|
|
|
|
|
|
def amdsmi_get_cpu_handles() -> Dict[str, Any]:
    """
    Collect the CPU socket handles together with their count.

    Returns:
        Dict[str, Any]: `cpu_count` is the number of handles returned by
        `amdsmi_get_cpusocket_handles()`; `processor_handles` is that
        same collection.
    """
    socket_handles = amdsmi_get_cpusocket_handles()
    summary = {
        'cpu_count': len(socket_handles),
        'processor_handles': socket_handles,
    }
    return summary
|
|
|
|
|
|
def amdsmi_get_esmi_err_msg(status: AmdSmiStatus) -> str:
    """
    Ask the library for the ESMI error message matching a status code.

    Parameters:
        status (AmdSmiStatus): Status whose message is requested.

    Returns:
        str: The message text, or "Unknown error" when the library leaves
        the string pointer NULL or `amdsmi_wrapper.string_cast` does not
        return a str.

    Raises:
        AmdSmiParameterException: If `status` is not an AmdSmiStatus.

    The library status code is handed to `_check_res`.
    """
    if not isinstance(status, AmdSmiStatus):
        raise AmdSmiParameterException(status, AmdSmiStatus)

    # char** output argument: a pointer to a char pointer.
    message_pp = ctypes.pointer(ctypes.pointer(ctypes.c_char()))

    ret = amdsmi_wrapper.amdsmi_get_esmi_err_msg(status.value, message_pp)
    _check_res(ret)

    fallback = "Unknown error"

    message_p = message_pp.contents
    if not message_p:
        # NULL char pointer: nothing to read.
        return fallback

    if not hasattr(amdsmi_wrapper, 'string_cast'):
        # No helper in the wrapper: read the C string directly.
        return ctypes.string_at(message_p).decode('utf-8')

    converted = amdsmi_wrapper.string_cast(message_p)
    return converted if isinstance(converted, str) else fallback
|
|
|
|
|
|
def amdsmi_get_gpu_event_notification(
    timeout_ms: int = 1000
) -> Dict[str, Any]:
    """
    Collect GPU event notifications from the library.

    Parameters:
        timeout_ms (int): Timeout passed to the library as an int32.
            Defaults to 1000.

    Returns:
        Dict[str, Any]: `num_elem` is the element count held in the in/out
        counter after the call; `data` is a list with one dict per element
        (`processor_handle`, `event`, and `message` decoded as UTF-8, or ""
        when the message is empty).

    Raises:
        AmdSmiParameterException: If `timeout_ms` is not an int.

    The library status code is handed to `_check_res`.
    """
    if not isinstance(timeout_ms, int):
        raise AmdSmiParameterException(timeout_ms, int)

    c_timeout = ctypes.c_int32(timeout_ms)

    # In/out counter: starts at the buffer capacity; after the call its
    # value is used as the number of populated entries.
    elem_count = ctypes.c_uint32(MAX_NUM_PROCESSES)
    elem_count_ptr = ctypes.pointer(elem_count)

    event_buffer = (amdsmi_wrapper.amdsmi_evt_notification_data_t * MAX_NUM_PROCESSES)()

    ret = amdsmi_wrapper.amdsmi_get_gpu_event_notification(
        c_timeout, elem_count_ptr, event_buffer
    )
    _check_res(ret)

    received = elem_count.value
    events = []
    for slot in range(received):
        raw = event_buffer[slot]
        raw_message = raw.message
        events.append({
            'processor_handle': raw.processor_handle,
            'event': raw.event,
            'message': raw_message.decode('utf-8') if raw_message else "",
        })

    return {'num_elem': received, 'data': events}
|
|
|
|
|
|
def amdsmi_get_gpu_revision(processor_handle: processor_handle_t) -> str:
    """
    Return the revision of the GPU behind a processor handle.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle for the GPU.

    Returns:
        str: The 16-bit revision value rendered with `hex()` and passed
        through `_pad_hex_value(..., 2)`.

    Raises:
        AmdSmiParameterException: If the processor handle is invalid.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_revision = ctypes.c_uint16()
    ret = amdsmi_wrapper.amdsmi_get_gpu_revision(
        processor_handle, ctypes.byref(raw_revision)
    )
    _check_res(ret)

    revision_hex = hex(raw_revision.value)
    return _pad_hex_value(revision_hex, 2)
|
|
|
|
|
|
def amdsmi_get_processor_count_from_handles(processor_handles_list) -> Dict[str, int]:
    """
    Count CPU sockets, CPU cores and GPUs among a list of processor handles.

    Parameters:
        processor_handles_list (list): Processor handles to classify.

    Returns:
        Dict[str, int]: The `nr_cpusockets`, `nr_cpucores` and `nr_gpus`
        counters written by the library.

    Raises:
        AmdSmiParameterException: If `processor_handles_list` is not a list.

    The library status code is handed to `_check_res`.
    """
    if not isinstance(processor_handles_list, list):
        raise AmdSmiParameterException(processor_handles_list, list)

    # Build a C array holding the handles from the Python list.
    handle_total = len(processor_handles_list)
    handle_array_type = amdsmi_wrapper.amdsmi_processor_handle * handle_total
    c_handles = handle_array_type(*processor_handles_list)

    c_handle_total = ctypes.c_uint32(handle_total)
    socket_counter = ctypes.c_uint32(0)
    core_counter = ctypes.c_uint32(0)
    gpu_counter = ctypes.c_uint32(0)

    ret = amdsmi_wrapper.amdsmi_get_processor_count_from_handles(
        c_handles,
        ctypes.pointer(c_handle_total),
        ctypes.pointer(socket_counter),
        ctypes.pointer(core_counter),
        ctypes.pointer(gpu_counter),
    )
    _check_res(ret)

    return {
        'nr_cpusockets': socket_counter.value,
        'nr_cpucores': core_counter.value,
        'nr_gpus': gpu_counter.value,
    }
|
|
|
|
|
|
def amdsmi_get_processor_handles_by_type(socket_handle: socket_handle_t, processor_type: AmdSmiProcessorType):
    """
    Get the processor handles of one processor type on a socket.

    Parameters:
        socket_handle (amdsmi_socket_handle): Socket to query.
        processor_type (AmdSmiProcessorType): Processor type to select.

    Returns:
        dict: `processor_handles` is a list with the first
        `processor_count` entries of the handle buffer; `processor_count`
        is the count written by the library.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.

    The library status code is handed to `_check_res`.
    """
    socket_type = amdsmi_wrapper.amdsmi_socket_handle
    if not isinstance(socket_handle, socket_type):
        raise AmdSmiParameterException(socket_handle, socket_type)
    if not isinstance(processor_type, AmdSmiProcessorType):
        raise AmdSmiParameterException(processor_type, AmdSmiProcessorType)

    # Fixed-capacity output buffer; the counter starts at zero.
    handle_buffer = (amdsmi_wrapper.amdsmi_processor_handle * MAX_NUM_PROCESSES)()
    found = ctypes.c_uint32(0)
    found_ptr = ctypes.pointer(found)

    ret = amdsmi_wrapper.amdsmi_get_processor_handles_by_type(
        socket_handle, processor_type, handle_buffer, found_ptr
    )
    _check_res(ret)

    found_count = found.value
    return {
        'processor_handles': [handle_buffer[slot] for slot in range(found_count)],
        'processor_count': found_count,
    }
|
|
|
|
|
|
def amdsmi_gpu_validate_ras_eeprom(processor_handle: processor_handle_t):
    """
    Invoke the library's `amdsmi_gpu_validate_ras_eeprom` for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_gpu_validate_ras_eeprom(processor_handle)
    _check_res(ret)
|
|
|
|
|
|
def amdsmi_init_gpu_event_notification(processor_handle: processor_handle_t):
    """
    Invoke the library's `amdsmi_init_gpu_event_notification` for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_init_gpu_event_notification(processor_handle)
    _check_res(ret)
|
|
|
|
|
|
def amdsmi_set_gpu_event_notification_mask(processor_handle: processor_handle_t, mask: int):
    """
    Pass an event notification mask to the library for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.
        mask (int): Mask value; sent to the library as a uint64.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type
            or `mask` is not an int.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(mask, int):
        raise AmdSmiParameterException(mask, int)

    ret = amdsmi_wrapper.amdsmi_set_gpu_event_notification_mask(
        processor_handle, ctypes.c_uint64(mask)
    )
    _check_res(ret)
|
|
|
|
|
|
def amdsmi_stop_gpu_event_notification(
    processor_handle: processor_handle_t
):
    """
    Invoke the library's `amdsmi_stop_gpu_event_notification` for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the target processor.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_stop_gpu_event_notification(processor_handle)
    _check_res(ret)
|
|
|
|
|
|
def amdsmi_get_gpu_busy_percent(processor_handle: processor_handle_t):
    """
    Read the GPU busy percentage the library reports for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): Handle of the processor to query.

    Returns:
        int: Value of the uint32 written by `amdsmi_get_gpu_busy_percent`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.

    The library status code is handed to `_check_res`.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    busy = ctypes.c_uint32(0)
    ret = amdsmi_wrapper.amdsmi_get_gpu_busy_percent(
        processor_handle, ctypes.byref(busy)
    )
    _check_res(ret)
    return busy.value