2024-11-14 11:27:38 -06:00
# Copyright (C) Advanced Micro Devices. All rights reserved.
2022-10-11 16:06:32 +02:00
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import ctypes
2025-02-26 05:45:58 -06:00
import math
import os
import re
import sys
2022-11-10 16:18:27 +01:00
from collections . abc import Iterable
2025-06-11 17:19:02 -05:00
from ctypes import POINTER , c_void_p
2025-04-12 01:54:57 -05:00
from enum import IntEnum , Enum
2025-02-26 05:45:58 -06:00
from pathlib import Path
from time import asctime , localtime , time
from typing import Any , Dict , List , Tuple , Union
2022-10-11 16:06:32 +02:00
2023-01-19 15:33:01 +01:00
from . import amdsmi_wrapper
2023-03-17 05:34:24 -05:00
from . amdsmi_exception import *
2025-02-26 05:45:58 -06:00
### Non Library Specific Constants ###
class MaxUIntegerTypes(IntEnum):
    """Largest value representable by each fixed-width unsigned integer type."""

    UINT8_T = (1 << 8) - 1
    UINT16_T = (1 << 16) - 1
    UINT32_T = (1 << 32) - 1
    UINT64_T = (1 << 64) - 1
# Bit widths of the fixed-size C integer types.
# ctypes.sizeof() returns the size of the C type in bytes.  The previous
# sys.getsizeof() call measured the Python class object instead, which gave
# an interpreter-dependent number unrelated to 32 or 64.
NO_OF_32BITS = ctypes.sizeof(ctypes.c_uint32) * 8
NO_OF_64BITS = ctypes.sizeof(ctypes.c_uint64) * 8

# math.pow always returns a float, so KILO is 1000.0 (not the int 1000).
KILO = math.pow(10, 3)

# Handle aliases: both are plain ctypes void pointers.
socket_handle_t = c_void_p
processor_handle_t = c_void_p
2025-02-26 05:45:58 -06:00
###############################
2022-10-11 16:06:32 +02:00
2023-06-28 17:54:36 +02:00
MAX_NUM_PROCESSES = 1024

# GPU-metrics sizes; these mirror macros defined in amdsmi.h.
AMDSMI_NUM_HBM_INSTANCES = 4
AMDSMI_MAX_NUM_VCN = 4
AMDSMI_MAX_NUM_CLKS = 4
AMDSMI_MAX_NUM_XGMI_LINKS = 8
AMDSMI_MAX_NUM_GFX_CLKS = 8
AMDSMI_MAX_AID = 4
AMDSMI_MAX_ENGINES = 8
AMDSMI_MAX_NUM_JPEG = 32
AMDSMI_MAX_NUM_XCC = 8
AMDSMI_MAX_NUM_XCP = 8

# Maximum number of AFIDs per CPER record.
MAX_NUMBER_OF_AFIDS_PER_RECORD = 12

# Maximum number of DPM policies.
AMDSMI_MAX_NUM_PM_POLICIES = 32

# Maximum number of supported frequencies.
AMDSMI_MAX_NUM_FREQUENCIES = 33

# Maximum fan speed.
AMDSMI_MAX_FAN_SPEED = 255

# Number of voltage curve points.
AMDSMI_NUM_VOLTAGE_CURVE_POINTS = 3

# Maximum size definitions.
AMDSMI_MAX_MM_IP_COUNT = 8
AMDSMI_MAX_STRING_LENGTH = 256
AMDSMI_MAX_DEVICES = 32
AMDSMI_MAX_CONTAINER_TYPE = 2
AMDSMI_MAX_CACHE_TYPES = 10
AMDSMI_MAX_NUM_XGMI_PHYSICAL_LINK = 64
AMDSMI_GPU_UUID_SIZE = 38
_AMDSMI_STRING_LENGTH = 80
2023-11-22 03:32:15 -06:00
2025-10-06 14:50:00 -05:00
class AmdSmiStatus(IntEnum):
    """Status codes, each bound to the matching ``amdsmi_wrapper.AMDSMI_STATUS_*`` constant."""

    SUCCESS = amdsmi_wrapper.AMDSMI_STATUS_SUCCESS
    INVAL = amdsmi_wrapper.AMDSMI_STATUS_INVAL
    NOT_SUPPORTED = amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED
    NOT_YET_IMPLEMENTED = amdsmi_wrapper.AMDSMI_STATUS_NOT_YET_IMPLEMENTED
    FAIL_LOAD_MODULE = amdsmi_wrapper.AMDSMI_STATUS_FAIL_LOAD_MODULE
    FAIL_LOAD_SYMBOL = amdsmi_wrapper.AMDSMI_STATUS_FAIL_LOAD_SYMBOL
    DRM_ERROR = amdsmi_wrapper.AMDSMI_STATUS_DRM_ERROR
    API_FAILED = amdsmi_wrapper.AMDSMI_STATUS_API_FAILED
    TIMEOUT = amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT
    RETRY = amdsmi_wrapper.AMDSMI_STATUS_RETRY
    NO_PERM = amdsmi_wrapper.AMDSMI_STATUS_NO_PERM
    INTERRUPT = amdsmi_wrapper.AMDSMI_STATUS_INTERRUPT
    IO = amdsmi_wrapper.AMDSMI_STATUS_IO
    ADDRESS_FAULT = amdsmi_wrapper.AMDSMI_STATUS_ADDRESS_FAULT
    FILE_ERROR = amdsmi_wrapper.AMDSMI_STATUS_FILE_ERROR
    OUT_OF_RESOURCES = amdsmi_wrapper.AMDSMI_STATUS_OUT_OF_RESOURCES
    INTERNAL_EXCEPTION = amdsmi_wrapper.AMDSMI_STATUS_INTERNAL_EXCEPTION
    INPUT_OUT_OF_BOUNDS = amdsmi_wrapper.AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS
    INIT_ERROR = amdsmi_wrapper.AMDSMI_STATUS_INIT_ERROR
    REFCOUNT_OVERFLOW = amdsmi_wrapper.AMDSMI_STATUS_REFCOUNT_OVERFLOW
    DIRECTORY_NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_DIRECTORY_NOT_FOUND
    BUSY = amdsmi_wrapper.AMDSMI_STATUS_BUSY
    NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_NOT_FOUND
    NOT_INIT = amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT
    NO_SLOT = amdsmi_wrapper.AMDSMI_STATUS_NO_SLOT
    DRIVER_NOT_LOADED = amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED
    MORE_DATA = amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA
    NO_DATA = amdsmi_wrapper.AMDSMI_STATUS_NO_DATA
    INSUFFICIENT_SIZE = amdsmi_wrapper.AMDSMI_STATUS_INSUFFICIENT_SIZE
    UNEXPECTED_SIZE = amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_SIZE
    UNEXPECTED_DATA = amdsmi_wrapper.AMDSMI_STATUS_UNEXPECTED_DATA
    NON_AMD_CPU = amdsmi_wrapper.AMDSMI_STATUS_NON_AMD_CPU
    NO_ENERGY_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_ENERGY_DRV
    NO_MSR_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_MSR_DRV
    NO_HSMP_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_DRV
    NO_HSMP_SUP = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_SUP
    NO_HSMP_MSG_SUP = amdsmi_wrapper.AMDSMI_STATUS_NO_HSMP_MSG_SUP
    HSMP_TIMEOUT = amdsmi_wrapper.AMDSMI_STATUS_HSMP_TIMEOUT
    NO_DRV = amdsmi_wrapper.AMDSMI_STATUS_NO_DRV
    FILE_NOT_FOUND = amdsmi_wrapper.AMDSMI_STATUS_FILE_NOT_FOUND
    ARG_PTR_NULL = amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL
    AMDGPU_RESTART_ERR = amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR
    SETTING_UNAVAILABLE = amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE
    CORRUPTED_EEPROM = amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM
    MAP_ERROR = amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR
    UNKNOWN_ERROR = amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR
2023-11-22 03:32:15 -06:00
2022-10-11 16:06:32 +02:00
class AmdSmiInitFlags(IntEnum):
    """Initialization flags, bound to the ``amdsmi_wrapper.AMDSMI_INIT_*`` constants."""

    INIT_ALL_PROCESSORS = amdsmi_wrapper.AMDSMI_INIT_ALL_PROCESSORS
    INIT_AMD_CPUS = amdsmi_wrapper.AMDSMI_INIT_AMD_CPUS
    INIT_AMD_GPUS = amdsmi_wrapper.AMDSMI_INIT_AMD_GPUS
    INIT_AMD_APUS = amdsmi_wrapper.AMDSMI_INIT_AMD_APUS
    INIT_NON_AMD_CPUS = amdsmi_wrapper.AMDSMI_INIT_NON_AMD_CPUS
    INIT_NON_AMD_GPUS = amdsmi_wrapper.AMDSMI_INIT_NON_AMD_GPUS
2022-10-11 16:06:32 +02:00
class AmdSmiContainerTypes(IntEnum):
    """Container kinds, bound to the ``amdsmi_wrapper.AMDSMI_CONTAINER_*`` constants."""

    LXC = amdsmi_wrapper.AMDSMI_CONTAINER_LXC
    DOCKER = amdsmi_wrapper.AMDSMI_CONTAINER_DOCKER
2022-10-11 16:06:32 +02:00
class AmdSmiDeviceType(IntEnum):
    """Device kinds, bound to the ``amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_*`` constants."""

    UNKNOWN_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_UNKNOWN
    AMD_GPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_GPU
    AMD_CPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU
    NON_AMD_GPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU
    NON_AMD_CPU_DEVICE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU
2022-10-11 16:06:32 +02:00
class AmdSmiMmIp(IntEnum):
    """Multimedia IP identifiers, bound to the ``amdsmi_wrapper.AMDSMI_MM_*`` constants."""

    UVD = amdsmi_wrapper.AMDSMI_MM_UVD
    VCE = amdsmi_wrapper.AMDSMI_MM_VCE
    VCN = amdsmi_wrapper.AMDSMI_MM_VCN
2022-10-11 16:06:32 +02:00
2022-12-28 16:11:10 +01:00
class AmdSmiFwBlock(IntEnum):
    """Firmware block identifiers.

    Every member keeps the full ``AMDSMI_FW_ID_*`` name of the
    ``amdsmi_wrapper`` constant it is bound to.
    """

    AMDSMI_FW_ID_SMU = amdsmi_wrapper.AMDSMI_FW_ID_SMU
    AMDSMI_FW_ID_CP_CE = amdsmi_wrapper.AMDSMI_FW_ID_CP_CE
    AMDSMI_FW_ID_CP_PFP = amdsmi_wrapper.AMDSMI_FW_ID_CP_PFP
    AMDSMI_FW_ID_CP_ME = amdsmi_wrapper.AMDSMI_FW_ID_CP_ME
    AMDSMI_FW_ID_CP_MEC_JT1 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC_JT1
    AMDSMI_FW_ID_CP_MEC_JT2 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC_JT2
    AMDSMI_FW_ID_CP_MEC1 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC1
    AMDSMI_FW_ID_CP_MEC2 = amdsmi_wrapper.AMDSMI_FW_ID_CP_MEC2
    AMDSMI_FW_ID_RLC = amdsmi_wrapper.AMDSMI_FW_ID_RLC
    AMDSMI_FW_ID_SDMA0 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA0
    AMDSMI_FW_ID_SDMA1 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA1
    AMDSMI_FW_ID_SDMA2 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA2
    AMDSMI_FW_ID_SDMA3 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA3
    AMDSMI_FW_ID_SDMA4 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA4
    AMDSMI_FW_ID_SDMA5 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA5
    AMDSMI_FW_ID_SDMA6 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA6
    AMDSMI_FW_ID_SDMA7 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA7
    AMDSMI_FW_ID_VCN = amdsmi_wrapper.AMDSMI_FW_ID_VCN
    AMDSMI_FW_ID_UVD = amdsmi_wrapper.AMDSMI_FW_ID_UVD
    AMDSMI_FW_ID_VCE = amdsmi_wrapper.AMDSMI_FW_ID_VCE
    AMDSMI_FW_ID_ISP = amdsmi_wrapper.AMDSMI_FW_ID_ISP
    AMDSMI_FW_ID_DMCU_ERAM = amdsmi_wrapper.AMDSMI_FW_ID_DMCU_ERAM
    AMDSMI_FW_ID_DMCU_ISR = amdsmi_wrapper.AMDSMI_FW_ID_DMCU_ISR
    AMDSMI_FW_ID_RLC_RESTORE_LIST_GPM_MEM = amdsmi_wrapper.AMDSMI_FW_ID_RLC_RESTORE_LIST_GPM_MEM
    AMDSMI_FW_ID_RLC_RESTORE_LIST_SRM_MEM = amdsmi_wrapper.AMDSMI_FW_ID_RLC_RESTORE_LIST_SRM_MEM
    AMDSMI_FW_ID_RLC_RESTORE_LIST_CNTL = amdsmi_wrapper.AMDSMI_FW_ID_RLC_RESTORE_LIST_CNTL
    AMDSMI_FW_ID_RLC_V = amdsmi_wrapper.AMDSMI_FW_ID_RLC_V
    AMDSMI_FW_ID_MMSCH = amdsmi_wrapper.AMDSMI_FW_ID_MMSCH
    AMDSMI_FW_ID_PSP_SYSDRV = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SYSDRV
    AMDSMI_FW_ID_PSP_SOSDRV = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SOSDRV
    AMDSMI_FW_ID_PSP_TOC = amdsmi_wrapper.AMDSMI_FW_ID_PSP_TOC
    AMDSMI_FW_ID_PSP_KEYDB = amdsmi_wrapper.AMDSMI_FW_ID_PSP_KEYDB
    AMDSMI_FW_ID_DFC = amdsmi_wrapper.AMDSMI_FW_ID_DFC
    AMDSMI_FW_ID_PSP_SPL = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SPL
    AMDSMI_FW_ID_DRV_CAP = amdsmi_wrapper.AMDSMI_FW_ID_DRV_CAP
    AMDSMI_FW_ID_MC = amdsmi_wrapper.AMDSMI_FW_ID_MC
    AMDSMI_FW_ID_PSP_BL = amdsmi_wrapper.AMDSMI_FW_ID_PSP_BL
    AMDSMI_FW_ID_CP_PM4 = amdsmi_wrapper.AMDSMI_FW_ID_CP_PM4
    AMDSMI_FW_ID_RLC_P = amdsmi_wrapper.AMDSMI_FW_ID_RLC_P
    AMDSMI_FW_ID_SEC_POLICY_STAGE2 = amdsmi_wrapper.AMDSMI_FW_ID_SEC_POLICY_STAGE2
    AMDSMI_FW_ID_REG_ACCESS_WHITELIST = amdsmi_wrapper.AMDSMI_FW_ID_REG_ACCESS_WHITELIST
    AMDSMI_FW_ID_IMU_DRAM = amdsmi_wrapper.AMDSMI_FW_ID_IMU_DRAM
    AMDSMI_FW_ID_IMU_IRAM = amdsmi_wrapper.AMDSMI_FW_ID_IMU_IRAM
    AMDSMI_FW_ID_SDMA_TH0 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA_TH0
    AMDSMI_FW_ID_SDMA_TH1 = amdsmi_wrapper.AMDSMI_FW_ID_SDMA_TH1
    AMDSMI_FW_ID_CP_MES = amdsmi_wrapper.AMDSMI_FW_ID_CP_MES
    AMDSMI_FW_ID_MES_STACK = amdsmi_wrapper.AMDSMI_FW_ID_MES_STACK
    AMDSMI_FW_ID_MES_THREAD1 = amdsmi_wrapper.AMDSMI_FW_ID_MES_THREAD1
    AMDSMI_FW_ID_MES_THREAD1_STACK = amdsmi_wrapper.AMDSMI_FW_ID_MES_THREAD1_STACK
    AMDSMI_FW_ID_RLX6 = amdsmi_wrapper.AMDSMI_FW_ID_RLX6
    AMDSMI_FW_ID_RLX6_DRAM_BOOT = amdsmi_wrapper.AMDSMI_FW_ID_RLX6_DRAM_BOOT
    AMDSMI_FW_ID_RS64_ME = amdsmi_wrapper.AMDSMI_FW_ID_RS64_ME
    AMDSMI_FW_ID_RS64_ME_P0_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_ME_P0_DATA
    AMDSMI_FW_ID_RS64_ME_P1_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_ME_P1_DATA
    AMDSMI_FW_ID_RS64_PFP = amdsmi_wrapper.AMDSMI_FW_ID_RS64_PFP
    AMDSMI_FW_ID_RS64_PFP_P0_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_PFP_P0_DATA
    AMDSMI_FW_ID_RS64_PFP_P1_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_PFP_P1_DATA
    AMDSMI_FW_ID_RS64_MEC = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC
    AMDSMI_FW_ID_RS64_MEC_P0_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P0_DATA
    AMDSMI_FW_ID_RS64_MEC_P1_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P1_DATA
    AMDSMI_FW_ID_RS64_MEC_P2_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P2_DATA
    AMDSMI_FW_ID_RS64_MEC_P3_DATA = amdsmi_wrapper.AMDSMI_FW_ID_RS64_MEC_P3_DATA
    AMDSMI_FW_ID_PPTABLE = amdsmi_wrapper.AMDSMI_FW_ID_PPTABLE
    AMDSMI_FW_ID_PSP_SOC = amdsmi_wrapper.AMDSMI_FW_ID_PSP_SOC
    AMDSMI_FW_ID_PSP_DBG = amdsmi_wrapper.AMDSMI_FW_ID_PSP_DBG
    AMDSMI_FW_ID_PSP_INTF = amdsmi_wrapper.AMDSMI_FW_ID_PSP_INTF
    AMDSMI_FW_ID_RLX6_CORE1 = amdsmi_wrapper.AMDSMI_FW_ID_RLX6_CORE1
    AMDSMI_FW_ID_RLX6_DRAM_BOOT_CORE1 = amdsmi_wrapper.AMDSMI_FW_ID_RLX6_DRAM_BOOT_CORE1
    AMDSMI_FW_ID_RLCV_LX7 = amdsmi_wrapper.AMDSMI_FW_ID_RLCV_LX7
    AMDSMI_FW_ID_RLC_SAVE_RESTORE_LIST = amdsmi_wrapper.AMDSMI_FW_ID_RLC_SAVE_RESTORE_LIST
    AMDSMI_FW_ID_ASD = amdsmi_wrapper.AMDSMI_FW_ID_ASD
    AMDSMI_FW_ID_TA_RAS = amdsmi_wrapper.AMDSMI_FW_ID_TA_RAS
    AMDSMI_FW_ID_TA_XGMI = amdsmi_wrapper.AMDSMI_FW_ID_TA_XGMI
    AMDSMI_FW_ID_RLC_SRLG = amdsmi_wrapper.AMDSMI_FW_ID_RLC_SRLG
    AMDSMI_FW_ID_RLC_SRLS = amdsmi_wrapper.AMDSMI_FW_ID_RLC_SRLS
    AMDSMI_FW_ID_PM = amdsmi_wrapper.AMDSMI_FW_ID_PM
    AMDSMI_FW_ID_DMCU = amdsmi_wrapper.AMDSMI_FW_ID_DMCU
    AMDSMI_FW_ID_PLDM_BUNDLE = amdsmi_wrapper.AMDSMI_FW_ID_PLDM_BUNDLE
2022-10-11 16:06:32 +02:00
2022-12-28 12:55:15 +01:00
class AmdSmiClkType(IntEnum):
    """Clock domains, bound to the ``amdsmi_wrapper.AMDSMI_CLK_TYPE_*`` constants."""

    SYS = amdsmi_wrapper.AMDSMI_CLK_TYPE_SYS
    GFX = amdsmi_wrapper.AMDSMI_CLK_TYPE_GFX
    DF = amdsmi_wrapper.AMDSMI_CLK_TYPE_DF
    DCEF = amdsmi_wrapper.AMDSMI_CLK_TYPE_DCEF
    SOC = amdsmi_wrapper.AMDSMI_CLK_TYPE_SOC
    MEM = amdsmi_wrapper.AMDSMI_CLK_TYPE_MEM
    PCIE = amdsmi_wrapper.AMDSMI_CLK_TYPE_PCIE
    VCLK0 = amdsmi_wrapper.AMDSMI_CLK_TYPE_VCLK0
    VCLK1 = amdsmi_wrapper.AMDSMI_CLK_TYPE_VCLK1
    DCLK0 = amdsmi_wrapper.AMDSMI_CLK_TYPE_DCLK0
    DCLK1 = amdsmi_wrapper.AMDSMI_CLK_TYPE_DCLK1
2022-10-11 16:06:32 +02:00
2025-06-19 16:38:34 -05:00
class AmdSmiClkLimitType(IntEnum):
    """Clock limit selector, bound to ``amdsmi_wrapper.CLK_LIMIT_MIN`` / ``CLK_LIMIT_MAX``."""

    MIN = amdsmi_wrapper.CLK_LIMIT_MIN
    MAX = amdsmi_wrapper.CLK_LIMIT_MAX
2022-10-11 16:06:32 +02:00
class AmdSmiTemperatureType(IntEnum):
    """Temperature sensor selectors, bound to ``amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_*``.

    Members are grouped as: per-GPU sensors, GPU board node sensors,
    GPU board voltage-regulator (VR) sensors, and baseboard system sensors.
    """

    # --- Per-GPU sensors ---
    EDGE = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_EDGE
    HOTSPOT = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HOTSPOT
    JUNCTION = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_JUNCTION
    VRAM = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_VRAM
    HBM_0 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_0
    HBM_1 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_1
    HBM_2 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_2
    HBM_3 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_HBM_3
    PLX = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_PLX

    # --- GPU board node sensors ---
    # Retimer X
    GPUBOARD_NODE_RETIMER_X = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_RETIMER_X
    # OAM X IBC
    GPUBOARD_NODE_OAM_X_IBC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_IBC
    # OAM X IBC 2
    GPUBOARD_NODE_OAM_X_IBC_2 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_IBC_2
    # OAM X VDD 1.8V voltage regulator
    GPUBOARD_NODE_OAM_X_VDD18_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_VDD18_VR
    # OAM X 0.4V HBM B voltage regulator
    GPUBOARD_NODE_OAM_X_04_HBM_B_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_04_HBM_B_VR
    # OAM X 0.4V HBM D voltage regulator
    GPUBOARD_NODE_OAM_X_04_HBM_D_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_OAM_X_04_HBM_D_VR
    GPUBOARD_NODE_LAST = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_NODE_LAST

    # --- GPU board VR (voltage regulator) sensors ---
    # VDDCR VDD0..VDD3 voltage regulators
    GPUBOARD_VDDCR_VDD0 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD0
    GPUBOARD_VDDCR_VDD1 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD1
    GPUBOARD_VDDCR_VDD2 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD2
    GPUBOARD_VDDCR_VDD3 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_VDD3
    # VDDCR SOC A / SOC C voltage regulators
    GPUBOARD_VDDCR_SOC_A = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOC_A
    GPUBOARD_VDDCR_SOC_C = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOC_C
    # VDDCR SOCIO A / SOCIO C voltage regulators
    GPUBOARD_VDDCR_SOCIO_A = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOCIO_A
    GPUBOARD_VDDCR_SOCIO_C = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_SOCIO_C
    # VDD 0.85V HBM voltage regulator
    GPUBOARD_VDD_085_HBM = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDD_085_HBM
    # VDDCR 1.1V HBM B / HBM D voltage regulators
    GPUBOARD_VDDCR_11_HBM_B = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_11_HBM_B
    GPUBOARD_VDDCR_11_HBM_D = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDCR_11_HBM_D
    # VDD USR voltage regulator
    GPUBOARD_VDD_USR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDD_USR
    # VDDIO 1.1V E32 voltage regulator
    GPUBOARD_VDDIO_11_E32 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VDDIO_11_E32
    GPUBOARD_VR_LAST = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_GPUBOARD_VR_LAST

    # --- Baseboard system sensors ---
    # UBB FPGA / front / back / OAM7 / IBC / UFPGA / OAM1
    BASEBOARD_UBB_FPGA = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA
    BASEBOARD_UBB_FRONT = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FRONT
    BASEBOARD_UBB_BACK = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_BACK
    BASEBOARD_UBB_OAM7 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_OAM7
    BASEBOARD_UBB_IBC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_IBC
    BASEBOARD_UBB_UFPGA = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_UFPGA
    BASEBOARD_UBB_OAM1 = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_OAM1
    # OAM 0-1 / 2-3 / 4-5 / 6-7 HSC
    BASEBOARD_OAM_0_1_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_0_1_HSC
    BASEBOARD_OAM_2_3_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_2_3_HSC
    BASEBOARD_OAM_4_5_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_4_5_HSC
    BASEBOARD_OAM_6_7_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_6_7_HSC
    # UBB FPGA 0.72V / 3.3V voltage regulators
    BASEBOARD_UBB_FPGA_0V72_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA_0V72_VR
    BASEBOARD_UBB_FPGA_3V3_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_UBB_FPGA_3V3_VR
    # Retimer 0-1-2-3 / 4-5-6-7 1.2V voltage regulators
    BASEBOARD_RETIMER_0_1_2_3_1V2_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_0_1_2_3_1V2_VR
    BASEBOARD_RETIMER_4_5_6_7_1V2_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_4_5_6_7_1V2_VR
    # Retimer 0-1 / 4-5 / 2-3 / 6-7 0.9V voltage regulators
    BASEBOARD_RETIMER_0_1_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_0_1_0V9_VR
    BASEBOARD_RETIMER_4_5_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_4_5_0V9_VR
    BASEBOARD_RETIMER_2_3_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_2_3_0V9_VR
    BASEBOARD_RETIMER_6_7_0V9_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_RETIMER_6_7_0V9_VR
    # OAM 0-1-2-3 / 4-5-6-7 3.3V voltage regulators
    BASEBOARD_OAM_0_1_2_3_3V3_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_0_1_2_3_3V3_VR
    BASEBOARD_OAM_4_5_6_7_3V3_VR = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_OAM_4_5_6_7_3V3_VR
    # IBC HSC and IBC
    BASEBOARD_IBC_HSC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_IBC_HSC
    BASEBOARD_IBC = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_IBC
    BASEBOARD_LAST = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE_BASEBOARD_LAST
    # Maximum per GPU temperature type
    BASEBOARD__MAX = amdsmi_wrapper.AMDSMI_TEMPERATURE_TYPE__MAX
2022-10-11 16:06:32 +02:00
class AmdSmiDevPerfLevel(IntEnum):
    """Device performance levels, bound to ``amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_*``."""

    AUTO = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_AUTO
    LOW = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_LOW
    HIGH = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_HIGH
    MANUAL = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_MANUAL
    STABLE_STD = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_STD
    STABLE_PEAK = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_PEAK
    STABLE_MIN_MCLK = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK
    STABLE_MIN_SCLK = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK
    DETERMINISM = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_DETERMINISM
    UNKNOWN = amdsmi_wrapper.AMDSMI_DEV_PERF_LEVEL_UNKNOWN
class AmdSmiEventGroup(IntEnum):
    """Event groups, bound to the ``amdsmi_wrapper.AMDSMI_EVNT_GRP_*`` constants."""

    XGMI = amdsmi_wrapper.AMDSMI_EVNT_GRP_XGMI
    XGMI_DATA_OUT = amdsmi_wrapper.AMDSMI_EVNT_GRP_XGMI_DATA_OUT
    GRP_INVALID = amdsmi_wrapper.AMDSMI_EVNT_GRP_INVALID
class AmdSmiEventType(IntEnum):
    """XGMI event identifiers, bound to the ``amdsmi_wrapper.AMDSMI_EVNT_XGMI_*`` constants."""

    # XGMI link 0 transmit events
    XGMI_0_NOP_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_NOP_TX
    XGMI_0_REQUEST_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_REQUEST_TX
    XGMI_0_RESPONSE_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_RESPONSE_TX
    XGMI_0_BEATS_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_0_BEATS_TX

    # XGMI link 1 transmit events
    XGMI_1_NOP_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_NOP_TX
    XGMI_1_REQUEST_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_REQUEST_TX
    XGMI_1_RESPONSE_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_RESPONSE_TX
    XGMI_1_BEATS_TX = amdsmi_wrapper.AMDSMI_EVNT_XGMI_1_BEATS_TX

    # XGMI data-out events 0..5
    XGMI_DATA_OUT_0 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_0
    XGMI_DATA_OUT_1 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_1
    XGMI_DATA_OUT_2 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_2
    XGMI_DATA_OUT_3 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_3
    XGMI_DATA_OUT_4 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_4
    XGMI_DATA_OUT_5 = amdsmi_wrapper.AMDSMI_EVNT_XGMI_DATA_OUT_5
class AmdSmiCounterCommand(IntEnum):
    """Counter commands, bound to the ``amdsmi_wrapper.AMDSMI_CNTR_CMD_*`` constants."""

    CMD_START = amdsmi_wrapper.AMDSMI_CNTR_CMD_START
    CMD_STOP = amdsmi_wrapper.AMDSMI_CNTR_CMD_STOP
class AmdSmiEvtNotificationType(IntEnum):
    """Event notification kinds, bound to the ``amdsmi_wrapper.AMDSMI_EVT_NOTIF_*`` constants."""

    NONE = amdsmi_wrapper.AMDSMI_EVT_NOTIF_NONE
    VMFAULT = amdsmi_wrapper.AMDSMI_EVT_NOTIF_VMFAULT
    THERMAL_THROTTLE = amdsmi_wrapper.AMDSMI_EVT_NOTIF_THERMAL_THROTTLE
    GPU_PRE_RESET = amdsmi_wrapper.AMDSMI_EVT_NOTIF_GPU_PRE_RESET
    GPU_POST_RESET = amdsmi_wrapper.AMDSMI_EVT_NOTIF_GPU_POST_RESET
    MIGRATE_START = amdsmi_wrapper.AMDSMI_EVT_NOTIF_MIGRATE_START
    MIGRATE_END = amdsmi_wrapper.AMDSMI_EVT_NOTIF_MIGRATE_END
    # PAGE_FAULT_START was previously bound to ..._PAGE_FAULT_END.  Because enum
    # members with equal values collapse into aliases, PAGE_FAULT_END became an
    # alias of PAGE_FAULT_START and the real start event had no member at all.
    PAGE_FAULT_START = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PAGE_FAULT_START
    PAGE_FAULT_END = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PAGE_FAULT_END
    QUEUE_EVICTION = amdsmi_wrapper.AMDSMI_EVT_NOTIF_QUEUE_EVICTION
    QUEUE_RESTORE = amdsmi_wrapper.AMDSMI_EVT_NOTIF_QUEUE_RESTORE
    UNMAP_FROM_GPU = amdsmi_wrapper.AMDSMI_EVT_NOTIF_UNMAP_FROM_GPU
    PROCESS_START = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PROCESS_START
    PROCESS_END = amdsmi_wrapper.AMDSMI_EVT_NOTIF_PROCESS_END
2022-10-11 16:06:32 +02:00
2025-02-26 05:45:58 -06:00
2022-10-11 16:06:32 +02:00
class AmdSmiTemperatureMetric(IntEnum):
    """Temperature metric selectors, bound to the ``amdsmi_wrapper.AMDSMI_TEMP_*`` constants."""

    CURRENT = amdsmi_wrapper.AMDSMI_TEMP_CURRENT
    MAX = amdsmi_wrapper.AMDSMI_TEMP_MAX
    MIN = amdsmi_wrapper.AMDSMI_TEMP_MIN
    MAX_HYST = amdsmi_wrapper.AMDSMI_TEMP_MAX_HYST
    MIN_HYST = amdsmi_wrapper.AMDSMI_TEMP_MIN_HYST
    CRITICAL = amdsmi_wrapper.AMDSMI_TEMP_CRITICAL
    CRITICAL_HYST = amdsmi_wrapper.AMDSMI_TEMP_CRITICAL_HYST
    EMERGENCY = amdsmi_wrapper.AMDSMI_TEMP_EMERGENCY
    EMERGENCY_HYST = amdsmi_wrapper.AMDSMI_TEMP_EMERGENCY_HYST
    CRIT_MIN = amdsmi_wrapper.AMDSMI_TEMP_CRIT_MIN
    CRIT_MIN_HYST = amdsmi_wrapper.AMDSMI_TEMP_CRIT_MIN_HYST
    OFFSET = amdsmi_wrapper.AMDSMI_TEMP_OFFSET
    LOWEST = amdsmi_wrapper.AMDSMI_TEMP_LOWEST
    HIGHEST = amdsmi_wrapper.AMDSMI_TEMP_HIGHEST
class AmdSmiVoltageMetric(IntEnum):
    """Voltage metric selectors, bound to the ``amdsmi_wrapper.AMDSMI_VOLT_*`` constants."""

    CURRENT = amdsmi_wrapper.AMDSMI_VOLT_CURRENT
    MAX = amdsmi_wrapper.AMDSMI_VOLT_MAX
    MIN_CRIT = amdsmi_wrapper.AMDSMI_VOLT_MIN_CRIT
    MIN = amdsmi_wrapper.AMDSMI_VOLT_MIN
    MAX_CRIT = amdsmi_wrapper.AMDSMI_VOLT_MAX_CRIT
    AVERAGE = amdsmi_wrapper.AMDSMI_VOLT_AVERAGE
    LOWEST = amdsmi_wrapper.AMDSMI_VOLT_LOWEST
    HIGHEST = amdsmi_wrapper.AMDSMI_VOLT_HIGHEST
class AmdSmiVoltageType(IntEnum):
    """Voltage sensor selectors, bound to the ``amdsmi_wrapper.AMDSMI_VOLT_TYPE_*`` constants."""

    VDDGFX = amdsmi_wrapper.AMDSMI_VOLT_TYPE_VDDGFX
    VDDBOARD = amdsmi_wrapper.AMDSMI_VOLT_TYPE_VDDBOARD
    INVALID = amdsmi_wrapper.AMDSMI_VOLT_TYPE_INVALID
2025-01-15 20:28:45 -06:00
class AmdSmiAcceleratorPartitionResourceType(IntEnum):
    """Accelerator partition resource kinds, bound to ``amdsmi_wrapper.AMDSMI_ACCELERATOR_*``."""

    XCC = amdsmi_wrapper.AMDSMI_ACCELERATOR_XCC
    ENCODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_ENCODER
    DECODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_DECODER
    DMA = amdsmi_wrapper.AMDSMI_ACCELERATOR_DMA
    JPEG = amdsmi_wrapper.AMDSMI_ACCELERATOR_JPEG
    MAX = amdsmi_wrapper.AMDSMI_ACCELERATOR_MAX
class AmdSmiAcceleratorPartitionType(IntEnum):
    """Accelerator partition modes, bound to ``amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_*``."""

    SPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_SPX
    DPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_DPX
    TPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_TPX
    QPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_QPX
    CPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_CPX
    INVALID = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_INVALID
2022-10-11 16:06:32 +02:00
2023-10-13 01:41:14 -05:00
class AmdSmiComputePartitionType(IntEnum):
    """Compute partition modes, bound to ``amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_*``."""

    SPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_SPX
    DPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_DPX
    TPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_TPX
    QPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_QPX
    CPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_CPX
    INVALID = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_INVALID
2023-10-13 01:41:14 -05:00
class AmdSmiMemoryPartitionType(IntEnum):
    """Memory partition modes, bound to ``amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_*``."""

    NPS1 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS1
    NPS2 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS2
    NPS4 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS4
    NPS8 = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_NPS8
    UNKNOWN = amdsmi_wrapper.AMDSMI_MEMORY_PARTITION_UNKNOWN
2023-10-13 01:41:14 -05:00
2022-10-11 16:06:32 +02:00
class AmdSmiPowerProfilePresetMasks(IntEnum):
    """Power profile preset masks, bound to ``amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_*``."""

    CUSTOM_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_CUSTOM_MASK
    VIDEO_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_VIDEO_MASK
    POWER_SAVING_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_POWER_SAVING_MASK
    COMPUTE_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_COMPUTE_MASK
    VR_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_VR_MASK
    # A Python identifier cannot start with a digit, hence THREE_D for "3D".
    THREE_D_FULL_SCR_MASK = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK
    BOOTUP_DEFAULT = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT
    INVALID = amdsmi_wrapper.AMDSMI_PWR_PROF_PRST_INVALID
class AmdSmiGpuBlock(IntEnum):
    """GPU block identifiers, bound to the ``amdsmi_wrapper.AMDSMI_GPU_BLOCK_*`` constants."""

    INVALID = amdsmi_wrapper.AMDSMI_GPU_BLOCK_INVALID
    UMC = amdsmi_wrapper.AMDSMI_GPU_BLOCK_UMC
    SDMA = amdsmi_wrapper.AMDSMI_GPU_BLOCK_SDMA
    GFX = amdsmi_wrapper.AMDSMI_GPU_BLOCK_GFX
    MMHUB = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MMHUB
    ATHUB = amdsmi_wrapper.AMDSMI_GPU_BLOCK_ATHUB
    PCIE_BIF = amdsmi_wrapper.AMDSMI_GPU_BLOCK_PCIE_BIF
    HDP = amdsmi_wrapper.AMDSMI_GPU_BLOCK_HDP
    XGMI_WAFL = amdsmi_wrapper.AMDSMI_GPU_BLOCK_XGMI_WAFL
    DF = amdsmi_wrapper.AMDSMI_GPU_BLOCK_DF
    SMN = amdsmi_wrapper.AMDSMI_GPU_BLOCK_SMN
    SEM = amdsmi_wrapper.AMDSMI_GPU_BLOCK_SEM
    MP0 = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MP0
    MP1 = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MP1
    FUSE = amdsmi_wrapper.AMDSMI_GPU_BLOCK_FUSE
    MCA = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MCA
    VCN = amdsmi_wrapper.AMDSMI_GPU_BLOCK_VCN
    JPEG = amdsmi_wrapper.AMDSMI_GPU_BLOCK_JPEG
    IH = amdsmi_wrapper.AMDSMI_GPU_BLOCK_IH
    MPIO = amdsmi_wrapper.AMDSMI_GPU_BLOCK_MPIO
    RESERVED = amdsmi_wrapper.AMDSMI_GPU_BLOCK_RESERVED
class AmdSmiRasErrState(IntEnum):
    """RAS error states, bound to the ``amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_*`` constants."""

    NONE = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_NONE
    DISABLED = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_DISABLED
    PARITY = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_PARITY
    SING_C = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_SING_C
    MULT_UC = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_MULT_UC
    POISON = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_POISON
    ENABLED = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_ENABLED
    INVALID = amdsmi_wrapper.AMDSMI_RAS_ERR_STATE_INVALID
2025-04-12 01:54:57 -05:00
class AmdSmiCperNotifyType(Enum):
    """CPER notify types, bound to ``amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_*``.

    This is a plain ``Enum`` (not ``IntEnum``), so members do not compare
    equal to their raw values; use ``member.value`` for that.
    """

    CMC = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_CMC
    CPE = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_CPE
    MCE = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_MCE
    PCIE = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_PCIE
    INIT = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_INIT
    NMI = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_NMI
    BOOT = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_BOOT
    # Mixed-case member name is part of the public interface; kept as-is.
    DMAr = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_DMAR
    SEA = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_SEA
    SEI = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_SEI
    PEI = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_PEI
    CXL_COMPONENT = amdsmi_wrapper.AMDSMI_CPER_NOTIFY_TYPE_CXL_COMPONENT
2022-10-11 16:06:32 +02:00
class AmdSmiMemoryType(IntEnum):
    """Memory kinds, bound to the ``amdsmi_wrapper.AMDSMI_MEM_TYPE_*`` constants."""

    VRAM = amdsmi_wrapper.AMDSMI_MEM_TYPE_VRAM
    VIS_VRAM = amdsmi_wrapper.AMDSMI_MEM_TYPE_VIS_VRAM
    GTT = amdsmi_wrapper.AMDSMI_MEM_TYPE_GTT
class AmdSmiFreqInd(IntEnum):
    """Frequency index selectors, bound to the ``amdsmi_wrapper.AMDSMI_FREQ_IND_*`` constants."""

    MIN = amdsmi_wrapper.AMDSMI_FREQ_IND_MIN
    MAX = amdsmi_wrapper.AMDSMI_FREQ_IND_MAX
    INVALID = amdsmi_wrapper.AMDSMI_FREQ_IND_INVALID
class AmdSmiXgmiStatus(IntEnum):
    """XGMI status values, bound to the ``amdsmi_wrapper.AMDSMI_XGMI_STATUS_*`` constants."""

    NO_ERRORS = amdsmi_wrapper.AMDSMI_XGMI_STATUS_NO_ERRORS
    ERROR = amdsmi_wrapper.AMDSMI_XGMI_STATUS_ERROR
    MULTIPLE_ERRORS = amdsmi_wrapper.AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS
class AmdSmiMemoryPageStatus(IntEnum):
    """Memory page states, bound to the ``amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_*`` constants."""

    RESERVED = amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_RESERVED
    PENDING = amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_PENDING
    UNRESERVABLE = amdsmi_wrapper.AMDSMI_MEM_PAGE_STATUS_UNRESERVABLE
2024-08-26 05:29:24 -04:00
class AmdSmiLinkType(IntEnum):
    """Link kinds; members keep the full ``AMDSMI_LINK_TYPE_*`` wrapper constant names."""

    AMDSMI_LINK_TYPE_INTERNAL = amdsmi_wrapper.AMDSMI_LINK_TYPE_INTERNAL
    AMDSMI_LINK_TYPE_XGMI = amdsmi_wrapper.AMDSMI_LINK_TYPE_XGMI
    AMDSMI_LINK_TYPE_PCIE = amdsmi_wrapper.AMDSMI_LINK_TYPE_PCIE
    AMDSMI_LINK_TYPE_NOT_APPLICABLE = amdsmi_wrapper.AMDSMI_LINK_TYPE_NOT_APPLICABLE
    AMDSMI_LINK_TYPE_UNKNOWN = amdsmi_wrapper.AMDSMI_LINK_TYPE_UNKNOWN
2022-10-11 16:06:32 +02:00
class AmdSmiUtilizationCounterType(IntEnum):
    """Utilization counter selectors, bound to the matching ``amdsmi_wrapper`` constants."""

    # Coarse-grained activity counters
    COARSE_GRAIN_GFX_ACTIVITY = amdsmi_wrapper.AMDSMI_COARSE_GRAIN_GFX_ACTIVITY
    COARSE_GRAIN_MEM_ACTIVITY = amdsmi_wrapper.AMDSMI_COARSE_GRAIN_MEM_ACTIVITY
    COARSE_DECODER_ACTIVITY = amdsmi_wrapper.AMDSMI_COARSE_DECODER_ACTIVITY

    # Fine-grained activity counters
    FINE_GRAIN_GFX_ACTIVITY = amdsmi_wrapper.AMDSMI_FINE_GRAIN_GFX_ACTIVITY
    FINE_GRAIN_MEM_ACTIVITY = amdsmi_wrapper.AMDSMI_FINE_GRAIN_MEM_ACTIVITY
    FINE_DECODER_ACTIVITY = amdsmi_wrapper.AMDSMI_FINE_DECODER_ACTIVITY

    # Range markers.  NOTE(review): if these share values with members above
    # they become enum aliases of those members -- confirm against amdsmi.h.
    UTILIZATION_COUNTER_FIRST = amdsmi_wrapper.AMDSMI_UTILIZATION_COUNTER_FIRST
    UTILIZATION_COUNTER_LAST = amdsmi_wrapper.AMDSMI_UTILIZATION_COUNTER_LAST
2023-11-22 03:32:15 -06:00
2024-01-30 20:15:11 -06:00
2023-06-01 14:46:21 +02:00
class AmdSmiProcessorType(IntEnum):
    """Processor kinds, bound to the ``amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_*`` constants."""

    UNKNOWN = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_UNKNOWN
    AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_GPU
    AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU
    NON_AMD_GPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_GPU
    NON_AMD_CPU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_NON_AMD_CPU
    AMD_CPU_CORE = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_CPU_CORE
    AMD_APU = amdsmi_wrapper.AMDSMI_PROCESSOR_TYPE_AMD_APU
2023-06-01 14:46:21 +02:00
2024-04-24 05:27:33 -05:00
2024-11-11 18:34:19 -06:00
class AmdSmiRegType(IntEnum):
    """Register types, bound to the ``amdsmi_wrapper.AMDSMI_REG_*`` constants."""

    XGMI = amdsmi_wrapper.AMDSMI_REG_XGMI
    WAFL = amdsmi_wrapper.AMDSMI_REG_WAFL
    PCIE = amdsmi_wrapper.AMDSMI_REG_PCIE
    USR = amdsmi_wrapper.AMDSMI_REG_USR
    USR1 = amdsmi_wrapper.AMDSMI_REG_USR1
2025-01-31 18:34:01 -05:00
class AmdSmiVirtualizationMode(IntEnum):
    """Virtualization modes, one member per `AMDSMI_VIRTUALIZATION_MODE_*` wrapper constant."""

    UNKNOWN = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_UNKNOWN
    BAREMETAL = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_BAREMETAL
    HOST = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_HOST
    GUEST = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_GUEST
    PASSTHROUGH = amdsmi_wrapper.AMDSMI_VIRTUALIZATION_MODE_PASSTHROUGH
2025-04-12 01:54:57 -05:00
2025-03-11 16:38:46 -05:00
class AmdSmiVramType(IntEnum):
    """VRAM type identifiers, one member per `AMDSMI_VRAM_TYPE_*` wrapper constant."""

    UNKNOWN = amdsmi_wrapper.AMDSMI_VRAM_TYPE_UNKNOWN

    # HBM family
    HBM = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM
    HBM2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2
    HBM2E = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM2E
    HBM3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_HBM3

    # DDR family
    DDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR2
    DDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR3
    DDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_DDR4

    # GDDR family
    GDDR1 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR1
    GDDR2 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR2
    GDDR3 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR3
    GDDR4 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR4
    GDDR5 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR5
    GDDR6 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR6
    GDDR7 = amdsmi_wrapper.AMDSMI_VRAM_TYPE_GDDR7

    # Wrapper-provided upper marker.
    # NOTE(review): may alias an earlier member if the values coincide.
    MAX = amdsmi_wrapper.AMDSMI_VRAM_TYPE__MAX
2025-06-11 17:19:02 -05:00
2025-05-20 11:36:09 +05:30
class AmdSmiAffinityScope(IntEnum):
    """Affinity scopes, one member per `AMDSMI_AFFINITY_SCOPE_*` wrapper constant."""

    # The Python-side name is NUMA_SCOPE; the wrapper constant is ..._SCOPE_NODE.
    NUMA_SCOPE = amdsmi_wrapper.AMDSMI_AFFINITY_SCOPE_NODE
    SOCKET_SCOPE = amdsmi_wrapper.AMDSMI_AFFINITY_SCOPE_SOCKET
2025-04-12 01:54:57 -05:00
2025-11-26 08:33:27 -06:00
class AmdSmiPtlData(IntEnum):
    """PTL data formats, one member per `AMDSMI_PTL_DATA_FORMAT_*` wrapper constant."""

    I8 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_I8
    F16 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_F16
    BF16 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_BF16
    F32 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_F32
    F64 = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_F64

    # Wrapper-provided marker for an invalid format
    INVALID = amdsmi_wrapper.AMDSMI_PTL_DATA_FORMAT_INVALID
2025-06-11 17:19:02 -05:00
2025-10-30 09:48:35 -05:00
class AmdSmiPowerCapType(IntEnum):
    """Power cap types, one member per `AMDSMI_POWER_CAP_TYPE_*` wrapper constant."""

    PPT0 = amdsmi_wrapper.AMDSMI_POWER_CAP_TYPE_PPT0
    PPT1 = amdsmi_wrapper.AMDSMI_POWER_CAP_TYPE_PPT1
2022-11-09 15:38:30 +01:00
class AmdSmiEventReader:
    """
    Reader for GPU event notifications on a single processor handle.

    Constructing the reader initializes event notification for the handle and
    programs the notification mask. `stop()` stops notification for the handle
    and is also called when leaving a `with` block.
    """

    def __init__(
        self,
        processor_handle: processor_handle_t,
        event_types: List[AmdSmiEvtNotificationType]
    ):
        """
        Validate the arguments and enable notification for the requested events.

        Parameters:
            processor_handle(`amdsmi_processor_handle`): Handle to listen on.
            event_types(`Iterable`): `AmdSmiEvtNotificationType` members to
            subscribe to. `NONE` entries contribute nothing to the mask.

        Raises:
            `AmdSmiParameterException`: If the handle has the wrong type,
            `event_types` is not iterable, or one of its elements is not an
            `AmdSmiEvtNotificationType`.
        """
        if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
            raise AmdSmiParameterException(
                processor_handle, amdsmi_wrapper.amdsmi_processor_handle
            )
        if not isinstance(event_types, Iterable):
            raise AmdSmiParameterException(event_types, Iterable)

        # Materialize once: the events are walked twice (validation, then mask
        # building). A one-shot iterator would be exhausted by the first pass
        # and silently produce an empty mask.
        event_types = list(event_types)

        for event_type in event_types:
            if not isinstance(event_type, AmdSmiEvtNotificationType):
                raise AmdSmiParameterException(
                    event_type, AmdSmiEvtNotificationType
                )

        self.processor_handle = processor_handle

        # Event value N maps to mask bit N-1; NONE has no bit.
        mask = 0
        for event_type in event_types:
            if event_type != AmdSmiEvtNotificationType.NONE:
                mask |= (1 << (int(event_type) - 1))

        _check_res(amdsmi_wrapper.amdsmi_init_gpu_event_notification(processor_handle))
        _check_res(amdsmi_wrapper.amdsmi_set_gpu_event_notification_mask(
            processor_handle, ctypes.c_uint64(mask)))

    def read(self, timestamp, num_elem=10):
        """
        Fetch event notifications from the library.

        Parameters:
            timestamp(`int`): Passed to the library as a C int.
            NOTE(review): presumably a wait timeout - confirm against the
            library documentation.
            num_elem(`int`): Capacity of the buffer handed to the library.

        Returns:
            `list`: One dict per reported event with the keys
            "processor_handle", "event" (enum member name) and "message"
            (decoded as UTF-8). Entries whose event value is not a known
            `AmdSmiEvtNotificationType`, or is `NONE`, are skipped.
        """
        c_count = ctypes.c_uint32(num_elem)
        event_info = (amdsmi_wrapper.amdsmi_evt_notification_data_t * num_elem)()
        _check_res(
            amdsmi_wrapper.amdsmi_get_gpu_event_notification(
                ctypes.c_int(timestamp),
                ctypes.byref(c_count),
                event_info,
            )
        )

        # Loop-invariant, so build it once instead of once per entry.
        known_event_values = {event.value for event in AmdSmiEvtNotificationType}

        ret = []
        # The library reports how many entries it filled in through c_count.
        for i in range(c_count.value):
            entry = event_info[i]
            if entry.event not in known_event_values:
                continue
            event = AmdSmiEvtNotificationType(entry.event)
            if event.name == "NONE":
                continue
            processor_handle = amdsmi_wrapper.amdsmi_processor_handle(entry.processor_handle)
            ret.append(
                {
                    "processor_handle": processor_handle,
                    "event": event.name,
                    "message": entry.message.decode("utf-8"),
                }
            )

        return ret

    def stop(self):
        """Stop event notification for the handle given at construction."""
        _check_res(amdsmi_wrapper.amdsmi_stop_gpu_event_notification(
            self.processor_handle))

    def __enter__(self):
        """Return the reader itself for use in a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop notification on leaving the `with` block; exceptions are not suppressed."""
        self.stop()
2022-10-11 16:06:32 +02:00
def _format_bad_page_info(bad_page_info, bad_page_count: ctypes.c_uint32) -> List[Dict]:
    """
    Format bad page info data retrieved.

    Parameters:
        bad_page_info(`amdsmi_retired_page_record_t`): A populated, indexable
        collection of amdsmi_retired_page_record_t(s).
        Ex: (amdsmi_wrapper.amdsmi_retired_page_record_t * #)()
        bad_page_count(`c_uint32`): Bad page count. A plain `int` is accepted too.

    Returns:
        `list`: List containing formatted bad pages. Can be empty.

    Raises:
        `AmdSmiParameterException`: If one of the first `bad_page_count`
        records is not an `amdsmi_retired_page_record_t`.
    """
    # A ctypes integer never compares equal to a Python int
    # (c_uint32(0) == 0 is False), so unwrap it before testing for "no pages".
    count = int(getattr(bad_page_count, "value", bad_page_count))
    if count == 0:
        return []

    table_records = []
    # Index only the first `count` records instead of iterating the whole
    # buffer: the buffer may be a pointer (no length) or larger than `count`.
    for i in range(count):
        bad_page = bad_page_info[i]
        if not isinstance(bad_page, amdsmi_wrapper.amdsmi_retired_page_record_t):
            raise AmdSmiParameterException(
                bad_page, amdsmi_wrapper.amdsmi_retired_page_record_t
            )
        table_records.append(
            {
                "value": i,
                "page_address": bad_page.page_address,
                "page_size": bad_page.page_size,
                "status": bad_page.status,
            }
        )

    return table_records
def _format_bdf(amdsmi_bdf: amdsmi_wrapper.amdsmi_bdf_t) -> str:
    """
    Render a BDF struct as a `domain:bus:device.function` string.

    Parameters:
        amdsmi_bdf(`amdsmi_bdf_t`): Struct whose `struct_amdsmi_bdf_t` member
        carries the domain, bus, device and function numbers.

    Returns:
        `str`: Lowercase hex text. Domain is zero-padded to 4 digits, bus and
        device to 2 digits, and function is left unpadded.
    """
    fields = amdsmi_bdf.struct_amdsmi_bdf_t
    domain = format(fields.domain_number, "04x")
    bus = format(fields.bus_number, "02x")
    device = format(fields.device_number, "02x")
    function = format(fields.function_number, "x")
    return f"{domain}:{bus}:{device}.{function}"
def _check_res(ret_code) -> None:
    """
    Turn an amdsmi status code into an exception when it signals failure.

    Parameters:
        ret_code(`amdsmi_status_t`): Status code returned by a wrapped
        library call.

    Returns:
        `None`: When the status is `AMDSMI_STATUS_SUCCESS`.

    Raises:
        `AmdSmiRetryException`: Status is `AMDSMI_STATUS_RETRY`.
        `AmdSmiTimeoutException`: Status is `AMDSMI_STATUS_TIMEOUT`.
        `AmdSmiLibraryException`: Any other non-success status.
    """
    # Statuses with a dedicated exception type, checked in this order.
    dedicated = (
        (amdsmi_wrapper.AMDSMI_STATUS_RETRY, AmdSmiRetryException),
        (amdsmi_wrapper.AMDSMI_STATUS_TIMEOUT, AmdSmiTimeoutException),
    )
    for status, exception_type in dedicated:
        if ret_code == status:
            raise exception_type()

    if ret_code != amdsmi_wrapper.AMDSMI_STATUS_SUCCESS:
        raise AmdSmiLibraryException(ret_code)
2022-11-16 09:30:31 +01:00
def _parse_bdf(bdf):
    """
    Parse a textual BDF into its numeric components.

    Two forms are accepted, all fields in hexadecimal:
    `DDDD:BB:dd.f` (with domain) and `BB:dd.f` (domain defaults to 0).
    The device field must start with 0 or 1 and the function must be 0-7.

    Parameters:
        bdf(`str`): BDF text, or `None`.

    Returns:
        `list`: `[domain, bus, device, function]` as ints, or `None` when
        `bdf` is `None` or does not match either form.
    """
    if bdf is None:
        return None

    # One pattern with an optional domain group covers both accepted forms,
    # so the input is matched a single time.
    match = re.match(
        r'^(?:([0-9a-fA-F]{4}):)?([0-9a-fA-F]{2}):([0-1][0-9a-fA-F])\.([0-7])$',
        bdf)
    if match is None:
        return None

    domain, bus, device, function = match.groups()
    # The domain group is None for the short form, which means domain 0.
    return [int(part, 16) for part in (domain or '0', bus, device, function)]
2022-11-16 09:30:31 +01:00
def _make_amdsmi_bdf_from_list(bdf):
    """
    Build an `amdsmi_bdf_t` from `[domain, bus, device, function]`.

    Parameters:
        bdf(`list`): Four values in the order domain, bus, device, function.

    Returns:
        `amdsmi_bdf_t`: Populated struct, or `None` when `bdf` does not hold
        exactly four entries.
    """
    if len(bdf) != 4:
        return None

    result = amdsmi_wrapper.amdsmi_bdf_t()
    # Fields are assigned function first and domain last.
    result.struct_amdsmi_bdf_t.function_number = bdf[3]
    result.struct_amdsmi_bdf_t.device_number = bdf[2]
    result.struct_amdsmi_bdf_t.bus_number = bdf[1]
    result.struct_amdsmi_bdf_t.domain_number = bdf[0]
    return result
2025-02-26 05:45:58 -06:00
2025-10-06 14:50:00 -05:00
def _pad_hex_value(value, length) -> str:
    """ Zero-pad the digits of a '0x'-prefixed hexadecimal string

    :param value: Candidate hexadecimal string
    :param length: Minimum number of digits after the '0x' prefix
    :param return: '0x' followed by the digits left-padded with zeros to
                   `length`; the input is returned unchanged when it is not
                   longer than two characters, `length` is not greater than 1,
                   the prefix is not '0x' (any case), or a non-hex digit follows
    """
    # Guard clauses: anything that is not a paddable hex literal is returned as-is
    if len(value) <= 2 or length <= 1:
        return value

    prefix, digits = value[:2], value[2:]
    if prefix.lower() != '0x':
        return value
    if not all(ch in '0123456789abcdefABCDEF' for ch in digits):
        return value

    # The emitted prefix is always lowercase; the digits keep their case
    return '0x' + digits.zfill(length)
2024-05-14 17:03:05 -05:00
2025-10-06 14:50:00 -05:00
def _validate_if_max_uint(value, uint_type: MaxUIntegerTypes, isActivity=False, isBool=False) -> Union[str, bool, int, list]:
    """
    Replace "not available" sentinel readings with "N/A".

    A reading counts as unavailable when it equals `uint_type`, or when
    `isActivity` is set and the reading is greater than 100.

    Parameters:
        value: A single reading or a `list` of readings.
        uint_type(`MaxUIntegerTypes`): Sentinel value to compare against.
        isActivity(`bool`): Also treat readings above 100 as unavailable.
        isBool(`bool`): Convert the result with `bool()`.

    Returns:
        For a single reading: "N/A", or the reading (as `bool` when `isBool`).
        For a list: a new list with unavailable entries replaced by "N/A".
    """
    def is_unavailable(reading):
        return (reading == uint_type) or (isActivity and reading > 100)

    if isinstance(value, list):
        cleaned = ["N/A" if is_unavailable(item) else item for item in value]
        # NOTE(review): with isBool the whole list is collapsed to one bool
        # (True when non-empty), not converted per element - confirm intent.
        return bool(cleaned) if isBool else cleaned

    if is_unavailable(value):
        return "N/A"
    return bool(value) if isBool else value
2024-05-14 17:03:05 -05:00
2025-04-12 01:54:57 -05:00
def _notifyTypeToString(notify_type_b):
    """
    Map the leading bytes of a notify type to an `AmdSmiCperNotifyType` name.

    Parameters:
        notify_type_b: Indexable sequence of integers. Only the entries at
        indexes 7 down to 0 are used.

    Returns:
        `str`: The matching enum member name, or "Unknown" when the value is
        not an `AmdSmiCperNotifyType` value.
    """
    # [7::-1] walks indexes 7..0, so the first eight entries are read in
    # reverse and concatenated as two-digit hex.
    hex_string = "".join(format(byte, '02x') for byte in notify_type_b[7::-1])
    hex_value = int(hex_string, 16)

    if hex_value not in AmdSmiCperNotifyType._value2member_map_:
        return "Unknown"
    return AmdSmiCperNotifyType(hex_value).name
2025-08-06 16:03:06 -05:00
def _NA_amdsmi_get_gpu_metrics_info() -> Dict[str, str]:
    """
    Build a gpu_metric placeholder in which every metric reads 'N/A'.

    Parameters:
        None

    Returns:
        Dict[str, str]: A new dictionary on every call, keyed by metric name in
        the fixed order below, with every value set to 'N/A'.

    Raises:
        N/A
    """
    metric_names = (
        # Header
        "common_header.structure_size",
        "common_header.format_revision",
        "common_header.content_revision",
        # Temperatures
        "temperature_edge",
        "temperature_hotspot",
        "temperature_mem",
        "temperature_vrgfx",
        "temperature_vrsoc",
        "temperature_vrmem",
        # Averages and accumulators
        "average_gfx_activity",
        "average_umc_activity",
        "average_mm_activity",
        "average_socket_power",
        "energy_accumulator",
        "system_clock_counter",
        "average_gfxclk_frequency",
        "average_socclk_frequency",
        "average_uclk_frequency",
        "average_vclk0_frequency",
        "average_dclk0_frequency",
        "average_vclk1_frequency",
        "average_dclk1_frequency",
        # Current clocks
        "current_gfxclk",
        "current_socclk",
        "current_uclk",
        "current_vclk0",
        "current_dclk0",
        "current_vclk1",
        "current_dclk1",
        "throttle_status",
        "current_fan_speed",
        "pcie_link_width",
        "pcie_link_speed",
        "gfx_activity_acc",
        "mem_activity_acc",
        "temperature_hbm",
        "firmware_timestamp",
        "voltage_soc",
        "voltage_gfx",
        "voltage_mem",
        "indep_throttle_status",
        "current_socket_power",
        "vcn_activity",
        "gfxclk_lock_status",
        # Links
        "xgmi_link_width",
        "xgmi_link_speed",
        "pcie_bandwidth_acc",
        "pcie_bandwidth_inst",
        "pcie_l0_to_recov_count_acc",
        "pcie_replay_count_acc",
        "pcie_replay_rover_count_acc",
        "xgmi_read_data_acc",
        "xgmi_write_data_acc",
        "current_gfxclks",
        "current_socclks",
        "current_vclk0s",
        "current_dclk0s",
        "jpeg_activity",
        "pcie_nak_sent_count_acc",
        "pcie_nak_rcvd_count_acc",
        # Residency accumulators
        "accumulation_counter",
        "prochot_residency_acc",
        "ppt_residency_acc",
        "socket_thm_residency_acc",
        "vr_thm_residency_acc",
        "hbm_thm_residency_acc",
        # Partition statistics
        "num_partition",
        "xcp_stats.gfx_busy_inst",
        "xcp_stats.jpeg_busy",
        "xcp_stats.vcn_busy",
        "xcp_stats.gfx_busy_acc",
        "xcp_stats.gfx_below_host_limit_acc",
        "xcp_stats.gfx_below_host_limit_ppt_acc",
        "xcp_stats.gfx_below_host_limit_thm_acc",
        "xcp_stats.gfx_low_utilization_acc",
        "xcp_stats.gfx_below_host_limit_total_acc",
        "pcie_lc_perf_other_end_recovery",
        "vram_max_bandwidth",
        "xgmi_link_status",
    )
    return dict.fromkeys(metric_names, "N/A")
2025-04-12 01:54:57 -05:00
2025-06-11 17:19:02 -05:00
def amdsmi_get_socket_handles() -> List[c_void_p]:
    """
    Collect socket handles through the wrapper function of the same name.

    The wrapper is called twice: first with a null handle pointer, then with
    a buffer sized from the count written by the first call.

    Parameters:
        `None`.

    Returns:
        `List`: One `amdsmi_socket_handle` per reported socket.
    """
    handle_type = amdsmi_wrapper.amdsmi_socket_handle
    count = ctypes.c_uint32(0)

    # Pass 1: null buffer; the resulting count sizes the buffer for pass 2.
    no_buffer = POINTER(handle_type)()
    status = amdsmi_wrapper.amdsmi_get_socket_handles(ctypes.byref(count), no_buffer)
    _check_res(status)

    # Pass 2: fetch the handles themselves.
    buffer = (handle_type * count.value)()
    status = amdsmi_wrapper.amdsmi_get_socket_handles(ctypes.byref(count), buffer)
    _check_res(status)

    sockets = []
    for index in range(count.value):
        sockets.append(handle_type(buffer[index]))
    return sockets
2025-06-11 17:19:02 -05:00
def amdsmi_get_cpusocket_handles() -> List[c_void_p]:
    """
    Collect cpu socket handles via `amdsmi_wrapper.amdsmi_get_cpu_handles`.

    The wrapper is called twice: first with a null handle pointer, then with
    a buffer sized from the count written by the first call.

    Parameters:
        `None`.

    Returns:
        `List`: One `amdsmi_processor_handle` per reported cpu socket.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    count = ctypes.c_uint32(0)

    # Pass 1: null buffer; the resulting count sizes the buffer for pass 2.
    no_buffer = POINTER(handle_type)()
    status = amdsmi_wrapper.amdsmi_get_cpu_handles(ctypes.byref(count), no_buffer)
    _check_res(status)

    # Pass 2: fetch the handles themselves.
    buffer = (handle_type * count.value)()
    status = amdsmi_wrapper.amdsmi_get_cpu_handles(ctypes.byref(count), buffer)
    _check_res(status)

    cpu_handles = []
    for index in range(count.value):
        cpu_handles.append(handle_type(buffer[index]))
    return cpu_handles
2023-10-10 04:16:45 -04:00
2022-11-14 12:49:13 +01:00
def amdsmi_get_socket_info(socket_handle):
    """
    Fetch the socket info text for a socket handle.

    Parameters:
        socket_handle(`amdsmi_socket_handle`): Socket to query.

    Returns:
        `str`: Contents of the 128-byte buffer filled by the wrapper, up to the
        first NUL byte, decoded with the default codec.

    Raises:
        `AmdSmiParameterException`: If `socket_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_socket_handle
    if not isinstance(socket_handle, expected_type):
        raise AmdSmiParameterException(socket_handle, expected_type)

    # The buffer and the length passed to the wrapper share the same size.
    buffer_size = 128
    info_buffer = ctypes.create_string_buffer(buffer_size)
    status = amdsmi_wrapper.amdsmi_get_socket_info(
        socket_handle, ctypes.c_size_t(buffer_size), info_buffer)
    _check_res(status)

    return info_buffer.value.decode()
2022-10-11 16:06:32 +02:00
2023-12-07 07:30:31 -08:00
def amdsmi_get_processor_info(processor_handle):
    """
    Fetch the processor info text for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: Contents of the 128-byte buffer filled by the wrapper, up to the
        first NUL byte, decoded with the default codec.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    # The buffer and the length passed to the wrapper share the same size.
    buffer_size = 128
    info_buffer = ctypes.create_string_buffer(buffer_size)
    status = amdsmi_wrapper.amdsmi_get_processor_info(
        processor_handle, ctypes.c_size_t(buffer_size), info_buffer)
    _check_res(status)

    return info_buffer.value.decode()
2025-06-11 17:19:02 -05:00
def amdsmi_get_processor_handles() -> List[c_void_p]:
    """
    Collect the processor handles of every socket.

    For each socket from `amdsmi_get_socket_handles()` the wrapper is called
    twice: first with a null handle pointer, then with a buffer sized from
    the count written by the first call.

    Parameters:
        `None`.

    Returns:
        `List`: `amdsmi_processor_handle` objects, grouped in socket order.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    devices = []

    for socket in amdsmi_get_socket_handles():
        device_count = ctypes.c_uint32()

        # Pass 1: null buffer; the resulting count sizes the buffer for pass 2.
        no_buffer = POINTER(handle_type)()
        status = amdsmi_wrapper.amdsmi_get_processor_handles(
            socket, ctypes.byref(device_count), no_buffer)
        _check_res(status)

        # Pass 2: fetch the handles of this socket.
        buffer = (handle_type * device_count.value)()
        status = amdsmi_wrapper.amdsmi_get_processor_handles(
            socket, ctypes.byref(device_count), buffer)
        _check_res(status)

        for index in range(device_count.value):
            devices.append(handle_type(buffer[index]))

    return devices
2025-06-11 17:19:02 -05:00
def amdsmi_get_cpucore_handles() -> List[c_void_p]:
    """
    Collect cpu core handles through the wrapper function of the same name.

    The wrapper is called twice: first with a null handle pointer, then with
    a buffer sized from the count written by the first call.

    Parameters:
        `None`.

    Returns:
        `List`: One `amdsmi_processor_handle` per reported cpu core.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    count = ctypes.c_uint32(0)

    # Pass 1: null buffer; the resulting count sizes the buffer for pass 2.
    no_buffer = POINTER(handle_type)()
    status = amdsmi_wrapper.amdsmi_get_cpucore_handles(ctypes.byref(count), no_buffer)
    _check_res(status)

    # Pass 2: fetch the handles themselves.
    buffer = (handle_type * count.value)()
    status = amdsmi_wrapper.amdsmi_get_cpucore_handles(ctypes.byref(count), buffer)
    _check_res(status)

    core_handles = []
    for index in range(count.value):
        core_handles.append(handle_type(buffer[index]))
    return core_handles
2023-10-10 04:16:45 -04:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_cpu_hsmp_proto_ver(processor_handle: processor_handle_t) -> int:
    """
    Query `amdsmi_get_cpu_hsmp_proto_ver` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The 32-bit value written by the library.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    version = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_hsmp_proto_ver(
        processor_handle, ctypes.byref(version))
    _check_res(status)

    return version.value
2025-06-11 17:19:02 -05:00
def amdsmi_get_cpu_smu_fw_version(
        processor_handle: processor_handle_t) -> Dict[str, int]:
    """
    Query the SMU firmware version struct for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: The `debug`, `minor` and `major` fields of the filled
        `amdsmi_smu_fw_version_t`, under the keys "smu_fw_debug_ver_num",
        "smu_fw_minor_ver_num" and "smu_fw_major_ver_num".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    version = amdsmi_wrapper.amdsmi_smu_fw_version_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_smu_fw_version(processor_handle, version)
    _check_res(status)

    result = {}
    result["smu_fw_debug_ver_num"] = version.debug
    result["smu_fw_minor_ver_num"] = version.minor
    result["smu_fw_major_ver_num"] = version.major
    return result
2022-10-11 16:06:32 +02:00
2025-06-11 17:19:02 -05:00
def amdsmi_get_cpu_hsmp_driver_version(
        processor_handle: processor_handle_t) -> Dict[str, int]:
    """
    Query the HSMP driver version struct for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: The `major` and `minor` fields of the filled
        `amdsmi_hsmp_driver_version_t`, under the keys
        "hsmp_driver_major_ver_num" and "hsmp_driver_minor_ver_num".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    version = amdsmi_wrapper.amdsmi_hsmp_driver_version_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_hsmp_driver_version(processor_handle, version)
    _check_res(status)

    result = {}
    result["hsmp_driver_major_ver_num"] = version.major
    result["hsmp_driver_minor_ver_num"] = version.minor
    return result
2023-10-11 06:26:14 -04:00
def amdsmi_get_cpu_core_energy(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_core_energy` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 64-bit reading multiplied by 10**-6, followed by " J".
        NOTE(review): this implies the library reports microjoules - confirm.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    raw_energy = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_cpu_core_energy(
        processor_handle, ctypes.byref(raw_energy))
    _check_res(status)

    scaled = float(raw_energy.value * pow(10, -6))
    return f"{scaled} J"
2023-10-11 06:26:14 -04:00
def amdsmi_get_cpu_socket_energy(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_socket_energy` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 64-bit reading multiplied by 10**-6, followed by " J".
        NOTE(review): this implies the library reports microjoules - confirm.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    raw_energy = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_energy(
        processor_handle, ctypes.byref(raw_energy))
    _check_res(status)

    scaled = float(raw_energy.value * pow(10, -6))
    return f"{scaled} J"
2023-10-11 06:26:14 -04:00
2025-01-30 01:15:02 +05:30
def amdsmi_get_threads_per_core():
    """
    Query `amdsmi_get_threads_per_core`.

    Parameters:
        `None`.

    Returns:
        `int`: The 32-bit value written by the library.
    """
    count = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_threads_per_core(ctypes.byref(count))
    _check_res(status)
    return count.value
2023-10-11 06:26:14 -04:00
def amdsmi_get_cpu_prochot_status(
    processor_handle: processor_handle_t
) -> int:
    """
    Query `amdsmi_get_cpu_prochot_status` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The 32-bit status value written by the library.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    status_value = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_prochot_status(
        processor_handle, ctypes.byref(status_value))
    _check_res(status)

    return status_value.value
def amdsmi_get_cpu_fclk_mclk(
    processor_handle: processor_handle_t
):
    """
    Query `amdsmi_get_cpu_fclk_mclk` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: Keys "fclk" and "mclk", each the 32-bit reading followed by
        " MHz".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    fclk_value = ctypes.c_uint32()
    mclk_value = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_fclk_mclk(
        processor_handle, ctypes.byref(fclk_value), ctypes.byref(mclk_value))
    _check_res(status)

    clocks = {}
    clocks["fclk"] = f"{fclk_value.value} MHz"
    clocks["mclk"] = f"{mclk_value.value} MHz"
    return clocks
def amdsmi_get_cpu_cclk_limit(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_cclk_limit` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 32-bit reading followed by " MHz".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    limit = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_cclk_limit(
        processor_handle, ctypes.byref(limit))
    _check_res(status)

    return f"{limit.value} MHz"
2023-10-11 06:26:14 -04:00
def amdsmi_get_cpu_socket_current_active_freq_limit(
    processor_handle: processor_handle_t
):
    """
    Query the current active frequency limit and its sources for a handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: "freq" is the 16-bit reading followed by " MHz". "freq_src" is
        the string form of the list of source names the library filled in
        (entries left as `None` are dropped, the rest decoded as UTF-8).

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    # One char* slot per known frequency-limit source name.
    source_array_type = ctypes.c_char_p * len(amdsmi_wrapper.amdsmi_hsmp_freqlimit_src_names)
    library_call = amdsmi_wrapper.amdsmi_get_cpu_socket_current_active_freq_limit
    # argtypes is (re)assigned on the wrapper function on every call.
    library_call.argtypes = [
        expected_type,
        POINTER(ctypes.c_uint16),
        POINTER(source_array_type),
    ]

    freq = ctypes.c_uint16()
    sources = source_array_type()
    status = library_call(processor_handle, ctypes.byref(freq), sources)
    _check_res(status)

    freq_src = [name.decode('utf-8') for name in sources if name is not None]

    result = {}
    result["freq"] = f"{freq.value} MHz"
    result["freq_src"] = str(freq_src)
    return result
2023-10-11 06:26:14 -04:00
def amdsmi_get_cpu_socket_freq_range(
    processor_handle: processor_handle_t
):
    """
    Query `amdsmi_get_cpu_socket_freq_range` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `dict`: Keys "max_socket_freq" and "min_socket_freq", each the 16-bit
        reading followed by " MHz".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    upper = ctypes.c_uint16()
    lower = ctypes.c_uint16()
    # The library takes the maximum first, then the minimum.
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_freq_range(
        processor_handle, ctypes.byref(upper), ctypes.byref(lower))
    _check_res(status)

    freq_range = {}
    freq_range["max_socket_freq"] = f"{upper.value} MHz"
    freq_range["min_socket_freq"] = f"{lower.value} MHz"
    return freq_range
def amdsmi_get_cpu_core_current_freq_limit(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_core_current_freq_limit` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 32-bit reading followed by " MHz".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    limit = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_core_current_freq_limit(
        processor_handle, ctypes.byref(limit))
    _check_res(status)

    return f"{limit.value} MHz"
2023-10-11 06:26:14 -04:00
def amdsmi_get_cpu_socket_power(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_socket_power` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 32-bit reading followed by " mW".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    reading = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_power(
        processor_handle, ctypes.byref(reading))
    _check_res(status)

    return f"{reading.value} mW"
2023-10-11 06:26:14 -04:00
2023-11-02 07:23:31 -04:00
def amdsmi_get_cpu_socket_power_cap(
    processor_handle: processor_handle_t
) -> int:
    """
    Query `amdsmi_get_cpu_socket_power_cap` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `int`: The raw 32-bit reading, without a unit suffix.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    cap = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_power_cap(
        processor_handle, ctypes.byref(cap))
    _check_res(status)

    # in mW
    return cap.value
2023-11-02 07:23:31 -04:00
def amdsmi_get_cpu_socket_power_cap_max(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_socket_power_cap_max` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 32-bit reading followed by " mW".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    cap_max = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_power_cap_max(
        processor_handle, ctypes.byref(cap_max))
    _check_res(status)

    return f"{cap_max.value} mW"
2023-11-02 07:23:31 -04:00
def amdsmi_get_cpu_pwr_svi_telemetry_all_rails(
    processor_handle: processor_handle_t
) -> str:
    """
    Query `amdsmi_get_cpu_pwr_svi_telemetry_all_rails` for a processor handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to query.

    Returns:
        `str`: The 32-bit reading followed by " mW".

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)

    reading = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_pwr_svi_telemetry_all_rails(
        processor_handle, ctypes.byref(reading))
    _check_res(status)

    return f"{reading.value} mW"
2023-11-02 07:23:31 -04:00
def amdsmi_set_cpu_socket_power_cap(
    processor_handle: processor_handle_t, power_cap: int
):
    """
    Pass a power cap to `amdsmi_set_cpu_socket_power_cap` for a handle.

    Parameters:
        processor_handle(`amdsmi_processor_handle`): Processor to configure.
        power_cap(`int`): Value handed to the library as a 32-bit unsigned
        integer.

    Returns:
        `None`.

    Raises:
        `AmdSmiParameterException`: If `processor_handle` has the wrong type
        or `power_cap` is not an `int`.
    """
    expected_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, expected_type):
        raise AmdSmiParameterException(processor_handle, expected_type)
    if not isinstance(power_cap, int):
        raise AmdSmiParameterException(power_cap, int)

    c_power_cap = ctypes.c_uint32(power_cap)
    status = amdsmi_wrapper.amdsmi_set_cpu_socket_power_cap(
        processor_handle, c_power_cap)
    _check_res(status)
def amdsmi_set_cpu_pwr_efficiency_mode(
    processor_handle: processor_handle_t, mode: int
):
    """
    Set the CPU power efficiency mode through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        mode (int): Requested mode; converted to a ctypes.c_uint8 before the call.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or mode is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(mode, int):
        raise AmdSmiParameterException(mode, int)

    mode_arg = ctypes.c_uint8(mode)
    status = amdsmi_wrapper.amdsmi_set_cpu_pwr_efficiency_mode(processor_handle, mode_arg)
    _check_res(status)
2023-11-07 01:32:53 -05:00
def amdsmi_get_cpu_core_boostlimit(
    processor_handle: processor_handle_t
) -> int:
    """
    Read the CPU core boost limit through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        int: The 32-bit boost limit written by the library (in MHz, per the
            inline note carried by the previous implementation).

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    limit_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_core_boostlimit(
        processor_handle, ctypes.byref(limit_out)
    )
    _check_res(status)

    # Unlike most getters here, the raw integer is returned without a unit suffix.
    return limit_out.value
2023-11-07 01:32:53 -05:00
def amdsmi_get_cpu_socket_c0_residency(
    processor_handle: processor_handle_t
) -> str:
    """
    Read the CPU socket C0 residency through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        str: The 32-bit value written by the library, followed by " %".

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    residency_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_c0_residency(
        processor_handle, ctypes.byref(residency_out)
    )
    _check_res(status)

    return f"{residency_out.value} %"
2023-11-07 01:32:53 -05:00
def amdsmi_set_cpu_core_boostlimit(
    processor_handle: processor_handle_t, boostlimit: int
):
    """
    Set the CPU core boost limit through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        boostlimit (int): Requested boost limit; converted to a ctypes.c_uint32 before the call.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or boostlimit is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(boostlimit, int):
        raise AmdSmiParameterException(boostlimit, int)

    limit_arg = ctypes.c_uint32(boostlimit)
    status = amdsmi_wrapper.amdsmi_set_cpu_core_boostlimit(processor_handle, limit_arg)
    _check_res(status)
def amdsmi_set_cpu_socket_boostlimit(
    processor_handle: processor_handle_t, boostlimit: int
):
    """
    Set the CPU socket boost limit through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        boostlimit (int): Requested boost limit; converted to a ctypes.c_uint32 before the call.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or boostlimit is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(boostlimit, int):
        raise AmdSmiParameterException(boostlimit, int)

    limit_arg = ctypes.c_uint32(boostlimit)
    status = amdsmi_wrapper.amdsmi_set_cpu_socket_boostlimit(processor_handle, limit_arg)
    _check_res(status)
2025-10-06 14:50:00 -05:00
def amdsmi_get_cpu_ddr_bw(processor_handle: processor_handle_t):
    """
    Read DDR bandwidth metrics through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        dict: "ddr_bw_max_bw" and "ddr_bw_utilized_bw" as strings suffixed with
            " Gbps", and "ddr_bw_utilized_pct" as a string suffixed with " %".

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    metrics = amdsmi_wrapper.amdsmi_ddr_bw_metrics_t()
    # The struct instance itself is handed to the wrapper, as before (no explicit byref).
    status = amdsmi_wrapper.amdsmi_get_cpu_ddr_bw(processor_handle, metrics)
    _check_res(status)

    result = {}
    result["ddr_bw_max_bw"] = f"{metrics.max_bw} Gbps"
    result["ddr_bw_utilized_bw"] = f"{metrics.utilized_bw} Gbps"
    result["ddr_bw_utilized_pct"] = f"{metrics.utilized_pct} %"
    return result
2023-11-09 10:12:46 -05:00
def amdsmi_get_cpu_socket_temperature(
    processor_handle: processor_handle_t
) -> str:
    """
    Read the CPU socket temperature through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        str: The 32-bit value written by the library, followed by " Degrees C".

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    temperature_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_temperature(
        processor_handle, ctypes.byref(temperature_out)
    )
    _check_res(status)

    return f"{temperature_out.value} Degrees C"
2023-11-09 10:12:46 -05:00
def amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(
        processor_handle: processor_handle_t,
        dimm_addr: int):
    """
    Read a DIMM's temperature range and refresh rate through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        dimm_addr (int): DIMM address; converted to a ctypes.c_uint8 before the call.

    Returns:
        dict: "dimm_temperature_range" and "dimm_refresh_rate", taken from the
            `range` and `ref_rate` fields of the struct filled by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or dimm_addr is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(dimm_addr, int):
        raise AmdSmiParameterException(dimm_addr, int)

    addr_arg = ctypes.c_uint8(dimm_addr)
    dimm_info = amdsmi_wrapper.amdsmi_temp_range_refresh_rate_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_dimm_temp_range_and_refresh_rate(
        processor_handle, addr_arg, ctypes.byref(dimm_info)
    )
    _check_res(status)

    return {
        "dimm_temperature_range": dimm_info.range,
        "dimm_refresh_rate": dimm_info.ref_rate,
    }
def amdsmi_get_cpu_dimm_power_consumption(
        processor_handle: processor_handle_t,
        dimm_addr: int):
    """
    Read a DIMM's power consumption through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        dimm_addr (int): DIMM address; converted to a ctypes.c_uint8 before the call.

    Returns:
        dict: "dimm_power_consumed" (string suffixed with " mW"),
            "dimm_power_update_rate" (string suffixed with " ms") and
            "dimm_dimm_addr" (raw struct field).

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or dimm_addr is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(dimm_addr, int):
        raise AmdSmiParameterException(dimm_addr, int)

    addr_arg = ctypes.c_uint8(dimm_addr)
    dimm_power = amdsmi_wrapper.amdsmi_dimm_power_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_dimm_power_consumption(
        processor_handle, addr_arg, ctypes.byref(dimm_power)
    )
    _check_res(status)

    return {
        "dimm_power_consumed": f"{dimm_power.power} mW",
        "dimm_power_update_rate": f"{dimm_power.update_rate} ms",
        "dimm_dimm_addr": dimm_power.dimm_addr,
    }
def amdsmi_get_cpu_dimm_thermal_sensor(
        processor_handle: processor_handle_t,
        dimm_addr: int):
    """
    Read a DIMM's thermal sensor data through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        dimm_addr (int): DIMM address; converted to a ctypes.c_uint8 before the call.

    Returns:
        dict: "dimm_thermal_sensor_value" and "dimm_thermal_dimm_addr" (raw struct
            fields), "dimm_thermal_update_rate" (string suffixed with " ms") and
            "dimm_thermal_temperature" (string suffixed with " Degrees C").

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or dimm_addr is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(dimm_addr, int):
        raise AmdSmiParameterException(dimm_addr, int)

    addr_arg = ctypes.c_uint8(dimm_addr)
    thermal = amdsmi_wrapper.amdsmi_dimm_thermal_t()
    status = amdsmi_wrapper.amdsmi_get_cpu_dimm_thermal_sensor(
        processor_handle, addr_arg, ctypes.byref(thermal)
    )
    _check_res(status)

    return {
        "dimm_thermal_sensor_value": thermal.sensor,
        "dimm_thermal_update_rate": f"{thermal.update_rate} ms",
        "dimm_thermal_dimm_addr": thermal.dimm_addr,
        "dimm_thermal_temperature": f"{thermal.temp} Degrees C",
    }
def amdsmi_set_cpu_xgmi_width(
    processor_handle: processor_handle_t, min_width: int, max_width: int
):
    """
    Set the xGMI width range through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        min_width (int): Minimum width; converted to a ctypes.c_uint8 before the call.
        max_width (int): Maximum width; converted to a ctypes.c_uint8 before the call.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or either width is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Validate in declaration order so the first offending argument is reported.
    for width in (min_width, max_width):
        if not isinstance(width, int):
            raise AmdSmiParameterException(width, int)

    min_arg = ctypes.c_uint8(min_width)
    max_arg = ctypes.c_uint8(max_width)
    status = amdsmi_wrapper.amdsmi_set_cpu_xgmi_width(processor_handle, min_arg, max_arg)
    _check_res(status)
def amdsmi_set_cpu_gmi3_link_width_range(
    processor_handle: processor_handle_t,
    min_link_width: int, max_link_width: int
):
    """
    Set the GMI3 link width range through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        min_link_width (int): Minimum link width; converted to a ctypes.c_uint8.
        max_link_width (int): Maximum link width; converted to a ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or either width is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Validate in declaration order so the first offending argument is reported.
    for width in (min_link_width, max_link_width):
        if not isinstance(width, int):
            raise AmdSmiParameterException(width, int)

    min_arg = ctypes.c_uint8(min_link_width)
    max_arg = ctypes.c_uint8(max_link_width)
    status = amdsmi_wrapper.amdsmi_set_cpu_gmi3_link_width_range(
        processor_handle, min_arg, max_arg
    )
    _check_res(status)
def amdsmi_cpu_apb_enable(
    processor_handle: processor_handle_t
):
    """
    Call the AMD SMI library's APB enable entry point for the given handle.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_cpu_apb_enable(processor_handle)
    _check_res(status)
def amdsmi_cpu_apb_disable(
    processor_handle: processor_handle_t,
    pstate: int
):
    """
    Call the AMD SMI library's APB disable entry point with a P-state value.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        pstate (int): P-state value; converted to a ctypes.c_uint8 before the call.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or pstate is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(pstate, int):
        raise AmdSmiParameterException(pstate, int)

    pstate_arg = ctypes.c_uint8(pstate)
    status = amdsmi_wrapper.amdsmi_cpu_apb_disable(processor_handle, pstate_arg)
    _check_res(status)
def amdsmi_set_cpu_socket_lclk_dpm_level(
    processor_handle: processor_handle_t,
    nbio_id: int, min_val: int, max_val: int
):
    """
    Set the LCLK DPM level range for an NBIO through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        nbio_id (int): NBIO identifier; converted to a ctypes.c_uint8.
        min_val (int): Minimum level; converted to a ctypes.c_uint8.
        max_val (int): Maximum level; converted to a ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or any other argument is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Validate in declaration order so the first offending argument is reported.
    for candidate in (nbio_id, min_val, max_val):
        if not isinstance(candidate, int):
            raise AmdSmiParameterException(candidate, int)

    nbio_arg = ctypes.c_uint8(nbio_id)
    min_arg = ctypes.c_uint8(min_val)
    max_arg = ctypes.c_uint8(max_val)
    status = amdsmi_wrapper.amdsmi_set_cpu_socket_lclk_dpm_level(
        processor_handle, nbio_arg, min_arg, max_arg
    )
    _check_res(status)
def amdsmi_get_cpu_socket_lclk_dpm_level(
    processor_handle: processor_handle_t,
    nbio_id: int
):
    """
    Read the LCLK DPM level range for an NBIO through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        nbio_id (int): NBIO identifier; converted to a ctypes.c_uint8 before the call.

    Returns:
        dict: "nbio_max_dpm_level" and "nbio_min_dpm_level" from the struct
            filled by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or nbio_id is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(nbio_id, int):
        raise AmdSmiParameterException(nbio_id, int)

    nbio_arg = ctypes.c_uint8(nbio_id)
    levels = amdsmi_wrapper.amdsmi_dpm_level_t()
    # The struct instance itself is handed to the wrapper, as before (no explicit byref).
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_lclk_dpm_level(
        processor_handle, nbio_arg, levels
    )
    _check_res(status)

    return {
        "nbio_max_dpm_level": levels.max_dpm_level,
        "nbio_min_dpm_level": levels.min_dpm_level,
    }
def amdsmi_set_cpu_pcie_link_rate(
    processor_handle: processor_handle_t,
    rate_ctrl: int
):
    """
    Set the PCIe link rate control through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        rate_ctrl (int): Rate control value; converted to a ctypes.c_uint8 before the call.

    Returns:
        str: The previous-mode byte written back by the library, formatted as a
            decimal string.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or rate_ctrl is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(rate_ctrl, int):
        raise AmdSmiParameterException(rate_ctrl, int)

    rate_arg = ctypes.c_uint8(rate_ctrl)
    previous_mode = ctypes.c_uint8()
    status = amdsmi_wrapper.amdsmi_set_cpu_pcie_link_rate(
        processor_handle, rate_arg, ctypes.byref(previous_mode)
    )
    _check_res(status)

    return f"{previous_mode.value}"
2024-02-22 10:17:03 +00:00
2023-12-07 07:30:31 -08:00
def amdsmi_set_cpu_df_pstate_range(
    processor_handle: processor_handle_t,
    max_pstate: int, min_pstate: int
):
    """
    Set the data-fabric P-state range through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        max_pstate (int): Maximum P-state; converted to a ctypes.c_uint8.
        min_pstate (int): Minimum P-state; converted to a ctypes.c_uint8.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle or either P-state is not an int.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Validate in declaration order (max first) so the first offending argument is reported.
    for pstate in (max_pstate, min_pstate):
        if not isinstance(pstate, int):
            raise AmdSmiParameterException(pstate, int)

    max_arg = ctypes.c_uint8(max_pstate)
    min_arg = ctypes.c_uint8(min_pstate)
    # Argument order follows the wrapper call: max before min.
    status = amdsmi_wrapper.amdsmi_set_cpu_df_pstate_range(
        processor_handle, max_arg, min_arg
    )
    _check_res(status)
2023-12-07 07:30:31 -08:00
def amdsmi_get_cpu_current_io_bandwidth(
    processor_handle: processor_handle_t,
    encoding: int,
    link_name: str
):
    """
    Read the current IO bandwidth for a named link through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        encoding (int): Stored in the link descriptor's `bw_type` field as a c_uint32.
        link_name (str): UTF-8 encoded into a string buffer stored in the
            descriptor's `link_name` field.

    Returns:
        str: The 32-bit value written by the library, followed by " Mbps".

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle, encoding is not an int, or
            link_name is not a str.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Validate in declaration order so the first offending argument is reported.
    for argument, expected_type in ((encoding, int), (link_name, str)):
        if not isinstance(argument, expected_type):
            raise AmdSmiParameterException(argument, expected_type)

    link_descriptor = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
    link_descriptor.bw_type = ctypes.c_uint32(encoding)
    encoded_name = link_name.encode('utf-8')
    link_descriptor.link_name = ctypes.create_string_buffer(encoded_name)

    bandwidth_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_current_io_bandwidth(
        processor_handle, link_descriptor, ctypes.byref(bandwidth_out)
    )
    _check_res(status)

    return f"{bandwidth_out.value} Mbps"
2023-12-07 07:30:31 -08:00
def amdsmi_get_cpu_current_xgmi_bw(
    processor_handle: processor_handle_t,
    encoding: int,
    link_name: str
):
    """
    Read the current xGMI bandwidth for a named link through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        encoding (int): Stored in the link descriptor's `bw_type` field as a c_uint32.
        link_name (str): UTF-8 encoded into a string buffer stored in the
            descriptor's `link_name` field.

    Returns:
        str: The 32-bit value written by the library, followed by " Mbps".

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle, encoding is not an int, or
            link_name is not a str.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    # Validate in declaration order so the first offending argument is reported.
    for argument, expected_type in ((encoding, int), (link_name, str)):
        if not isinstance(argument, expected_type):
            raise AmdSmiParameterException(argument, expected_type)

    link_descriptor = amdsmi_wrapper.amdsmi_link_id_bw_type_t()
    link_descriptor.bw_type = ctypes.c_uint32(encoding)
    encoded_name = link_name.encode('utf-8')
    link_descriptor.link_name = ctypes.create_string_buffer(encoded_name)

    bandwidth_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_current_xgmi_bw(
        processor_handle, link_descriptor, ctypes.byref(bandwidth_out)
    )
    _check_res(status)

    return f"{bandwidth_out.value} Mbps"
2023-12-07 07:30:31 -08:00
2024-02-06 17:40:10 -06:00
def amdsmi_get_hsmp_metrics_table_version(
    processor_handle: processor_handle_t
):
    """
    Read the HSMP metrics table version through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        int: The 32-bit version value written by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    version_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_hsmp_metrics_table_version(
        processor_handle, ctypes.byref(version_out)
    )
    _check_res(status)

    return version_out.value
2026-01-06 10:37:07 -06:00
def amdsmi_set_cpu_rail_isofreq_policy(
        processor_handle: processor_handle_t,
        value: int):
    """
    Set the CPU rail iso-frequency policy through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        value (int): Policy value, passed to the wrapper unchanged.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # NOTE(review): unlike the sibling setters, `value` is neither type-checked
    # nor wrapped in a ctypes type here - confirm what the wrapper expects.
    status = amdsmi_wrapper.amdsmi_set_cpu_rail_isofreq_policy(processor_handle, value)
    _check_res(status)
def amdsmi_get_cpu_rail_isofreq_policy(
    processor_handle: processor_handle_t,
) -> int:
    """
    Read the CPU rail iso-frequency policy through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        int: The 8-bit policy value written by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    policy_out = ctypes.c_uint8()
    status = amdsmi_wrapper.amdsmi_get_cpu_rail_isofreq_policy(
        processor_handle, ctypes.byref(policy_out)
    )
    _check_res(status)

    return policy_out.value
def amdsmi_get_dfc_ctrl(
    processor_handle: processor_handle_t,
) -> int:
    """
    Read the DFC control setting through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        int: The 8-bit control value written by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ctrl_out = ctypes.c_uint8()
    status = amdsmi_wrapper.amdsmi_get_dfc_ctrl(
        processor_handle, ctypes.byref(ctrl_out)
    )
    _check_res(status)

    return ctrl_out.value
def amdsmi_set_dfc_ctrl(
        processor_handle: processor_handle_t,
        value: int):
    """
    Set the DFC control setting through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.
        value (int): Control value, passed to the wrapper unchanged.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # NOTE(review): unlike the sibling setters, `value` is neither type-checked
    # nor wrapped in a ctypes type here - confirm what the wrapper expects.
    status = amdsmi_wrapper.amdsmi_set_dfc_ctrl(processor_handle, value)
    _check_res(status)
2024-01-01 09:21:38 -05:00
# Reinterpret a 32-bit unsigned integer as a signed two's-complement value
def check_msb_32(num):
    """
    Return `num` reinterpreted as a signed 32-bit two's-complement integer.

    Parameters:
        num (int): Raw 32-bit value. Only the low 32 bits are considered, so an
            already-negative Python int is handled consistently.

    Returns:
        int: `num` unchanged when bit 31 is clear, otherwise `num - 2**32`.
    """
    # ctypes.sizeof gives the C size of the type (4 bytes). The module-level
    # NO_OF_32BITS constant is built from sys.getsizeof of the ctypes class,
    # which measures the Python type object and is far larger than 32.
    bit_width = 8 * ctypes.sizeof(ctypes.c_uint32)
    modulus = 1 << bit_width
    sign_bit = 1 << (bit_width - 1)

    num &= modulus - 1
    # Python ints are unbounded, so "~num + 1" is just -num; subtracting the
    # modulus is what yields the signed interpretation.
    if num & sign_bit:
        return num - modulus
    return num
2024-01-01 09:21:38 -05:00
# Reinterpret a 64-bit unsigned integer as a signed two's-complement value
def check_msb_64(num):
    """
    Return `num` reinterpreted as a signed 64-bit two's-complement integer.

    Parameters:
        num (int): Raw 64-bit value. Only the low 64 bits are considered, so an
            already-negative Python int is handled consistently.

    Returns:
        int: `num` unchanged when bit 63 is clear, otherwise `num - 2**64`.
    """
    # ctypes.sizeof gives the C size of the type (8 bytes). The module-level
    # NO_OF_64BITS constant is built from sys.getsizeof of the ctypes class,
    # which measures the Python type object and is far larger than 64.
    bit_width = 8 * ctypes.sizeof(ctypes.c_uint64)
    modulus = 1 << bit_width
    sign_bit = 1 << (bit_width - 1)

    num &= modulus - 1
    # Python ints are unbounded, so "~num + 1" is just -num; subtracting the
    # modulus is what yields the signed interpretation.
    if num & sign_bit:
        return num - modulus
    return num
2024-01-01 09:21:38 -05:00
2024-02-06 17:40:10 -06:00
def amdsmi_get_hsmp_metrics_table(
    processor_handle: processor_handle_t
):
    """
    Read the HSMP metrics table and return it as a dictionary of scaled values.

    Raw fixed-point fields are multiplied by 2**-10 (Q10/UQ10) or 2**-16 (UQ16),
    rounded to 3 decimals and, for most entries, formatted as strings with a
    unit suffix. Temperature fields are passed through check_msb_32 /
    check_msb_64 before scaling. Energy accumulators are additionally divided
    by the module-level KILO constant and labelled "kJ".

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        dict: Keys prefixed with "mtbl_". "mtbl_timestamp_raw" carries the raw
            table timestamp; "mtbl_timestamp_readable" is the host's local
            wall-clock time at the moment of the call (it is not derived from
            the table timestamp).

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    mtbl = amdsmi_wrapper.amdsmi_hsmp_metrics_table_t()

    # Encodings for the metric table defined for hsmp
    fraction_q10 = 1 / math.pow(2, 10)
    fraction_uq10 = fraction_q10
    fraction_uq16 = 1 / math.pow(2, 16)

    _check_res(
        amdsmi_wrapper.amdsmi_get_hsmp_metrics_table(
            processor_handle, mtbl
        )
    )

    # The readable timestamp is taken from the host clock. The previous
    # "rawtime = int(mtbl.timestamp)" assignment was overwritten immediately
    # and has been removed as a dead store.
    timeinfo = localtime(time())

    def uq10(raw):
        # Scale one unsigned Q10 fixed-point value.
        return round(raw * fraction_uq10, 3)

    def uq10_each(raw_values):
        # Scale every element of a Q10 fixed-point array.
        return [round(x * fraction_uq10, 3) for x in list(raw_values)]

    def signed_q10_32(raw):
        # Scale a 32-bit Q10 value after check_msb_32 processing.
        return round(check_msb_32(raw) * fraction_q10, 3)

    def signed_q10_64(raw):
        # Scale a 64-bit Q10 value after check_msb_64 processing.
        return round(check_msb_64(raw) * fraction_q10, 3)

    def energy_kilo(raw):
        # Scale a UQ16 energy accumulator and divide by KILO.
        return round((raw * fraction_uq16) / KILO, 3)

    return {
        "mtbl_accumulation_counter": mtbl.accumulation_counter,
        "mtbl_max_socket_temperature": f"{signed_q10_32(mtbl.max_socket_temperature)} °C",
        "mtbl_max_vr_temperature": f"{signed_q10_32(mtbl.max_vr_temperature)} °C",
        "mtbl_max_hbm_temperature": f"{signed_q10_32(mtbl.max_hbm_temperature)} °C",
        "mtbl_max_socket_temperature_acc": f"{signed_q10_64(mtbl.max_socket_temperature_acc)} °C",
        "mtbl_max_vr_temperature_acc": f"{signed_q10_64(mtbl.max_vr_temperature_acc)} °C",
        "mtbl_max_hbm_temperature_acc": f"{signed_q10_64(mtbl.max_hbm_temperature_acc)} °C",
        "mtbl_socket_power_limit": f"{uq10(mtbl.socket_power_limit)} W",
        "mtbl_max_socket_power_limit": f"{uq10(mtbl.max_socket_power_limit)} W",
        "mtbl_socket_power": f"{uq10(mtbl.socket_power)} W",
        "mtbl_timestamp_raw": mtbl.timestamp,
        "mtbl_timestamp_readable": f"{asctime(timeinfo)}",
        "mtbl_socket_energy_acc": f"{energy_kilo(mtbl.socket_energy_acc)} kJ",
        "mtbl_ccd_energy_acc": f"{energy_kilo(mtbl.ccd_energy_acc)} kJ",
        "mtbl_xcd_energy_acc": f"{energy_kilo(mtbl.xcd_energy_acc)} kJ",
        "mtbl_aid_energy_acc": f"{energy_kilo(mtbl.aid_energy_acc)} kJ",
        "mtbl_hbm_energy_acc": f"{energy_kilo(mtbl.hbm_energy_acc)} kJ",
        "mtbl_cclk_frequency_limit": f"{uq10(mtbl.cclk_frequency_limit)} GHz",
        "mtbl_gfxclk_frequency_limit": f"{uq10(mtbl.gfxclk_frequency_limit)} MHz",
        "mtbl_fclk_frequency": f"{uq10(mtbl.fclk_frequency)} MHz",
        "mtbl_uclk_frequency": f"{uq10(mtbl.uclk_frequency)} MHz",
        "mtbl_socclk_frequency": f"{uq10_each(mtbl.socclk_frequency)} MHz",
        "mtbl_vclk_frequency": f"{uq10_each(mtbl.vclk_frequency)} MHz",
        "mtbl_dclk_frequency": f"{uq10_each(mtbl.dclk_frequency)} MHz",
        "mtbl_lclk_frequency": f"{uq10_each(mtbl.lclk_frequency)} MHz",
        "mtbl_fclk_frequency_table": f"{uq10_each(mtbl.fclk_frequency_table)} MHz",
        "mtbl_uclk_frequency_table": f"{uq10_each(mtbl.uclk_frequency_table)} MHz",
        "mtbl_socclk_frequency_table": f"{uq10_each(mtbl.socclk_frequency_table)} MHz",
        "mtbl_vclk_frequency_table": f"{uq10_each(mtbl.vclk_frequency_table)} MHz",
        "mtbl_dclk_frequency_table": f"{uq10_each(mtbl.dclk_frequency_table)} MHz",
        "mtbl_lclk_frequency_table": f"{uq10_each(mtbl.lclk_frequency_table)} MHz",
        "mtbl_cclk_frequency_acc": f"{uq10_each(mtbl.cclk_frequency_acc)} GHz",
        "mtbl_gfxclk_frequency_acc": f"{uq10_each(mtbl.gfxclk_frequency_acc)} MHz",
        "mtbl_gfxclk_frequency": f"{uq10_each(mtbl.gfxclk_frequency)} MHz",
        "mtbl_max_cclk_frequency": f"{uq10(mtbl.max_cclk_frequency)} GHz",
        "mtbl_min_cclk_frequency": f"{uq10(mtbl.min_cclk_frequency)} GHz",
        "mtbl_max_gfxclk_frequency": f"{uq10(mtbl.max_gfxclk_frequency)} MHz",
        "mtbl_min_gfxclk_frequency": f"{uq10(mtbl.min_gfxclk_frequency)} MHz",
        "mtbl_max_lclk_dpm_range": mtbl.max_lclk_dpm_range,
        "mtbl_min_lclk_dpm_range": mtbl.min_lclk_dpm_range,
        "mtbl_xgmi_width": uq10(mtbl.xgmi_width),
        "mtbl_xgmi_bitrate": f"{uq10(mtbl.xgmi_bitrate)} Gbps",
        "mtbl_xgmi_read_bandwidth_acc": f"{uq10_each(mtbl.xgmi_read_bandwidth_acc)} Gbps",
        "mtbl_xgmi_write_bandwidth_acc": f"{uq10_each(mtbl.xgmi_write_bandwidth_acc)} Gbps",
        "mtbl_socket_c0_residency": f"{uq10(mtbl.socket_c0_residency)} %",
        "mtbl_socket_gfx_busy": f"{uq10(mtbl.socket_gfx_busy)} %",
        # The "hbm" keys below are populated from the table's dram_* fields.
        "mtbl_hbm_bandwidth_utilization": f"{uq10(mtbl.dram_bandwidth_utilization)} %",
        "mtbl_socket_c0_residency_acc": uq10(mtbl.socket_c0_residency_acc),
        "mtbl_socket_gfx_busy_acc": uq10(mtbl.socket_gfx_busy_acc),
        "mtbl_hbm_bandwidth_acc": f"{uq10(mtbl.dram_bandwidth_acc)} Gbps",
        "mtbl_max_hbm_bandwidth": f"{uq10(mtbl.max_dram_bandwidth)} Gbps",
        "mtbl_dram_bandwidth_utilization_acc": uq10(mtbl.dram_bandwidth_utilization_acc),
        "mtbl_pcie_bandwidth_acc": f"{uq10_each(mtbl.pcie_bandwidth_acc)} Gbps",
        "mtbl_prochot_residency_acc": mtbl.prochot_residency_acc,
        "mtbl_ppt_residency_acc": mtbl.ppt_residency_acc,
        "mtbl_socket_thm_residency_acc": mtbl.socket_thm_residency_acc,
        "mtbl_vr_thm_residency_acc": mtbl.vr_thm_residency_acc,
        "mtbl_hbm_thm_residency_acc": mtbl.hbm_thm_residency_acc,
    }
def amdsmi_first_online_core_on_cpu_socket(
    processor_handle: processor_handle_t
):
    """
    Read the index of the first online core on a CPU socket via the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        int: The 32-bit core index written by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    core_index = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_first_online_core_on_cpu_socket(
        processor_handle, ctypes.byref(core_index)
    )
    _check_res(status)

    return core_index.value
2024-02-02 01:29:46 -05:00
def amdsmi_get_cpu_family():
    """
    Read the CPU family through the AMD SMI library.

    Returns:
        int: The 32-bit family value written by the library.
    """
    family_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_family(ctypes.byref(family_out))
    _check_res(status)
    return family_out.value
def amdsmi_get_cpu_model():
    """
    Read the CPU model through the AMD SMI library.

    Returns:
        int: The 32-bit model value written by the library.
    """
    model_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_model(ctypes.byref(model_out))
    _check_res(status)
    return model_out.value
2025-03-29 10:21:39 +08:00
def amdsmi_get_cpu_model_name(
    processor_handle: processor_handle_t
):
    """
    Read the CPU model name through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        str: The `model_name` field of the struct filled by the library.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    cpu_info = amdsmi_wrapper.amdsmi_cpu_info_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_cpu_model_name(
            processor_handle, cpu_info
        )
    )

    model_name = cpu_info.model_name
    # ctypes returns char-array fields as bytes; formatting bytes with an
    # f-string would produce the repr "b'...'" instead of the text itself.
    # NOTE(review): assumes model_name is a char array - confirm against the
    # amdsmi_cpu_info_t definition. Non-bytes values are formatted as before.
    if isinstance(model_name, bytes):
        model_name = model_name.decode("utf-8", errors="replace")
    return f"{model_name}"
2025-06-11 17:19:02 -05:00
def amdsmi_get_cpu_cores_per_socket(sock_count: ctypes.c_uint32):
    """
    Read socket/core-count information through the AMD SMI library.

    Parameters:
        sock_count (ctypes.c_uint32): Passed to the wrapper unchanged.

    Returns:
        dict: "socket_id" and "cores_per_socket" from the struct filled by the library.
    """
    socket_info = amdsmi_wrapper.amdsmi_sock_info_t()
    # The struct instance itself is handed to the wrapper, as before (no explicit byref).
    status = amdsmi_wrapper.amdsmi_get_cpu_cores_per_socket(sock_count, socket_info)
    _check_res(status)

    result = {}
    result["socket_id"] = socket_info.socket_id
    result["cores_per_socket"] = socket_info.cores_per_socket
    return result
def amdsmi_get_cpu_socket_count():
    """
    Read the CPU socket count through the AMD SMI library.

    Returns:
        int: The 32-bit socket count written by the library.
    """
    count_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_cpu_socket_count(ctypes.byref(count_out))
    _check_res(status)
    return count_out.value
2023-06-01 14:46:21 +02:00
def amdsmi_init(flag=AmdSmiInitFlags.INIT_AMD_GPUS):
    """
    Initialize the AMD SMI library.

    Parameters:
        flag (AmdSmiInitFlags): Initialization flag passed to the library;
            defaults to AmdSmiInitFlags.INIT_AMD_GPUS.

    Raises:
        AmdSmiParameterException: If flag is not an AmdSmiInitFlags member.
    """
    if isinstance(flag, AmdSmiInitFlags):
        status = amdsmi_wrapper.amdsmi_init(flag)
        _check_res(status)
        return
    raise AmdSmiParameterException(flag, AmdSmiInitFlags)
2022-10-11 16:06:32 +02:00
2022-11-09 16:17:43 +01:00
def amdsmi_shut_down():
    """Shut down the AMD SMI library and validate the returned status."""
    status = amdsmi_wrapper.amdsmi_shut_down()
    _check_res(status)
2023-02-25 05:30:19 -05:00
def amdsmi_get_processor_type(
    processor_handle: processor_handle_t,
) -> Dict[str, str]:
    """
    Read the processor type for a handle through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        Dict[str, str]: {"processor_type": <name of the matching AmdSmiProcessorType member>}.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    type_out = amdsmi_wrapper.processor_type_t()
    status = amdsmi_wrapper.amdsmi_get_processor_type(
        processor_handle, ctypes.byref(type_out)
    )
    _check_res(status)

    # Map the raw enum value onto the Python enum and expose its name.
    type_name = AmdSmiProcessorType(type_out.value).name
    return {"processor_type": type_name}
2022-10-11 16:06:32 +02:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_device_bdf(processor_handle: processor_handle_t) -> str:
    """
    Read the GPU device BDF through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        str: The BDF struct filled by the library, rendered by _format_bdf.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    bdf_out = amdsmi_wrapper.amdsmi_bdf_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_device_bdf(
        processor_handle, ctypes.byref(bdf_out)
    )
    _check_res(status)

    return _format_bdf(bdf_out)
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_device_uuid(processor_handle: processor_handle_t) -> str:
    """
    Read the GPU device UUID through the AMD SMI library.

    Parameters:
        processor_handle (processor_handle_t): Processor handle forwarded to the library call.

    Returns:
        str: The UUID buffer contents decoded as UTF-8.

    Raises:
        AmdSmiParameterException: If processor_handle is not an
            amdsmi_wrapper.amdsmi_processor_handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    uuid_buffer = ctypes.create_string_buffer(AMDSMI_GPU_UUID_SIZE)
    # The buffer capacity is passed to the library by reference.
    buffer_length = ctypes.c_uint32()
    buffer_length.value = AMDSMI_GPU_UUID_SIZE

    status = amdsmi_wrapper.amdsmi_get_gpu_device_uuid(
        processor_handle, ctypes.byref(buffer_length), uuid_buffer
    )
    _check_res(status)

    return uuid_buffer.value.decode("utf-8")
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_enumeration_info(processor_handle: processor_handle_t) -> Dict[str, Any]:
    """
    Retrieve GPU enumeration information: DRM render ID, DRM card ID,
    HSA ID, HIP ID and HIP UUID.

    Parameters:
        processor_handle: The processor handle; must be an instance of
            `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: Keys `drm_render`, `drm_card`, `hsa_id`, `hip_id`
        (each passed through `_validate_if_max_uint` as a 32-bit value) and
        `hip_uuid` (UTF-8 decoded string).

    Raises:
        AmdSmiParameterException: If the input parameters are invalid.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Let the C library fill in the enumeration struct, then check the status.
    raw_info = amdsmi_wrapper.amdsmi_enumeration_info_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_enumeration_info(
            processor_handle, ctypes.byref(raw_info)
        )
    )

    # The numeric IDs all share the same 32-bit validation.
    numeric_fields = ("drm_render", "drm_card", "hsa_id", "hip_id")
    result = {
        field: _validate_if_max_uint(getattr(raw_info, field), MaxUIntegerTypes.UINT32_T)
        for field in numeric_fields
    }
    result["hip_uuid"] = raw_info.hip_uuid.decode('utf-8')
    return result
2025-05-20 11:36:09 +05:30
def amdsmi_get_cpu_affinity_with_scope(
    processor_handle: processor_handle_t,
    scope: AmdSmiAffinityScope
) -> List[int]:
    """
    Query the CPU affinity bitmask of a device for the given scope.

    The bitmask buffer is sized to hold one bit per core across all sockets
    (socket count times cores per socket), rounded up to whole 64-bit words.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.
        scope: An `AmdSmiAffinityScope` member selecting the affinity scope.

    Returns:
        The ctypes array of `c_uint64` words filled in by the library.
        NOTE(review): this is a ctypes array rather than the annotated
        `List[int]`; it iterates and indexes like a list of ints.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(scope, AmdSmiAffinityScope):
        raise AmdSmiParameterException(scope, AmdSmiAffinityScope)

    socket_count = amdsmi_get_cpu_socket_count()
    cores_per_socket = amdsmi_get_cpu_cores_per_socket(socket_count)['cores_per_socket']

    # Number of 64-bit words needed to give every core one bit.
    bits_per_word = ctypes.sizeof(ctypes.c_uint64) * 8
    word_count = int(math.ceil((socket_count * cores_per_socket) / bits_per_word))
    size = ctypes.c_uint32(word_count)
    cpu_set = (ctypes.c_uint64 * size.value)()

    status = amdsmi_wrapper.amdsmi_get_cpu_affinity_with_scope(
        processor_handle, size, cpu_set, scope
    )
    _check_res(status)
    return cpu_set
2025-10-27 14:43:31 -05:00
2023-02-25 07:22:16 -05:00
def amdsmi_get_gpu_asic_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return ASIC identification data for a GPU as display-ready values.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: Keys `market_name`, `vendor_id`, `vendor_name`,
        `subvendor_id`, `device_id`, `rev_id`, `asic_serial`, `oam_id`,
        `num_compute_units`, `target_graphics_version`, `subsystem_id` and
        `flags`. Empty names, zero vendor/device IDs and an empty serial are
        reported as "N/A"; IDs are rendered as hex strings and the serial as
        a 16-digit upper-case hex string.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
        ValueError: If the serial string is non-empty but not valid hex.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_asic_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_asic_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    uint32_max = MaxUIntegerTypes.UINT32_T
    market_name = _pad_hex_value(raw.market_name.decode("utf-8"), 4)
    target_graphics_version = hex(raw.target_graphics_version)[2:]

    # Only pad the IDs when validation left them as integers.
    subsystem_id = _validate_if_max_uint(raw.subsystem_id, uint32_max)
    subvendor_id = _validate_if_max_uint(raw.subvendor_id, uint32_max)
    if isinstance(subsystem_id, int):
        subsystem_id = _pad_hex_value(hex(subsystem_id), 4)
    if isinstance(subvendor_id, int):
        subvendor_id = _pad_hex_value(hex(subvendor_id), 4)

    asic_info = {
        "market_name": market_name,
        "vendor_id": raw.vendor_id,
        "vendor_name": raw.vendor_name.decode("utf-8"),
        "subvendor_id": subvendor_id,
        "device_id": raw.device_id,
        "rev_id": _pad_hex_value(hex(raw.rev_id), 2),
        "asic_serial": raw.asic_serial.decode("utf-8"),
        "oam_id": _validate_if_max_uint(raw.oam_id, uint32_max),
        "num_compute_units": _validate_if_max_uint(raw.num_of_compute_units, uint32_max),
        "target_graphics_version": "gfx" + target_graphics_version,
        "subsystem_id": subsystem_id,
        "flags": raw.flags,
    }

    # Empty strings become a placeholder.
    for key in ("market_name", "vendor_name"):
        asic_info[key] = asic_info[key] or "N/A"

    # Non-zero IDs are shown in hex, zero as a placeholder.
    for key in ("vendor_id", "device_id"):
        numeric_id = asic_info[key]
        asic_info[key] = hex(numeric_id) if numeric_id else "N/A"

    # Convert asic serial (hex string) to hex output format
    serial_text = asic_info["asic_serial"]
    if serial_text:
        asic_info["asic_serial"] = "0x{:016X}".format(int(serial_text, base=16))
    else:
        asic_info["asic_serial"] = "N/A"

    # Remove commas from vendor name for clean output
    asic_info["vendor_name"] = asic_info["vendor_name"].replace(',', '')
    return asic_info
2022-10-11 16:06:32 +02:00
2024-09-17 04:54:41 -05:00
def amdsmi_get_gpu_kfd_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the KFD identifiers reported for a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: Keys `kfd_id` (validated as 64-bit), `node_id` and
        `current_partition_id` (validated as 32-bit), each passed through
        `_validate_if_max_uint`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_kfd_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_kfd_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    kfd_id = _validate_if_max_uint(raw.kfd_id, MaxUIntegerTypes.UINT64_T)
    node_id = _validate_if_max_uint(raw.node_id, MaxUIntegerTypes.UINT32_T)
    partition_id = _validate_if_max_uint(raw.current_partition_id, MaxUIntegerTypes.UINT32_T)
    return {
        "kfd_id": kfd_id,
        "node_id": node_id,
        "current_partition_id": partition_id,
    }
2025-10-30 09:48:35 -05:00
def amdsmi_get_supported_power_cap(
        processor_handle: processor_handle_t) -> Dict[str, Any]:
    """
    List the power-cap sensors a device reports as supported.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: `sensor_inds` (list of sensor indices) and
        `sensor_types` (list of `AmdSmiPowerCapType` members), each with as
        many entries as the sensor count reported by the library.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output arrays are allocated for at most this many sensors.
    max_power_sensors = 2
    reported_count = ctypes.c_uint32()
    indices = (ctypes.c_uint32 * max_power_sensors)()
    types = (amdsmi_wrapper.amdsmi_power_cap_type_t * max_power_sensors)()

    status = amdsmi_wrapper.amdsmi_get_supported_power_cap(
        processor_handle, ctypes.byref(reported_count), indices, types
    )
    _check_res(status)

    sensor_inds = []
    for slot in range(reported_count.value):
        sensor_inds.append(indices[slot])
    sensor_types = []
    for slot in range(reported_count.value):
        sensor_types.append(AmdSmiPowerCapType(types[slot]))
    return {
        "sensor_inds": sensor_inds,
        "sensor_types": sensor_types,
    }
2024-09-17 04:54:41 -05:00
2022-11-09 16:17:43 +01:00
def amdsmi_get_power_cap_info(
    processor_handle: processor_handle_t,
    sensor_ind: int = AmdSmiPowerCapType.PPT0
) -> Dict[str, Any]:
    """
    Return the power-cap settings of one power sensor of a device.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.
        sensor_ind: Sensor selector forwarded to the library; defaults to
            `AmdSmiPowerCapType.PPT0`.

    Returns:
        Dict[str, Any]: Keys `power_cap`, `default_power_cap`, `dpm_cap`,
        `min_power_cap` and `max_power_cap`, copied from the
        `amdsmi_power_cap_info_t` structure.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    cap_info = amdsmi_wrapper.amdsmi_power_cap_info_t()
    status = amdsmi_wrapper.amdsmi_get_power_cap_info(
        processor_handle, sensor_ind, ctypes.byref(cap_info)
    )
    _check_res(status)

    reported_fields = (
        "power_cap",
        "default_power_cap",
        "dpm_cap",
        "min_power_cap",
        "max_power_cap",
    )
    return {field: getattr(cap_info, field) for field in reported_fields}
2022-10-11 16:06:32 +02:00
2025-10-27 14:43:31 -05:00
def _get_name_value(num, data) -> List[Dict[str, int]]:
    """
    Build a list of name/value dictionaries from a raw buffer of records.

    Record fields are not read through ctypes structure attributes (the
    original author notes that direct field access on the
    `amdsmi_name_value_t` array is broken); the bytes are reinterpreted with
    casts and address arithmetic instead.

    Parameters:
        num: Object whose `.value` is the number of records to read
            (callers in this file pass a `ctypes.c_uint32`).
        data: Pointer to the first record (callers in this file pass a
            `POINTER(amdsmi_name_value_t)`).

    Returns:
        List[Dict[str, int]]: One dictionary per record, in buffer order,
        with keys 'name' (str) and 'value' (int).
    """
    # Stride used to walk the buffer: a quarter of AMDSMI_MAX_STRING_LENGTH
    # for the name (the original comment gives 64 = 256 / 4) plus 8 bytes
    # for the uint64 value.
    name_span = int(AMDSMI_MAX_STRING_LENGTH / 4)
    value_span = 8
    record_stride = name_span + value_span

    records = ctypes.cast(data, ctypes.POINTER(ctypes.c_char * record_stride))
    name_ptr_type = ctypes.POINTER(ctypes.c_char * AMDSMI_MAX_STRING_LENGTH)
    value_ptr_type = ctypes.POINTER(ctypes.c_uint64)

    pairs = []
    for index in range(num.value):
        record = records[index]
        base_addr = ctypes.addressof(record)

        # Name: bytes from the record start up to the first NUL.
        name_view = ctypes.cast(base_addr, name_ptr_type)
        raw_name = ctypes.string_at(name_view.contents)
        name = raw_name.rstrip(b'\x00').decode('utf-8', errors='replace')

        # Value: uint64 read at record start + record_stride.
        # NOTE(review): this offset equals the indexing stride, i.e. the
        # address of records[index + 1] - confirm against the C struct layout.
        value_view = ctypes.cast(base_addr + record_stride, value_ptr_type)

        pairs.append({
            'name': name,
            'value': value_view.contents.value,
        })
    return pairs
2024-08-01 10:49:04 -04:00
def amdsmi_get_gpu_pm_metrics_info(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """
    Return the power-management metrics of a GPU as name/value pairs.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        List[Dict[str, Any]]: The records produced by `_get_name_value`,
        one dictionary with keys 'name' and 'value' per metric.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    pm_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)()
    num_mets = ctypes.c_uint32(0)
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_pm_metrics_info(
            processor_handle, ctypes.byref(pm_metrics), ctypes.byref(num_mets)
        )
    )

    try:
        results = _get_name_value(num_mets, pm_metrics)
    finally:
        # Free the library-allocated buffer even if parsing raises, so the
        # allocation cannot leak.
        amdsmi_wrapper.amdsmi_free_name_value_pairs(pm_metrics)

    return results
2024-09-18 19:53:32 -05:00
2024-08-01 10:49:04 -04:00
def amdsmi_get_gpu_reg_table_info(
    processor_handle: processor_handle_t, reg_type: AmdSmiRegType
) -> List[Dict[str, Any]]:
    """
    Return one register table of a GPU as name/value pairs.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.
        reg_type: An `AmdSmiRegType` member selecting the register table.

    Returns:
        List[Dict[str, Any]]: The records produced by `_get_name_value`,
        one dictionary with keys 'name' and 'value' per register.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(reg_type, AmdSmiRegType):
        raise AmdSmiParameterException(reg_type, AmdSmiRegType)

    reg_metrics = POINTER(amdsmi_wrapper.amdsmi_name_value_t)()
    num_regs = ctypes.c_uint32(0)
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_reg_table_info(
            processor_handle, reg_type, ctypes.byref(reg_metrics), ctypes.byref(num_regs)
        )
    )

    try:
        results = _get_name_value(num_regs, reg_metrics)
    finally:
        # Free the library-allocated buffer even if parsing raises, so the
        # allocation cannot leak.
        amdsmi_wrapper.amdsmi_free_name_value_pairs(reg_metrics)

    return results
2022-10-11 16:06:32 +02:00
2024-09-18 19:53:32 -05:00
2023-09-22 05:10:45 -05:00
def amdsmi_get_gpu_vram_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the VRAM description of a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: Keys `vram_type`, `vram_vendor` (UTF-8 decoded),
        `vram_size`, `vram_bit_width` (validated as 32-bit) and
        `vram_max_bandwidth` (validated as 64-bit).

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_vram_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_vram_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    result = {}
    result["vram_type"] = raw.vram_type
    result["vram_vendor"] = raw.vram_vendor.decode("utf-8")
    result["vram_size"] = raw.vram_size
    result["vram_bit_width"] = _validate_if_max_uint(
        raw.vram_bit_width, MaxUIntegerTypes.UINT32_T
    )
    result["vram_max_bandwidth"] = _validate_if_max_uint(
        raw.vram_max_bandwidth, MaxUIntegerTypes.UINT64_T
    )
    return result
2024-11-07 16:35:17 -06:00
def amdsmi_get_gpu_xgmi_link_status(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the state of each XGMI link of a GPU as one-letter codes.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: `status`, a list with one entry per reported link
        ("X" disabled, "U" up, "D" down, "N/A" for any other value), and
        `total_links`, the link count reported by the library.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    status_info = amdsmi_wrapper.amdsmi_xgmi_link_status_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_xgmi_link_status(
            processor_handle, ctypes.byref(status_info))
    )

    enum_names = amdsmi_wrapper.amdsmi_xgmi_link_status_type_t__enumvalues
    code_by_name = {
        'AMDSMI_XGMI_LINK_DISABLE': "X",  # XGMI link is disabled
        'AMDSMI_XGMI_LINK_UP': "U",       # XGMI link is up
        'AMDSMI_XGMI_LINK_DOWN': "D",     # XGMI link is down
    }

    # Only the first `total_links` entries of the status array are reported.
    # `.get` is used for the enum lookup so a value missing from the enum
    # table yields "N/A" rather than raising KeyError.
    link_status = [
        code_by_name.get(enum_names.get(link), "N/A")
        for _, link in zip(range(status_info.total_links), status_info.status)
    ]

    return {
        "status": link_status,
        "total_links": status_info.total_links,
    }
2023-10-10 20:42:52 -05:00
def amdsmi_get_gpu_cache_info(
    processor_handle: processor_handle_t,
) -> Dict[str, List]:
    """
    Return a description of every cache type reported for a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, List]: A single key `cache` holding one dictionary per
        cache type with keys `cache_properties` (list of property names with
        the "AMDSMI_CACHE_PROPERTY_" prefix removed), `cache_size`,
        `cache_level`, `max_num_cu_shared` and `num_cache_instance`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
        AmdSmiLibraryException: With `AMDSMI_STATUS_NO_DATA` when the
            library reports no cache types.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_gpu_cache_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_cache_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    caches = []
    for index in range(raw.num_cache_types):
        entry = raw.cache[index]
        bitmask = entry.cache_properties

        # Test the bitmask against each known property flag, in fixed order.
        known_flags = (
            amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_DATA_CACHE,
            amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_INST_CACHE,
            amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_CPU_CACHE,
            amdsmi_wrapper.AMDSMI_CACHE_PROPERTY_SIMD_CACHE,
        )
        property_names = []
        for flag in known_flags:
            matched = bitmask & flag
            if not matched:
                continue
            full_name = amdsmi_wrapper.amdsmi_cache_property_type_t__enumvalues[matched]
            property_names.append(full_name.replace("AMDSMI_CACHE_PROPERTY_", ""))

        # cache_properties is kept first in the dictionary for readability.
        caches.append({
            "cache_properties": property_names,
            "cache_size": entry.cache_size,
            "cache_level": entry.cache_level,
            "max_num_cu_shared": entry.max_num_cu_shared,
            "num_cache_instance": entry.num_cache_instance,
        })

    if not caches:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_NO_DATA)

    return {"cache": caches}
2023-10-10 20:42:52 -05:00
2023-02-25 07:45:11 -05:00
def amdsmi_get_gpu_vbios_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the VBIOS identification strings of a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: Keys `name`, `build_date`, `part_number`, `version`
        and `boot_firmware`, all UTF-8 decoded; an empty `boot_firmware` is
        reported as "N/A".

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw = amdsmi_wrapper.amdsmi_vbios_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_vbios_info(
        processor_handle, ctypes.byref(raw)
    )
    _check_res(status)

    decoded_firmware = raw.boot_firmware.decode("utf-8")
    boot_firmware = "N/A" if decoded_firmware == "" else decoded_firmware

    return {
        "name": raw.name.decode("utf-8"),
        "build_date": raw.build_date.decode("utf-8"),
        "part_number": raw.part_number.decode("utf-8"),
        "version": raw.version.decode("utf-8"),
        "boot_firmware": boot_firmware,
    }
def amdsmi_get_gpu_activity(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the engine usage readings of a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: Keys `gfx_activity`, `umc_activity` and
        `mm_activity`; a reading equal to 0xFFFF is reported as "N/A".

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    usage = amdsmi_wrapper.amdsmi_engine_usage_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_activity(
        processor_handle, ctypes.byref(usage)
    )
    _check_res(status)

    readings = (
        ("gfx_activity", usage.gfx_activity),
        ("umc_activity", usage.umc_activity),
        ("mm_activity", usage.mm_activity),
    )
    # 0xFFFF is replaced by a placeholder string.
    return {
        name: ("N/A" if reading == 0xFFFF else reading)
        for name, reading in readings
    }
2022-10-11 16:06:32 +02:00
2023-05-16 12:31:52 +02:00
def amdsmi_get_clock_info(
    processor_handle: processor_handle_t,
    clock_type: AmdSmiClkType,
) -> Dict[str, Any]:
    """
    Return the current clock measurement for one clock domain of a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.
        clock_type: An `AmdSmiClkType` member selecting the clock domain.

    Returns:
        Dict[str, Any]: Keys `clk`, `min_clk`, `max_clk` (validated as
        32-bit), `clk_locked` (validated as 8-bit with `isBool=True`) and
        `clk_deep_sleep` (validated as 8-bit).

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(clock_type, AmdSmiClkType):
        raise AmdSmiParameterException(clock_type, AmdSmiClkType)

    measurement = amdsmi_wrapper.amdsmi_clk_info_t()
    status = amdsmi_wrapper.amdsmi_get_clock_info(
        processor_handle, clock_type, ctypes.byref(measurement)
    )
    _check_res(status)

    uint32_max = MaxUIntegerTypes.UINT32_T
    uint8_max = MaxUIntegerTypes.UINT8_T
    return {
        "clk": _validate_if_max_uint(measurement.clk, uint32_max),
        "min_clk": _validate_if_max_uint(measurement.min_clk, uint32_max),
        "max_clk": _validate_if_max_uint(measurement.max_clk, uint32_max),
        "clk_locked": _validate_if_max_uint(measurement.clk_locked, uint8_max, isBool=True),
        "clk_deep_sleep": _validate_if_max_uint(measurement.clk_deep_sleep, uint8_max),
    }
2022-10-11 16:06:32 +02:00
2023-02-27 01:18:07 -05:00
def amdsmi_get_gpu_bad_page_info(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """
    Return the retired (bad) memory page records of a GPU.

    The library is called twice: first with a null record pointer to obtain
    the page count, then with an array sized to that count.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        List[Dict[str, Any]]: An empty list when the reported page count is
        zero; otherwise the result of `_format_bad_page_info`.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    record_type = amdsmi_wrapper.amdsmi_retired_page_record_t
    page_count = ctypes.c_uint32()

    # Pass 1: null record pointer, only the count is read afterwards.
    no_records = POINTER(record_type)()
    count_status = amdsmi_wrapper.amdsmi_get_gpu_bad_page_info(
        processor_handle, ctypes.byref(page_count), no_records
    )
    _check_res(count_status)

    if page_count.value == 0:
        return []

    # Pass 2: fetch the records into an array of the reported length.
    records = (record_type * page_count.value)()
    fetch_status = amdsmi_wrapper.amdsmi_get_gpu_bad_page_info(
        processor_handle, ctypes.byref(page_count), records
    )
    _check_res(fetch_status)

    return _format_bad_page_info(records, page_count)
2022-10-11 16:06:32 +02:00
2025-04-14 04:19:45 -05:00
def amdsmi_get_gpu_bad_page_threshold(
    processor_handle: processor_handle_t,
) -> int:
    """
    Return the bad-page threshold reported for a GPU.

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        int: The 32-bit threshold value written by the library.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    reported = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_bad_page_threshold(
        processor_handle, ctypes.byref(reported)
    )
    _check_res(status)
    return reported.value
2022-10-11 16:06:32 +02:00
2024-05-21 20:30:16 -05:00
def amdsmi_get_violation_status(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """
    Return the throttle-violation counters and states of a GPU.

    Three families of readings are reported, each validated through
    `_validate_if_max_uint`:
      * `acc_*`    - validated as 64-bit values,
      * `per_*`    - validated as 64-bit values with `isActivity=True`,
      * `active_*` - validated as 8-bit values with `isBool=True`.
    Within each family the scalar fields map to a single value, while the
    `*_gfx_clk_below_host_limit_pwr/_thm/_total` and `*_low_utilization`
    fields are two-dimensional and map to a list of lists (one inner list
    per outer array entry).

    Parameters:
        processor_handle: Handle of the device to query; must be an
            instance of `amdsmi_wrapper.amdsmi_processor_handle`.

    Returns:
        Dict[str, Any]: `reference_timestamp`, `violation_timestamp`,
        `acc_counter`, followed by the `acc_*`, `per_*` and `active_*`
        families, with dictionary keys equal to the structure field names.

    Raises:
        AmdSmiParameterException: If `processor_handle` has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    violation_status = amdsmi_wrapper.amdsmi_violation_status_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_violation_status(
            processor_handle, ctypes.byref(violation_status))
    )

    uint64_max = MaxUIntegerTypes.UINT64_T
    uint8_max = MaxUIntegerTypes.UINT8_T

    # (max type, extra validation flags, scalar fields, per-XCD 2D fields).
    # Field names double as the output keys; order defines the key order.
    families = (
        (
            uint64_max, {},
            (
                "acc_prochot_thrm",
                "acc_ppt_pwr",  # PVIOL
                "acc_socket_thrm",  # TVIOL
                "acc_vr_thrm",
                "acc_hbm_thrm",
                "acc_gfx_clk_below_host_limit",
            ),
            (
                "acc_gfx_clk_below_host_limit_pwr",
                "acc_gfx_clk_below_host_limit_thm",
                "acc_gfx_clk_below_host_limit_total",
                "acc_low_utilization",
            ),
        ),
        (
            uint64_max, {"isActivity": True},
            (
                "per_prochot_thrm",
                "per_ppt_pwr",  # PVIOL
                "per_socket_thrm",  # TVIOL
                "per_vr_thrm",
                "per_hbm_thrm",
                "per_gfx_clk_below_host_limit",
            ),
            (
                "per_gfx_clk_below_host_limit_pwr",
                "per_gfx_clk_below_host_limit_thm",
                "per_gfx_clk_below_host_limit_total",
                "per_low_utilization",
            ),
        ),
        (
            uint8_max, {"isBool": True},
            (
                "active_prochot_thrm",
                "active_ppt_pwr",  # PVIOL
                "active_socket_thrm",  # TVIOL
                "active_vr_thrm",
                "active_hbm_thrm",
                "active_gfx_clk_below_host_limit",
            ),
            (
                "active_gfx_clk_below_host_limit_pwr",
                "active_gfx_clk_below_host_limit_thm",
                "active_gfx_clk_below_host_limit_total",
                "active_low_utilization",
            ),
        ),
    )

    dict_return = {
        "reference_timestamp": _validate_if_max_uint(violation_status.reference_timestamp, uint64_max),
        "violation_timestamp": _validate_if_max_uint(violation_status.violation_timestamp, uint64_max),
        "acc_counter": _validate_if_max_uint(violation_status.acc_counter, uint64_max),
    }

    for max_type, flags, scalar_fields, per_xcd_fields in families:
        for field in scalar_fields:
            dict_return[field] = _validate_if_max_uint(
                getattr(violation_status, field), max_type, **flags
            )
        # Create 2d array with each XCD's stats
        for field in per_xcd_fields:
            dict_return[field] = [
                [_validate_if_max_uint(val, max_type, **flags) for val in xcp_metrics]
                for xcp_metrics in getattr(violation_status, field)
            ]

    return dict_return
2024-05-21 20:30:16 -05:00
2023-05-31 10:30:59 +02:00
def amdsmi_get_gpu_total_ecc_count(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the total ECC error counters reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        Dict[str, Any]: The ``correctable_count``, ``uncorrectable_count`` and
        ``deferred_count`` fields of the library's error-count structure.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    error_counts = amdsmi_wrapper.amdsmi_error_count_t()
    # The library status code is handed to _check_res for validation.
    status = amdsmi_wrapper.amdsmi_get_gpu_total_ecc_count(
        processor_handle, ctypes.byref(error_counts)
    )
    _check_res(status)

    exported_fields = ("correctable_count", "uncorrectable_count", "deferred_count")
    return {field: getattr(error_counts, field) for field in exported_fields}
2025-05-15 21:49:56 -05:00
def amdsmi_get_gpu_cper_entries(
    processor_handle: processor_handle_t,
    severity_mask: int,
    buffer_size: int = 4 * 1048576,
    cursor: int = 0
) -> Tuple[Dict[str, Any], int, List[Dict[str, Any]], int]:
    """Read CPER records for a processor and decode their headers.

    Args:
        processor_handle: Handle of the processor to query.
        severity_mask: Severity mask, forwarded to the library as a uint32.
        buffer_size: Size in bytes of the buffer allocated for raw CPER data.
        cursor: Starting cursor forwarded to the library.

    Returns:
        A tuple ``(entries, cursor, cper_data, status_code)``:
            - entries: dict mapping the entry index to its decoded header fields.
            - cursor: cursor value written back by the library.
            - cper_data: one ``{"bytes": [...], "size": n}`` dict per entry,
              where ``bytes`` is a list of signed ``c_byte`` values.
            - status_code: the library status (success or "more data").

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
        AmdSmiLibraryException: If the library returns a status other than
            ``AMDSMI_STATUS_SUCCESS`` or ``AMDSMI_STATUS_MORE_DATA``.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(severity_mask, int):
        raise AmdSmiParameterException(severity_mask, int)
    if not isinstance(buffer_size, int):
        raise AmdSmiParameterException(buffer_size, int)
    if not isinstance(cursor, int):
        raise AmdSmiParameterException(cursor, int)

    # Allocate a buffer for CPER data.
    buf = ctypes.create_string_buffer(buffer_size)
    buf_size = ctypes.c_uint64(buffer_size)
    # The header-pointer array handed to the library has room for this many entries.
    num_cper_hdrs = 20
    entry_count = ctypes.c_uint64(num_cper_hdrs)
    cur = ctypes.c_uint64(cursor)

    # Allocate a pointer for the CPER header array.
    hdr_ptr_type = ctypes.POINTER(amdsmi_wrapper.amdsmi_cper_hdr_t)
    cper_hdrs_array = (hdr_ptr_type * num_cper_hdrs)()
    cper_hdrs = ctypes.cast(cper_hdrs_array, ctypes.POINTER(hdr_ptr_type))

    # Call the underlying AMD-SMI API.
    status_code = amdsmi_wrapper.amdsmi_get_gpu_cper_entries(
        processor_handle,
        ctypes.c_uint32(severity_mask),
        buf,
        ctypes.byref(buf_size),
        cper_hdrs,
        ctypes.byref(entry_count),
        ctypes.byref(cur)
    )
    if status_code not in {amdsmi_wrapper.AMDSMI_STATUS_SUCCESS, amdsmi_wrapper.AMDSMI_STATUS_MORE_DATA}:
        raise AmdSmiLibraryException(status_code)

    # The serial number depends only on the processor, so look it up once
    # instead of once per entry. The lookup is best-effort: any failure leaves
    # the serial number empty. It is skipped entirely when there are no entries.
    serial_number = ""
    if entry_count.value:
        try:
            board_info = amdsmi_get_gpu_board_info(processor_handle)
            serial_number = board_info.get('product_serial', "")
        except Exception:
            serial_number = ""

    entries = {}
    cper_data = []
    offset = 0
    buf_address = ctypes.addressof(buf)

    # Iterate over each entry using its variable record_length.
    for i in range(entry_count.value):
        entry_address = buf_address + offset
        header = ctypes.cast(entry_address, hdr_ptr_type).contents
        record_length = header.record_length

        # Extract the raw bytes and size of the entry.
        cper_data.append({
            "bytes": list((record_length * ctypes.c_byte).from_address(entry_address)),
            "size": record_length
        })

        # Extract the timestamp fields.
        timestamp = header.timestamp
        year = timestamp.year
        if year < 100:  # Adjust the year if it's less than 100.
            year += 2000
        formatted_timestamp = (
            f"{year:04d}/"
            f"{timestamp.month:02d}/"
            f"{timestamp.day:02d} "
            f"{timestamp.hours:02d}:"
            f"{timestamp.minutes:02d}:"
            f"{timestamp.seconds:02d}"
        )

        # Create a dictionary for the CPER entry.
        # Header fields not currently exported: reserved, cper_valid_bits, partition_id.
        entries[i] = {
            "error_severity": amdsmi_wrapper.amdsmi_cper_sev_t__enumvalues.get(
                header.error_severity, "AMDSMI_CPER_SEV_UNUSED"
            ).replace("AMDSMI_CPER_SEV_", "").lower(),
            "notify_type": _notifyTypeToString(header.notify_type.b),
            "timestamp": formatted_timestamp,
            "signature": header.signature,
            "revision": header.revision,
            "signature_end": hex(header.signature_end),
            "sec_cnt": header.sec_cnt,
            "record_length": record_length,
            "serial_number": serial_number,
            "platform_id": header.platform_id,
            "creator_id": header.creator_id,
            "record_id": header.record_id,
            "flags": header.flags,
            "persistence_info": header.persistence_info,
        }

        offset += record_length  # Use the actual record length to advance the offset.

    return entries, cur.value, cper_data, status_code
2025-04-12 01:54:57 -05:00
2022-10-11 16:06:32 +02:00
2025-05-15 21:49:56 -05:00
def amdsmi_get_afids_from_cper(
    cper_afid_data: Union[bytes, bytearray, List[Dict[str, Any]]]
) -> Tuple[List[int], int]:
    """
    Extract AFIDs from a CPER blob.

    Args:
        cper_afid_data: raw bytes of a single CPER record, or a list of record
            dicts, each with a "bytes" entry (sequence of byte values, signed
            or unsigned) and a "size" entry.

    Returns:
        Tuple[List[int], int]: A tuple containing:
            - A list of extracted AFIDs.
            - The total count of AFIDs.

    Raises:
        AmdSmiParameterException: If the input is neither bytes-like nor a
            list, or if a record is not a dict with "bytes" and "size" keys.
        AmdSmiLibraryException: If the library call does not return
            AMDSMI_STATUS_SUCCESS for a record.
    """
    # Normalize single blob into a list of records.
    # isinstance() rejects subscripted generics such as List[Dict[str, Any]]
    # with a TypeError, so the check uses the plain runtime type.
    if isinstance(cper_afid_data, (bytes, bytearray)):
        cper_records = [{
            "bytes": list(cper_afid_data),
            "size": len(cper_afid_data)
        }]
    elif isinstance(cper_afid_data, list):
        cper_records = cper_afid_data
    else:
        raise AmdSmiParameterException(cper_afid_data, bytes)

    all_afids: List[int] = []
    for record in cper_records:
        if not (isinstance(record, dict) and "bytes" in record and "size" in record):
            raise AmdSmiParameterException(record,
                "dict with keys 'bytes' and 'size' or bytes/bytearray")
        # Records may carry signed byte values (-128..127), e.g. lists built
        # from ctypes.c_byte arrays; mask them to the unsigned 0..255 range
        # that bytes() requires.
        raw_bytes = bytes(value & 0xFF for value in record["bytes"])
        record_size = record["size"]

        # Wrap as char*
        buf = ctypes.create_string_buffer(raw_bytes, record_size)
        buf_ptr = ctypes.cast(buf, ctypes.POINTER(ctypes.c_char))
        afid_array = (ctypes.c_uint64 * MAX_NUMBER_OF_AFIDS_PER_RECORD)()
        num_afids_ct = ctypes.c_uint32(MAX_NUMBER_OF_AFIDS_PER_RECORD)

        # Call the wrapper function
        status = amdsmi_wrapper.amdsmi_get_afids_from_cper(
            buf_ptr,
            ctypes.c_uint32(record_size),
            afid_array,
            ctypes.byref(num_afids_ct)
        )
        if status != amdsmi_wrapper.AMDSMI_STATUS_SUCCESS:
            raise AmdSmiLibraryException(status)

        # Collect exactly the decoded AFIDs
        all_afids.extend(afid_array[i] for i in range(num_afids_ct.value))

    return all_afids, len(all_afids)
2023-02-25 07:26:18 -05:00
def amdsmi_get_gpu_board_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the board identification strings for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        Dict[str, Any]: ``model_number``, ``product_serial``, ``fru_id``,
        ``product_name`` and ``manufacturer_name``. Each value is decoded as
        UTF-8 and stripped; ``model_number`` and ``product_name`` are also
        passed through ``_pad_hex_value(..., 4)``. Empty values become "N/A".

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_info = amdsmi_wrapper.amdsmi_board_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_board_info(
        processor_handle, ctypes.byref(raw_info)
    )
    _check_res(status)

    def _text(raw_field):
        # Decode a fixed-size char field and trim surrounding whitespace.
        return raw_field.decode("utf-8").strip()

    decoded = {
        "model_number": _pad_hex_value(_text(raw_info.model_number), 4),
        "product_serial": _text(raw_info.product_serial),
        "fru_id": _text(raw_info.fru_id),
        "product_name": _pad_hex_value(_text(raw_info.product_name), 4),
        "manufacturer_name": _text(raw_info.manufacturer_name),
    }
    # Report fields that came back empty as "N/A".
    return {
        key: ("N/A" if value == "" else value)
        for key, value in decoded.items()
    }
2022-10-11 16:06:32 +02:00
2023-10-10 14:20:11 -05:00
def amdsmi_get_gpu_ras_feature_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the RAS EEPROM version and ECC correction schema flags.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        Dict[str, Any]: ``eeprom_version`` as a hex string, plus one boolean per
        bit of ``ecc_correction_schema_flag``: ``parity_schema`` (bit 0),
        ``single_bit_schema`` (bit 1), ``double_bit_schema`` (bit 2) and
        ``poison_schema`` (bit 3).

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    feature = amdsmi_wrapper.amdsmi_ras_feature_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_ras_feature_info(
        processor_handle, ctypes.byref(feature)
    )
    _check_res(status)

    eeprom_version = hex(feature.ras_eeprom_version)
    schema_flags = feature.ecc_correction_schema_flag
    return {
        "eeprom_version": eeprom_version,
        "parity_schema": bool(schema_flags & 0x1),
        "single_bit_schema": bool(schema_flags & 0x2),
        "double_bit_schema": bool(schema_flags & 0x4),
        "poison_schema": bool(schema_flags & 0x8),
    }
2023-10-10 14:20:11 -05:00
def amdsmi_get_gpu_ras_block_features_enabled(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """Return the RAS error state of every GPU block.

    Iterates ``AmdSmiGpuBlock``, skipping the members named "RESERVED" and
    "INVALID", and queries the library once per remaining block.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        List[Dict[str, Any]]: One ``{"block": name, "status": state_name}``
        dict per queried block. The member named "LAST" is reported as "MPIO".

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # A single state object is reused as the out-parameter for every block.
    state_out = amdsmi_wrapper.amdsmi_ras_err_state_t()
    skipped_names = ("RESERVED", "INVALID")
    collected = []
    for block in AmdSmiGpuBlock:
        if block.name in skipped_names:
            continue

        status = amdsmi_wrapper.amdsmi_get_gpu_ras_block_features_enabled(
            processor_handle,
            amdsmi_wrapper.amdsmi_gpu_block_t(block.value),
            ctypes.byref(state_out),
        )
        _check_res(status)

        reported_name = "MPIO" if block.name == "LAST" else block.name
        collected.append({
            "block": reported_name,
            "status": AmdSmiRasErrState(state_out.value).name,
        })
    return collected
2023-02-27 01:33:11 -05:00
def amdsmi_get_gpu_process_list(
    processor_handle: processor_handle_t,
) -> List[Dict[str, Any]]:
    """Return information about the processes reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        List[Dict[str, Any]]: One dict per process with the keys ``name``
        ("N/A" when empty), ``pid``, ``mem``, ``engine_usage`` (``gfx``,
        ``enc``), ``memory_usage`` (``gtt_mem``, ``cpu_mem``, ``vram_mem``),
        ``cu_occupancy`` and ``evicted_time``. The last two are passed through
        ``_validate_if_max_uint`` with the uint32 maximum.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    # Starts as the array capacity; the library call overwrites it with the
    # number of processes found.
    max_processes = ctypes.c_uint32(MAX_NUM_PROCESSES)
    process_list = (amdsmi_wrapper.amdsmi_proc_info_t * max_processes.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_process_list(
            processor_handle, ctypes.byref(max_processes), process_list
        )
    )

    result = []
    for index in range(max_processes.value):
        proc = process_list[index]
        process_name = proc.name.decode("utf-8").strip()
        if process_name == "":
            process_name = "N/A"
        result.append({
            "name": process_name,
            "pid": proc.pid,
            "mem": proc.mem,
            "engine_usage": {
                "gfx": proc.engine_usage.gfx,
                "enc": proc.engine_usage.enc
            },
            "memory_usage": {
                "gtt_mem": proc.memory_usage.gtt_mem,
                "cpu_mem": proc.memory_usage.cpu_mem,
                "vram_mem": proc.memory_usage.vram_mem,
            },
            "cu_occupancy": _validate_if_max_uint(proc.cu_occupancy, MaxUIntegerTypes.UINT32_T),
            "evicted_time": _validate_if_max_uint(proc.evicted_time, MaxUIntegerTypes.UINT32_T)
        })

    return result
2022-10-11 16:06:32 +02:00
2023-07-21 08:26:59 -05:00
def amdsmi_get_gpu_driver_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the driver name, version and date for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        Dict[str, Any]: ``driver_name``, ``driver_version`` and ``driver_date``
        decoded as UTF-8; empty values are reported as "N/A".

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_info = amdsmi_wrapper.amdsmi_driver_info_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_driver_info(
        processor_handle, ctypes.byref(raw_info)
    )
    _check_res(status)

    # Not including os_kernel_version here due to it just being os.uname().release
    decoded = {
        "driver_name": raw_info.driver_name.decode("utf-8"),
        "driver_version": raw_info.driver_version.decode("utf-8"),
        "driver_date": raw_info.driver_date.decode("utf-8"),
    }
    return {
        key: ("N/A" if value == "" else value)
        for key, value in decoded.items()
    }
2022-10-11 16:06:32 +02:00
2023-05-16 12:31:52 +02:00
def amdsmi_get_power_info(
    processor_handle: processor_handle_t
) -> Dict[str, ctypes.c_uint32]:
    """Return the power and voltage readings for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        A dict with ``socket_power``, ``current_socket_power``,
        ``average_socket_power``, ``gfx_voltage``, ``soc_voltage``,
        ``mem_voltage`` and ``power_limit``. Any value equal to the uint8,
        uint16, uint32 or uint64 maximum is replaced by "N/A".

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    readings = amdsmi_wrapper.amdsmi_power_info_t()
    status = amdsmi_wrapper.amdsmi_get_power_info(
        processor_handle, ctypes.byref(readings)
    )
    _check_res(status)

    field_names = (
        "socket_power",
        "current_socket_power",
        "average_socket_power",
        "gfx_voltage",
        "soc_voltage",
        "mem_voltage",
        "power_limit",
    )
    # Values equal to any of these unsigned maxima are reported as "N/A".
    unavailable_markers = (
        MaxUIntegerTypes.UINT8_T,
        MaxUIntegerTypes.UINT16_T,
        MaxUIntegerTypes.UINT32_T,
        MaxUIntegerTypes.UINT64_T,
    )

    power_info_dict = {}
    for field in field_names:
        reading = getattr(readings, field)
        power_info_dict[field] = "N/A" if reading in unavailable_markers else reading
    return power_info_dict
2022-10-11 16:06:32 +02:00
2023-09-20 14:02:45 -05:00
def amdsmi_is_gpu_power_management_enabled(
    processor_handle: processor_handle_t
) -> bool:
    """Return the power-management-enabled flag reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        bool: The boolean written by the library call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    enabled_flag = ctypes.c_bool()
    status = amdsmi_wrapper.amdsmi_is_gpu_power_management_enabled(
        processor_handle, ctypes.byref(enabled_flag)
    )
    _check_res(status)
    return enabled_flag.value
2022-10-11 16:06:32 +02:00
def amdsmi_get_fw_info(
    processor_handle: processor_handle_t
) -> Dict[str, List[Dict[str, str]]]:
    """Return the firmware blocks of a processor with formatted versions.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        ``{'fw_list': [...]}`` where each element holds ``fw_name`` (an
        ``AmdSmiFwBlock`` member) and ``fw_version`` (upper-cased string).
        Depending on the block, the version is rendered as dot-separated hex
        byte pairs, dot-separated decimal byte values, or the plain integer.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_fw_info = amdsmi_wrapper.amdsmi_fw_info_t()
    status = amdsmi_wrapper.amdsmi_get_fw_info(
        processor_handle, ctypes.byref(raw_fw_info)
    )
    _check_res(status)

    # Some firmware versions are intentionally front-padded with zeros, but
    # the C library hands the value back as an integer, which drops that
    # padding, and no flag describes the expected format of each block.
    # The blocks below are rendered as zero-padded hex pairs.
    hex_format_fw = [
        AmdSmiFwBlock.AMDSMI_FW_ID_PSP_SOSDRV,
        AmdSmiFwBlock.AMDSMI_FW_ID_TA_RAS,
        AmdSmiFwBlock.AMDSMI_FW_ID_TA_XGMI,
        AmdSmiFwBlock.AMDSMI_FW_ID_UVD,
        AmdSmiFwBlock.AMDSMI_FW_ID_VCE,
        AmdSmiFwBlock.AMDSMI_FW_ID_VCN,
    ]

    # PM (a.k.a. SMC) firmware looks like 0x12345678 in hex but is shown as
    # int(0x12).int(0x34).int(0x56).int(0x78); these blocks get that
    # per-byte decimal rendering.
    dec_format_fw = [
        AmdSmiFwBlock.AMDSMI_FW_ID_PM,
        AmdSmiFwBlock.AMDSMI_FW_ID_PLDM_BUNDLE,
    ]

    firmwares = []
    for entry_index in range(0, raw_fw_info.num_fw_info):
        entry = raw_fw_info.fw_info_list[entry_index]
        fw_name = AmdSmiFwBlock(entry.fw_id)
        fw_version = entry.fw_version  # integer as delivered by the library

        if fw_name in hex_format_fw:
            # Hex digits, zero-padded to at least 8, grouped in pairs.
            padded_hex = hex(fw_version)[2:].zfill(8)
            fw_version_string = ".".join(re.findall('..?', padded_hex))
        elif fw_name in dec_format_fw:
            # Same padding, but each hex pair is converted to a decimal number
            # of at least two digits.
            padded_hex = hex(fw_version)[2:].zfill(8)
            hex_pairs = [
                padded_hex[start:start + 2]
                for start in range(0, len(padded_hex), 2)
            ]
            fw_version_string = ".".join(
                str(int(f"0x{pair}", 16)).zfill(2) for pair in hex_pairs
            ).strip('.')
        else:
            fw_version_string = str(fw_version)

        firmwares.append({
            'fw_name': fw_name,
            'fw_version': fw_version_string.upper(),
        })
    return {'fw_list': firmwares}
2022-10-11 16:06:32 +02:00
2023-02-25 07:47:11 -05:00
def amdsmi_get_gpu_vram_usage(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the total and used VRAM figures reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        Dict[str, Any]: ``vram_total`` and ``vram_used`` as reported by the
        library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    usage = amdsmi_wrapper.amdsmi_vram_usage_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_vram_usage(
        processor_handle, ctypes.byref(usage)
    )
    _check_res(status)

    return {
        "vram_total": usage.vram_total,
        "vram_used": usage.vram_used,
    }
2022-10-11 16:06:32 +02:00
2024-01-24 10:16:35 -06:00
def amdsmi_get_pcie_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the static and dynamic PCIe information for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        Dict[str, Any]: Two nested dicts, ``pcie_static`` and ``pcie_metric``.
        Numeric fields are passed through ``_validate_if_max_uint`` with the
        maximum of their unsigned width. ``slot_type`` is the card form-factor
        name without its ``AMDSMI_CARD_FORM_FACTOR_`` prefix, "Unknown" for an
        unrecognised integer, or "N/A" when the value is not an integer.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    pcie_info = amdsmi_wrapper.amdsmi_pcie_info_t()
    status = amdsmi_wrapper.amdsmi_get_pcie_info(
        processor_handle, ctypes.byref(pcie_info)
    )
    _check_res(status)

    static = pcie_info.pcie_static
    metric = pcie_info.pcie_metric
    max_u16 = MaxUIntegerTypes.UINT16_T
    max_u32 = MaxUIntegerTypes.UINT32_T
    max_u64 = MaxUIntegerTypes.UINT64_T

    static_dict = {
        "max_pcie_width": _validate_if_max_uint(static.max_pcie_width, max_u16),
        "max_pcie_speed": _validate_if_max_uint(static.max_pcie_speed, max_u32),
        "pcie_interface_version": _validate_if_max_uint(static.pcie_interface_version, max_u32),
        "slot_type": static.slot_type,
    }
    metric_dict = {
        "pcie_width": _validate_if_max_uint(metric.pcie_width, max_u16),
        "pcie_speed": _validate_if_max_uint(metric.pcie_speed, max_u32),
        "pcie_bandwidth": _validate_if_max_uint(metric.pcie_bandwidth, max_u32),
        "pcie_replay_count": _validate_if_max_uint(metric.pcie_replay_count, max_u64),
        "pcie_l0_to_recovery_count": _validate_if_max_uint(metric.pcie_l0_to_recovery_count, max_u64),
        "pcie_replay_roll_over_count": _validate_if_max_uint(metric.pcie_replay_roll_over_count, max_u64),
        "pcie_nak_sent_count": _validate_if_max_uint(metric.pcie_nak_sent_count, max_u64),
        "pcie_nak_received_count": _validate_if_max_uint(metric.pcie_nak_received_count, max_u64),
        "pcie_lc_perf_other_end_recovery_count": _validate_if_max_uint(metric.pcie_lc_perf_other_end_recovery_count, max_u32),
    }

    # Translate the raw slot type into a readable label, in place so the key
    # keeps its position in the static dict.
    raw_slot_type = static_dict["slot_type"]
    if not isinstance(raw_slot_type, int):
        slot_label = "N/A"
    else:
        form_factors = amdsmi_wrapper.amdsmi_card_form_factor_t__enumvalues
        if raw_slot_type in form_factors:
            slot_label = form_factors[raw_slot_type].replace("AMDSMI_CARD_FORM_FACTOR_", "")
        else:
            slot_label = "Unknown"
    static_dict["slot_type"] = slot_label

    return {
        "pcie_static": static_dict,
        "pcie_metric": metric_dict,
    }
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_xcd_counter(processor_handle: processor_handle_t) -> int:
    """Return the XCD counter value reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        int: The uint16 value written by the library call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    counter_out = ctypes.c_uint16()
    status = amdsmi_wrapper.amdsmi_get_gpu_xcd_counter(
        processor_handle, ctypes.byref(counter_out)
    )
    _check_res(status)
    return counter_out.value
2022-10-11 16:06:32 +02:00
2023-02-25 05:28:40 -05:00
def amdsmi_get_processor_handle_from_bdf(bdf):
    """Return the processor handle that corresponds to a BDF identifier.

    Args:
        bdf: BDF identifier; it is parsed by ``_parse_bdf``.

    Returns:
        The ``amdsmi_wrapper.amdsmi_processor_handle`` filled in by the library.

    Raises:
        AmdSmiBdfFormatException: If ``_parse_bdf`` returns ``None`` for the
            given value. The exception carries the original input.
    """
    # Keep the caller's value separate from the parsed result so the exception
    # reports the offending input rather than None.
    parsed_bdf = _parse_bdf(bdf)
    if parsed_bdf is None:
        raise AmdSmiBdfFormatException(bdf)

    amdsmi_bdf = _make_amdsmi_bdf_from_list(parsed_bdf)
    processor_handle = amdsmi_wrapper.amdsmi_processor_handle()
    _check_res(amdsmi_wrapper.amdsmi_get_processor_handle_from_bdf(
        amdsmi_bdf, ctypes.byref(processor_handle)))
    return processor_handle
2022-11-09 15:21:42 +01:00
2023-02-25 07:00:50 -05:00
def amdsmi_get_gpu_vendor_name(
    processor_handle: processor_handle_t,
) -> str:
    """Return the vendor name string reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        str: The buffer contents decoded as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output buffer and its capacity, both sized by _AMDSMI_STRING_LENGTH.
    capacity = ctypes.c_uint64(_AMDSMI_STRING_LENGTH)
    name_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)
    status = amdsmi_wrapper.amdsmi_get_gpu_vendor_name(
        processor_handle, name_buffer, capacity
    )
    _check_res(status)
    return name_buffer.value.decode("utf-8")
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_id(processor_handle: processor_handle_t):
    """Return the GPU id reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The uint16 value written by the library call, as a Python int.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    id_out = ctypes.c_uint16()
    status = amdsmi_wrapper.amdsmi_get_gpu_id(
        processor_handle, ctypes.byref(id_out)
    )
    _check_res(status)
    return id_out.value
2022-11-10 15:29:32 +01:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_vram_vendor(processor_handle: processor_handle_t):
    """Return the VRAM vendor string reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The buffer contents decoded as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # This call takes its buffer capacity as a uint32.
    capacity = ctypes.c_uint32(_AMDSMI_STRING_LENGTH)
    vendor_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)
    status = amdsmi_wrapper.amdsmi_get_gpu_vram_vendor(
        processor_handle, vendor_buffer, capacity
    )
    _check_res(status)
    return vendor_buffer.value.decode("utf-8")
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_subsystem_id(processor_handle: processor_handle_t):
    """Return the subsystem id of a processor as a padded hex value.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The result of ``_pad_hex_value(hex(id), 4)`` for the uint16 id written
        by the library call.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    id_out = ctypes.c_uint16()
    status = amdsmi_wrapper.amdsmi_get_gpu_subsystem_id(
        processor_handle, ctypes.byref(id_out)
    )
    _check_res(status)

    hex_id = hex(id_out.value)
    return _pad_hex_value(hex_id, 4)
2022-11-10 15:29:32 +01:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_subsystem_name(processor_handle: processor_handle_t):
    """Return the subsystem name string reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The buffer contents decoded as UTF-8.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output buffer and its capacity, both sized by _AMDSMI_STRING_LENGTH.
    capacity = ctypes.c_uint64(_AMDSMI_STRING_LENGTH)
    name_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)
    status = amdsmi_wrapper.amdsmi_get_gpu_subsystem_name(
        processor_handle, name_buffer, capacity
    )
    _check_res(status)
    return name_buffer.value.decode("utf-8")
2023-05-31 10:30:59 +02:00
def amdsmi_get_lib_version():
    """Return the AMD SMI library version.

    Returns:
        A dict with ``major``, ``minor`` and ``release`` taken from the
        library's version structure, and ``build`` decoded as UTF-8 from the
        value its ``build`` pointer refers to.
    """
    lib_version = amdsmi_wrapper.amdsmi_version_t()
    status = amdsmi_wrapper.amdsmi_get_lib_version(ctypes.byref(lib_version))
    _check_res(status)

    version_dict = {
        "major": lib_version.major,
        "minor": lib_version.minor,
        "release": lib_version.release,
    }
    # build is a pointer field; dereference it before decoding.
    version_dict["build"] = lib_version.build.contents.value.decode("utf-8")
    return version_dict
def amdsmi_topo_get_numa_node_number(
    processor_handle: processor_handle_t,
):
    """Return the NUMA node number reported for a processor.

    Args:
        processor_handle: Handle of the processor to query.

    Returns:
        The uint32 value written by the library call, as a Python int.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    node_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_topo_get_numa_node_number(
        processor_handle, ctypes.byref(node_out)
    )
    _check_res(status)
    return node_out.value
def amdsmi_topo_get_link_weight(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t
):
    """Return the link weight the library reports between two processors.

    Parameters:
        processor_handle_src: source device handle.
        processor_handle_dst: destination device handle.
        Both must be ``amdsmi_wrapper.amdsmi_processor_handle`` instances.

    Returns:
        The integer value written by the library into a 64-bit unsigned slot.

    Raises:
        AmdSmiParameterException: either handle has the wrong type (the source
            handle is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle_src, handle_type):
        raise AmdSmiParameterException(processor_handle_src, handle_type)
    if not isinstance(processor_handle_dst, handle_type):
        raise AmdSmiParameterException(processor_handle_dst, handle_type)

    weight_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_topo_get_link_weight(
        processor_handle_src, processor_handle_dst, ctypes.byref(weight_out)
    )
    _check_res(status)
    return weight_out.value
2023-09-13 16:16:33 -05:00
def amdsmi_get_minmax_bandwidth_between_processors(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the minimum and maximum bandwidth reported between two processors.

    Parameters:
        processor_handle_src: source device handle.
        processor_handle_dst: destination device handle.
        Both must be ``amdsmi_wrapper.amdsmi_processor_handle`` instances.

    Returns:
        dict with the minimum and the maximum bandwidth values, in that order.

    Raises:
        AmdSmiParameterException: either handle has the wrong type (the source
            handle is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    for candidate in (processor_handle_src, processor_handle_dst):
        if not isinstance(candidate, handle_type):
            raise AmdSmiParameterException(candidate, handle_type)

    lowest = ctypes.c_uint64()
    highest = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_minmax_bandwidth_between_processors(
        processor_handle_src,
        processor_handle_dst,
        ctypes.byref(lowest),
        ctypes.byref(highest),
    )
    _check_res(status)

    bandwidth = {" min_bandwidth ": lowest.value}
    bandwidth[" max_bandwidth "] = highest.value
    return bandwidth
2025-10-06 14:50:00 -05:00
def amdsmi_get_link_metrics(processor_handle: processor_handle_t):
    """Return the per-link metrics the library reports for a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        dict with the link count from the metrics struct and a list holding
        one dict per slot of the links array. All AMDSMI_MAX_NUM_XGMI_LINKS
        slots are listed, regardless of the reported link count.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    metrics = amdsmi_wrapper.amdsmi_link_metrics_t()
    status = amdsmi_wrapper.amdsmi_get_link_metrics(
        processor_handle, ctypes.byref(metrics)
    )
    _check_res(status)

    per_link = [
        {
            " bdf ": _format_bdf(entry.bdf),
            " bit_rate ": entry.bit_rate,
            " max_bandwidth ": entry.max_bandwidth,
            " link_type ": entry.link_type,
            " read ": entry.read,
            " write ": entry.write,
        }
        for entry in (
            metrics.links[slot] for slot in range(AMDSMI_MAX_NUM_XGMI_LINKS)
        )
    ]
    return {" num_links ": metrics.num_links, " links ": per_link}
2022-11-10 15:29:32 +01:00
def amdsmi_topo_get_link_type(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the hop count and link type reported between two processors.

    Parameters:
        processor_handle_src: source device handle.
        processor_handle_dst: destination device handle.
        Both must be ``amdsmi_wrapper.amdsmi_processor_handle`` instances.

    Returns:
        dict with the hop count (read from a 64-bit slot) and the link type
        (read from a 32-bit slot) as plain integers.

    Raises:
        AmdSmiParameterException: either handle has the wrong type (the source
            handle is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    for candidate in (processor_handle_src, processor_handle_dst):
        if not isinstance(candidate, handle_type):
            raise AmdSmiParameterException(candidate, handle_type)

    hop_count = ctypes.c_uint64()
    link_kind = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_topo_get_link_type(
        processor_handle_src,
        processor_handle_dst,
        ctypes.byref(hop_count),
        ctypes.byref(link_kind),
    )
    _check_res(status)
    return {" hops ": hop_count.value, " type ": link_kind.value}
2022-11-10 15:29:32 +01:00
2024-09-18 19:53:32 -05:00
2024-08-21 11:26:36 +08:00
def amdsmi_topo_get_p2p_status(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return the link type and P2P capability flags between two processors.

    Parameters:
        processor_handle_src: source device handle.
        processor_handle_dst: destination device handle.
        Both must be ``amdsmi_wrapper.amdsmi_processor_handle`` instances.

    Returns:
        dict with the link type as a plain integer and a nested dict holding
        the five ``is_iolink_*`` fields of the capability struct.

    Raises:
        AmdSmiParameterException: either handle has the wrong type (the source
            handle is checked first).
        The library status is handed to ``_check_res``, which is expected to
        raise for a failing status.
    """
    if not isinstance(processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle_src, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle_dst, amdsmi_wrapper.amdsmi_processor_handle
        )

    type_32 = ctypes.c_uint32()
    cap = amdsmi_wrapper.struct_amdsmi_p2p_capability_t()
    _check_res(
        amdsmi_wrapper.amdsmi_topo_get_p2p_status(
            processor_handle_src,
            processor_handle_dst,
            ctypes.byref(type_32),
            ctypes.byref(cap),
        )
    )
    return {
        # Report the value the library wrote into type_32; the bare name
        # `type` would hand back the Python builtin instead of the link type.
        ' type ': type_32.value,
        ' cap ': {
            ' is_iolink_coherent ': cap.is_iolink_coherent,
            ' is_iolink_atomics_32bit ': cap.is_iolink_atomics_32bit,
            ' is_iolink_atomics_64bit ': cap.is_iolink_atomics_64bit,
            ' is_iolink_dma ': cap.is_iolink_dma,
            ' is_iolink_bi_directional ': cap.is_iolink_bi_directional,
        },
    }
2022-11-10 15:29:32 +01:00
def amdsmi_is_P2P_accessible(
    processor_handle_src: processor_handle_t,
    processor_handle_dst: processor_handle_t,
):
    """Return whether the library reports P2P access between two processors.

    Parameters:
        processor_handle_src: source device handle.
        processor_handle_dst: destination device handle.
        Both must be ``amdsmi_wrapper.amdsmi_processor_handle`` instances.

    Returns:
        The boolean value written by the library.

    Raises:
        AmdSmiParameterException: either handle has the wrong type (the source
            handle is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    for candidate in (processor_handle_src, processor_handle_dst):
        if not isinstance(candidate, handle_type):
            raise AmdSmiParameterException(candidate, handle_type)

    reachable = ctypes.c_bool()
    status = amdsmi_wrapper.amdsmi_is_P2P_accessible(
        processor_handle_src, processor_handle_dst, ctypes.byref(reachable)
    )
    _check_res(status)
    return reachable.value
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_compute_partition(processor_handle: processor_handle_t):
    """Return the compute partition string the library reports for a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The content of the output buffer decoded to ``str``.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # The buffer and its advertised size both use _AMDSMI_STRING_LENGTH.
    buffer_size = ctypes.c_uint32(_AMDSMI_STRING_LENGTH)
    partition_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)
    status = amdsmi_wrapper.amdsmi_get_gpu_compute_partition(
        processor_handle, partition_buffer, buffer_size
    )
    _check_res(status)
    return partition_buffer.value.decode(" utf-8 ")
2025-10-06 14:50:00 -05:00
def amdsmi_set_gpu_compute_partition(processor_handle: processor_handle_t,
                                     compute_partition: AmdSmiComputePartitionType):
    """Ask the library to apply a compute partition type to a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        compute_partition: an ``AmdSmiComputePartitionType`` member.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(compute_partition, AmdSmiComputePartitionType):
        raise AmdSmiParameterException(compute_partition, AmdSmiComputePartitionType)

    status = amdsmi_wrapper.amdsmi_set_gpu_compute_partition(
        processor_handle, compute_partition
    )
    _check_res(status)
2025-10-06 14:50:00 -05:00
def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: processor_handle_t,
                                                 profile_index: int):
    """Ask the library to select an accelerator partition profile by index.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        profile_index: integer index forwarded unchanged to the library.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(profile_index, int):
        raise AmdSmiParameterException(profile_index, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_accelerator_partition_profile(
        processor_handle, profile_index
    )
    _check_res(status)
2023-10-13 01:41:14 -05:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_memory_partition(processor_handle: processor_handle_t):
    """Return the memory partition string the library reports for a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The content of the output buffer decoded to ``str``.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # The buffer and its advertised size both use _AMDSMI_STRING_LENGTH.
    buffer_size = ctypes.c_uint32(_AMDSMI_STRING_LENGTH)
    partition_buffer = ctypes.create_string_buffer(_AMDSMI_STRING_LENGTH)
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_partition(
        processor_handle, partition_buffer, buffer_size
    )
    _check_res(status)
    return partition_buffer.value.decode(" utf-8 ")
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_memory_partition_config(processor_handle: processor_handle_t):
    """Return the memory partition configuration reported for a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        dict with the list of NPS capability labels whose flag equals 1 (or a
        single N/A entry when all four flags equal 0), the current partition
        mode name derived from the wrapper's enum-name table, and two
        NUMA-range entries that are always the N/A placeholder.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    config = amdsmi_wrapper.amdsmi_memory_partition_config_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_partition_config(
        processor_handle, config
    )
    _check_res(status)

    nps_flags = config.partition_caps.nps_flags
    flag_labels = (
        (nps_flags.nps1_cap, " NPS1 "),
        (nps_flags.nps2_cap, " NPS2 "),
        (nps_flags.nps4_cap, " NPS4 "),
        (nps_flags.nps8_cap, " NPS8 "),
    )
    mem_caps_list = [label for flag, label in flag_labels if flag == 1]
    if all(flag == 0 for flag, _ in flag_labels):
        mem_caps_list.append(" N/A ")

    # Turn the enum value into its symbolic name, then trim/replace pieces of it.
    mode_name = amdsmi_wrapper.amdsmi_memory_partition_type_t__enumvalues[config.mp_mode]
    mode_name = mode_name.replace(" AMDSMI_MEMORY_PARTITION_ ", " ")
    mode_name = mode_name.replace(" UNKNOWN ", " N/A ")

    return {
        " partition_caps ": mem_caps_list,
        " mp_mode ": mode_name,
        " num_numa_ranges ": " N/A ",
        " numa_range ": " N/A ",
    }
2023-10-13 01:41:14 -05:00
2025-10-06 14:50:00 -05:00
def amdsmi_set_gpu_memory_partition(processor_handle: processor_handle_t,
                                    memory_partition: AmdSmiMemoryPartitionType):
    """Ask the library to apply a memory partition type to a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        memory_partition: an ``AmdSmiMemoryPartitionType`` member.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(memory_partition, AmdSmiMemoryPartitionType):
        raise AmdSmiParameterException(memory_partition, AmdSmiMemoryPartitionType)

    status = amdsmi_wrapper.amdsmi_set_gpu_memory_partition(
        processor_handle, memory_partition
    )
    _check_res(status)
2025-10-06 14:50:00 -05:00
def amdsmi_set_gpu_memory_partition_mode(processor_handle: processor_handle_t,
                                         memory_partition: AmdSmiMemoryPartitionType):
    """Ask the library to apply a memory partition type to a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        memory_partition: an ``AmdSmiMemoryPartitionType`` member.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(memory_partition, AmdSmiMemoryPartitionType):
        raise AmdSmiParameterException(memory_partition, AmdSmiMemoryPartitionType)

    # NOTE(review): this forwards to amdsmi_set_gpu_memory_partition, not to a
    # "_mode"-suffixed wrapper entry point -- confirm that is intended.
    status = amdsmi_wrapper.amdsmi_set_gpu_memory_partition(
        processor_handle, memory_partition
    )
    _check_res(status)
2023-10-13 01:41:14 -05:00
2024-09-18 19:53:32 -05:00
def amdsmi_get_gpu_accelerator_partition_profile(
    processor_handle: processor_handle_t
) -> Dict[str, Any]:
    """Return the current accelerator partition profile and partition id.

    When the library answers AMDSMI_STATUS_NOT_SUPPORTED the partition id is
    still reported and every profile field is the N/A placeholder. Any other
    failing status propagates as the exception raised by ``_check_res``.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        dict with a list holding the partition id (element 0 of the id array
        filled by the library) and a nested dict describing the profile.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
        AmdSmiException: the library returned a failing status other than
            AMDSMI_STATUS_NOT_SUPPORTED.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    kPOSITION_OF_PARTITION_ID = 0
    # Eight zero-initialised uint32 slots for the library to fill.
    partition_id_list = (ctypes.c_uint32 * 8)()
    profile = amdsmi_wrapper.amdsmi_accelerator_partition_profile_t()
    partition_ids = []

    ret = amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile(
        processor_handle, ctypes.byref(profile), partition_id_list
    )
    if ret == amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED:
        # partition_id[0] will contain the partition id of each device.
        # BM/Guest will include this logic. Host will only display primary partition ids.
        partition_ids.append(partition_id_list[kPOSITION_OF_PARTITION_ID])

    try:
        _check_res(ret)
    except AmdSmiException:
        if ret != amdsmi_wrapper.AMDSMI_STATUS_NOT_SUPPORTED:
            # Propagate the original error untouched. No success-path work
            # runs here, so it cannot mask the real failure.
            raise
        # Profile not supported: still hand back the partition id.
        return {
            " partition_id ": partition_ids,
            " partition_profile ": {
                " profile_type ": " N/A ",
                " num_partitions ": " N/A ",
                " profile_index ": " N/A ",
                " memory_caps ": " N/A ",
                " num_resources ": " N/A ",
                " resources ": " N/A ",
            },
        }

    profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[
        profile.profile_type
    ].replace(" AMDSMI_ACCELERATOR_PARTITION_ ", " ")
    profile_type_ret = profile_type_ret.replace(" INVALID ", " N/A ")

    # partition_id[0] will contain the partition id of each device.
    # BM/Guest will include this logic. Host will only display primary partition ids.
    partition_ids.append(partition_id_list[kPOSITION_OF_PARTITION_ID])

    nps_flags = profile.memory_caps.nps_flags
    flag_labels = (
        (nps_flags.nps1_cap, " NPS1 "),
        (nps_flags.nps2_cap, " NPS2 "),
        (nps_flags.nps4_cap, " NPS4 "),
        (nps_flags.nps8_cap, " NPS8 "),
    )
    mem_caps_list = [label for flag, label in flag_labels if flag == 1]
    if all(flag == 0 for flag, _ in flag_labels):
        mem_caps_list.append(" N/A ")

    return {
        " partition_id ": partition_ids,
        " partition_profile ": {
            " profile_type ": profile_type_ret,
            " num_partitions ": profile.num_partitions,
            " profile_index ": profile.profile_index,
            " memory_caps ": mem_caps_list,
            " num_resources ": profile.num_resources,
            " resources ": " N/A ",
        },
    }
2025-01-15 20:28:45 -06:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: processor_handle_t) -> Dict:
    """Return the accelerator partition profile configuration of a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        dict with the profile count, the resource-profile count, the default
        profile index, the list of per-profile dicts, and the resource list
        built for the last profile visited (empty when there are no profiles).

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    config = amdsmi_wrapper.amdsmi_accelerator_partition_profile_config_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile_config(
        processor_handle, ctypes.byref(config)
    )
    _check_res(status)

    partition_type_names = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues
    resource_type_names = amdsmi_wrapper.amdsmi_accelerator_partition_resource_type_t__enumvalues

    profiles = []
    resources = []
    # Running cursor into config.resource_profiles; it is never reset between profiles.
    resource_idx = 0
    for profile_pos in range(config.num_profiles):
        profile = config.profiles[profile_pos]

        profile_type_ret = partition_type_names[profile.profile_type]
        profile_type_ret = profile_type_ret.replace(" AMDSMI_ACCELERATOR_PARTITION_ ", " ")
        profile_type_ret = profile_type_ret.replace(" INVALID ", " N/A ")

        nps_flags = profile.memory_caps.nps_flags
        flag_labels = (
            (nps_flags.nps1_cap, " NPS1 "),
            (nps_flags.nps2_cap, " NPS2 "),
            (nps_flags.nps4_cap, " NPS4 "),
            (nps_flags.nps8_cap, " NPS8 "),
        )
        mem_caps_list = [label for flag, label in flag_labels if flag == 1]
        if all(flag == 0 for flag, _ in flag_labels):
            mem_caps_list.append(" N/A ")

        resources = []
        # NOTE(review): the bound is the config-wide num_resource_profiles while
        # resource_idx keeps growing across profiles -- confirm this cannot
        # index past resource_profiles when there is more than one profile.
        for _ in range(config.num_resource_profiles):
            res_profile = config.resource_profiles[resource_idx]
            resource_type_label = resource_type_names[res_profile.resource_type].replace(
                " AMDSMI_ACCELERATOR_ ", " "
            )
            resources.append({
                " profile_index ": res_profile.profile_index,
                " resource_type ": resource_type_label,
                " partition_resource ": res_profile.partition_resource,
                " num_partitions_share_resource ": res_profile.num_partitions_share_resource,
            })
            resource_idx += 1

        profiles.append({
            " profile_type ": profile_type_ret,
            " num_partitions ": profile.num_partitions,
            " profile_index ": profile.profile_index,
            " memory_caps ": mem_caps_list,
            " num_resources ": profile.num_resources,
            " resources ": resources,
        })

    return {
        " num_profiles ": config.num_profiles,
        " num_resource_profiles ": config.num_resource_profiles,
        " resource_profiles ": resources,
        " default_profile_index ": config.default_profile_index,
        " profiles ": profiles,
    }
2024-09-18 19:53:32 -05:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_xgmi_info(processor_handle: processor_handle_t):
    """Return the XGMI information struct of a processor as a dictionary.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        dict with the lanes, hive id, node id and index fields of the struct.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    info = amdsmi_wrapper.amdsmi_xgmi_info_t()
    status = amdsmi_wrapper.amdsmi_get_xgmi_info(processor_handle, info)
    _check_res(status)

    xgmi = {}
    xgmi[" xgmi_lanes "] = info.xgmi_lanes
    xgmi[" xgmi_hive_id "] = info.xgmi_hive_id
    xgmi[" xgmi_node_id "] = info.xgmi_node_id
    xgmi[" index "] = info.index
    return xgmi
2023-02-27 02:00:14 -05:00
def amdsmi_gpu_counter_group_supported(
    processor_handle: processor_handle_t,
    event_group: AmdSmiEventGroup,
):
    """Check with the library whether a counter event group is supported.

    Nothing is returned; the outcome is conveyed through ``_check_res``, which
    is expected to raise for a failing status.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        event_group: an ``AmdSmiEventGroup`` member.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(event_group, AmdSmiEventGroup):
        raise AmdSmiParameterException(event_group, AmdSmiEventGroup)

    status = amdsmi_wrapper.amdsmi_gpu_counter_group_supported(
        processor_handle, event_group
    )
    _check_res(status)
2023-02-27 02:01:35 -05:00
def amdsmi_gpu_create_counter(
    processor_handle: processor_handle_t,
    event_type: AmdSmiEventType,
) -> amdsmi_wrapper.amdsmi_event_handle_t:
    """Create a performance counter for an event type and return its handle.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        event_type: an ``AmdSmiEventType`` member.

    Returns:
        The ``amdsmi_event_handle_t`` filled in by the library.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(event_type, AmdSmiEventType):
        raise AmdSmiParameterException(event_type, AmdSmiEventType)

    new_handle = amdsmi_wrapper.amdsmi_event_handle_t()
    status = amdsmi_wrapper.amdsmi_gpu_create_counter(
        processor_handle, event_type, ctypes.byref(new_handle)
    )
    _check_res(status)
    return new_handle
2025-10-06 14:50:00 -05:00
def amdsmi_gpu_destroy_counter(event_handle: amdsmi_wrapper.amdsmi_event_handle_t) -> None:
    """Ask the library to destroy a previously created counter.

    Parameters:
        event_handle: counter handle; must be an
            ``amdsmi_wrapper.amdsmi_event_handle_t``.

    Raises:
        AmdSmiParameterException: ``event_handle`` has the wrong type.
    """
    event_handle_type = amdsmi_wrapper.amdsmi_event_handle_t
    if not isinstance(event_handle, event_handle_type):
        raise AmdSmiParameterException(event_handle, event_handle_type)

    status = amdsmi_wrapper.amdsmi_gpu_destroy_counter(event_handle)
    _check_res(status)
2022-11-10 10:11:59 +01:00
2023-02-27 02:04:54 -05:00
def amdsmi_gpu_control_counter(
    event_handle: amdsmi_wrapper.amdsmi_event_handle_t,
    counter_command: AmdSmiCounterCommand,
):
    """Send a control command to an existing counter.

    Parameters:
        event_handle: counter handle; must be an
            ``amdsmi_wrapper.amdsmi_event_handle_t``.
        counter_command: an ``AmdSmiCounterCommand`` member.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    event_handle_type = amdsmi_wrapper.amdsmi_event_handle_t
    if not isinstance(event_handle, event_handle_type):
        raise AmdSmiParameterException(event_handle, event_handle_type)
    if not isinstance(counter_command, AmdSmiCounterCommand):
        raise AmdSmiParameterException(counter_command, AmdSmiCounterCommand)

    # A fresh handle object built from the caller's value is what gets passed,
    # together with an empty (null) void pointer as the command arguments.
    handle_copy = amdsmi_wrapper.amdsmi_event_handle_t(event_handle.value)
    empty_args = ctypes.c_void_p()
    status = amdsmi_wrapper.amdsmi_gpu_control_counter(
        handle_copy, counter_command, empty_args
    )
    _check_res(status)
2023-02-27 02:06:14 -05:00
def amdsmi_gpu_read_counter(
    event_handle: amdsmi_wrapper.amdsmi_event_handle_t,
) -> Dict[str, Any]:
    """Read the current value of a counter.

    Parameters:
        event_handle: counter handle; must be an
            ``amdsmi_wrapper.amdsmi_event_handle_t``.

    Returns:
        dict with the value, time-enabled and time-running fields of the
        counter value struct.

    Raises:
        AmdSmiParameterException: ``event_handle`` has the wrong type.
    """
    event_handle_type = amdsmi_wrapper.amdsmi_event_handle_t
    if not isinstance(event_handle, event_handle_type):
        raise AmdSmiParameterException(event_handle, event_handle_type)

    reading = amdsmi_wrapper.amdsmi_counter_value_t()
    status = amdsmi_wrapper.amdsmi_gpu_read_counter(
        event_handle, ctypes.byref(reading)
    )
    _check_res(status)

    snapshot = {" value ": reading.value}
    snapshot[" time_enabled "] = reading.time_enabled
    snapshot[" time_running "] = reading.time_running
    return snapshot
2023-02-27 02:08:28 -05:00
def amdsmi_get_gpu_available_counters(
    processor_handle: processor_handle_t,
    event_group: AmdSmiEventGroup,
) -> int:
    """Return the number of available counters the library reports for a group.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        event_group: an ``AmdSmiEventGroup`` member.

    Returns:
        The integer value written by the library into a 32-bit unsigned slot.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(event_group, AmdSmiEventGroup):
        raise AmdSmiParameterException(event_group, AmdSmiEventGroup)

    count_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_available_counters(
        processor_handle, event_group, ctypes.byref(count_out)
    )
    _check_res(status)
    return count_out.value
2023-02-26 20:59:06 -05:00
def amdsmi_set_gpu_perf_level(
    processor_handle: processor_handle_t,
    perf_level: AmdSmiDevPerfLevel,
):
    """Ask the library to apply a performance level to a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        perf_level: an ``AmdSmiDevPerfLevel`` member.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(perf_level, AmdSmiDevPerfLevel):
        raise AmdSmiParameterException(perf_level, AmdSmiDevPerfLevel)

    status = amdsmi_wrapper.amdsmi_set_gpu_perf_level(processor_handle, perf_level)
    _check_res(status)
2022-11-10 10:11:59 +01:00
2025-10-06 14:50:00 -05:00
def amdsmi_reset_gpu(processor_handle: processor_handle_t):
    """Ask the library to reset a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    status = amdsmi_wrapper.amdsmi_reset_gpu(processor_handle)
    _check_res(status)
2022-11-10 10:11:59 +01:00
2025-08-05 20:44:28 -05:00
def amdsmi_gpu_driver_reload():
    """Ask the library to reload the GPU driver.

    Takes no arguments and returns None; the library status is handed to
    ``_check_res``, which is expected to raise for a failing status.
    """
    status = amdsmi_wrapper.amdsmi_gpu_driver_reload()
    _check_res(status)
2022-11-10 10:11:59 +01:00
2023-02-25 08:29:08 -05:00
def amdsmi_set_gpu_fan_speed(
    processor_handle: processor_handle_t, sensor_idx: int, fan_speed: int
):
    """Ask the library to set the fan speed for a sensor of a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: integer sensor index, passed as a 32-bit unsigned value.
        fan_speed: integer fan speed, passed as a 64-bit unsigned value.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (checked in
            the order handle, sensor index, fan speed).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    for int_argument in (sensor_idx, fan_speed):
        if not isinstance(int_argument, int):
            raise AmdSmiParameterException(int_argument, int)

    c_sensor = ctypes.c_uint32(sensor_idx)
    c_speed = ctypes.c_uint64(fan_speed)
    status = amdsmi_wrapper.amdsmi_set_gpu_fan_speed(
        processor_handle, c_sensor, c_speed
    )
    _check_res(status)
2023-02-25 08:27:40 -05:00
def amdsmi_reset_gpu_fan(
    processor_handle: processor_handle_t, sensor_idx: int
):
    """Ask the library to reset the fan for a sensor of a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: integer sensor index, passed as a 32-bit unsigned value.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    c_sensor = ctypes.c_uint32(sensor_idx)
    status = amdsmi_wrapper.amdsmi_reset_gpu_fan(processor_handle, c_sensor)
    _check_res(status)
2022-11-10 10:11:59 +01:00
2023-02-26 21:01:44 -05:00
def amdsmi_set_clk_freq(
    processor_handle: processor_handle_t,
    clk_type: str,
    freq_bitmask: int,
):
    """Ask the library to apply a frequency bitmask to one clock of a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        clk_type: clock name, compared case-insensitively against the four
            names in the lookup table below.
        freq_bitmask: integer bitmask, passed as a 64-bit unsigned value.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type, or
            ``clk_type`` does not match any known clock name.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(clk_type, str):
        raise AmdSmiParameterException(clk_type, str)
    if not isinstance(freq_bitmask, int):
        raise AmdSmiParameterException(freq_bitmask, int)

    clock_by_name = {
        " sclk ": AmdSmiClkType.SYS,
        " mclk ": AmdSmiClkType.MEM,
        " fclk ": AmdSmiClkType.DF,
        " socclk ": AmdSmiClkType.SOC,
    }
    # Unknown names fall back to a placeholder string, which the type check
    # below then rejects.
    clk_type_conversion = clock_by_name.get(clk_type.lower(), " N/A ")
    if not isinstance(clk_type_conversion, AmdSmiClkType):
        raise AmdSmiParameterException(clk_type_conversion, AmdSmiClkType)

    c_bitmask = ctypes.c_uint64(freq_bitmask)
    status = amdsmi_wrapper.amdsmi_set_clk_freq(
        processor_handle, clk_type_conversion, c_bitmask
    )
    _check_res(status)
2024-04-08 10:35:24 -05:00
2024-05-23 10:31:37 -05:00
def amdsmi_set_soc_pstate(
    processor_handle: processor_handle_t,
    policy_id: int,
):
    """Ask the library to select a SoC pstate policy by id.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        policy_id: integer policy id forwarded unchanged to the library.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(policy_id, int):
        raise AmdSmiParameterException(policy_id, int)

    status = amdsmi_wrapper.amdsmi_set_soc_pstate(processor_handle, policy_id)
    _check_res(status)
2022-11-10 10:11:59 +01:00
2024-04-08 10:35:24 -05:00
2024-03-20 12:06:24 -05:00
def amdsmi_set_xgmi_plpd(
    processor_handle: processor_handle_t,
    policy_id: int,
):
    """Ask the library to select an XGMI PLPD policy by id.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        policy_id: integer policy id forwarded unchanged to the library.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(policy_id, int):
        raise AmdSmiParameterException(policy_id, int)

    status = amdsmi_wrapper.amdsmi_set_xgmi_plpd(processor_handle, policy_id)
    _check_res(status)
    return None
2024-03-20 12:06:24 -05:00
2024-04-08 10:35:24 -05:00
def amdsmi_set_gpu_process_isolation(
    processor_handle: processor_handle_t,
    pisolate: int,
):
    """Ask the library to set the process isolation value of a processor.

    Parameters:
        processor_handle: device handle; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        pisolate: integer value forwarded unchanged to the library.

    Returns:
        None.

    Raises:
        AmdSmiParameterException: an argument has the wrong type (the handle
            is checked first).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(pisolate, int):
        raise AmdSmiParameterException(pisolate, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_process_isolation(
        processor_handle, pisolate
    )
    _check_res(status)
2024-06-13 14:13:46 -05:00
def amdsmi_clean_gpu_local_data(
    processor_handle: processor_handle_t,
):
    """Invoke ``amdsmi_wrapper.amdsmi_clean_gpu_local_data`` for a handle.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    # The wrapper's return value is handed to _check_res (defined elsewhere in this file).
    status = amdsmi_wrapper.amdsmi_clean_gpu_local_data(processor_handle)
    _check_res(status)
2023-02-27 04:14:05 -05:00
def amdsmi_set_gpu_overdrive_level(
    processor_handle: processor_handle_t, overdrive_value: int
):
    """Pass an overdrive value to ``amdsmi_wrapper.amdsmi_set_gpu_overdrive_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        overdrive_value: Must be an ``int``; wrapped in ``ctypes.c_uint32``
            before the call.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(overdrive_value, int):
        raise AmdSmiParameterException(overdrive_value, int)

    od_arg = ctypes.c_uint32(overdrive_value)
    status = amdsmi_wrapper.amdsmi_set_gpu_overdrive_level(processor_handle, od_arg)
    _check_res(status)
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_bdf_id(processor_handle: processor_handle_t):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_bdf_id``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``.value`` of the ``ctypes.c_uint64`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    bdf_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_bdf_id(
        processor_handle, ctypes.byref(bdf_out)
    )
    _check_res(status)
    return bdf_out.value
2024-04-08 10:35:24 -05:00
2023-02-25 07:43:32 -05:00
def amdsmi_set_gpu_pci_bandwidth(
    processor_handle: processor_handle_t, bitmask: int
) -> None:
    """Pass a bitmask to ``amdsmi_wrapper.amdsmi_set_gpu_pci_bandwidth``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        bitmask: Must be an ``int``; wrapped in ``ctypes.c_uint64`` for the call.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(bitmask, int):
        raise AmdSmiParameterException(bitmask, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_pci_bandwidth(
        processor_handle, ctypes.c_uint64(bitmask)
    )
    _check_res(status)
2022-11-10 16:18:27 +01:00
def _format_transfer_rate ( transfer_rate ) :
return {
' num_supported ' : transfer_rate . num_supported ,
' current ' : transfer_rate . current ,
' frequency ' : list ( transfer_rate . frequency )
}
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_pci_bandwidth(processor_handle: processor_handle_t):
    """Query ``amdsmi_wrapper.amdsmi_get_gpu_pci_bandwidth`` and return a dict.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with ``'transfer_rate'`` (the ``transfer_rate`` member formatted
        by ``_format_transfer_rate``) and ``'lanes'`` (the ``lanes`` member as a list).

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    pcie_bw = amdsmi_wrapper.amdsmi_pcie_bandwidth_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_pci_bandwidth(
        processor_handle, ctypes.byref(pcie_bw)
    )
    _check_res(status)

    rate_info = _format_transfer_rate(pcie_bw.transfer_rate)
    lane_list = list(pcie_bw.lanes)
    return {'transfer_rate': rate_info, 'lanes': lane_list}
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_pci_throughput(processor_handle: processor_handle_t):
    """Query ``amdsmi_wrapper.amdsmi_get_gpu_pci_throughput``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with keys ``'sent'``, ``'received'`` and ``'max_pkt_sz'``, each the
        ``.value`` of a ``ctypes.c_uint64`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    tx = ctypes.c_uint64()
    rx = ctypes.c_uint64()
    pkt_max = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_pci_throughput(
        processor_handle,
        ctypes.byref(tx),
        ctypes.byref(rx),
        ctypes.byref(pkt_max),
    )
    _check_res(status)

    return {'sent': tx.value, 'received': rx.value, 'max_pkt_sz': pkt_max.value}
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_pci_replay_counter(processor_handle: processor_handle_t):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_pci_replay_counter``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``.value`` of the ``ctypes.c_uint64`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    replay_count = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_pci_replay_counter(
        processor_handle, ctypes.byref(replay_count)
    )
    _check_res(status)
    return replay_count.value
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_topo_numa_affinity(processor_handle: processor_handle_t):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_topo_numa_affinity``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``.value`` of the signed ``ctypes.c_int32`` passed by reference
        to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    node_out = ctypes.c_int32()
    status = amdsmi_wrapper.amdsmi_get_gpu_topo_numa_affinity(
        processor_handle, ctypes.byref(node_out)
    )
    _check_res(status)
    return node_out.value
2023-02-25 08:02:08 -05:00
def amdsmi_set_power_cap(
    processor_handle: processor_handle_t, sensor_ind: int, cap: int
) -> None:
    """Pass a sensor index and cap to ``amdsmi_wrapper.amdsmi_set_power_cap``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_ind: Must be an ``int``; sent as ``ctypes.c_uint32``.
        cap: Must be an ``int``; sent as ``ctypes.c_uint64``.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    # Integer arguments are checked in declaration order.
    for candidate in (sensor_ind, cap):
        if not isinstance(candidate, int):
            raise AmdSmiParameterException(candidate, int)

    status = amdsmi_wrapper.amdsmi_set_power_cap(
        processor_handle, ctypes.c_uint32(sensor_ind), ctypes.c_uint64(cap)
    )
    _check_res(status)
2023-02-25 08:04:42 -05:00
def amdsmi_set_gpu_power_profile(
    processor_handle: processor_handle_t,
    reserved: int,
    profile: AmdSmiPowerProfilePresetMasks,
) -> None:
    """Pass a preset to ``amdsmi_wrapper.amdsmi_set_gpu_power_profile``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        reserved: Must be an ``int``; sent as ``ctypes.c_uint32``.
        profile: Must be an ``AmdSmiPowerProfilePresetMasks``; sent unchanged.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(reserved, int):
        raise AmdSmiParameterException(reserved, int)
    if not isinstance(profile, AmdSmiPowerProfilePresetMasks):
        raise AmdSmiParameterException(profile, AmdSmiPowerProfilePresetMasks)

    status = amdsmi_wrapper.amdsmi_set_gpu_power_profile(
        processor_handle, ctypes.c_uint32(reserved), profile
    )
    _check_res(status)
2025-10-06 14:50:00 -05:00
def amdsmi_get_energy_count(processor_handle: processor_handle_t):
    """Query ``amdsmi_wrapper.amdsmi_get_energy_count``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with ``'energy_accumulator'`` (from a ``c_uint64``),
        ``'counter_resolution'`` (from a ``c_float``) and ``'timestamp'``
        (from a ``c_uint64``), all filled in by the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    accumulator = ctypes.c_uint64()
    resolution = ctypes.c_float()
    stamp = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_energy_count(
        processor_handle,
        ctypes.byref(accumulator),
        ctypes.byref(resolution),
        ctypes.byref(stamp),
    )
    _check_res(status)

    return {
        'energy_accumulator': accumulator.value,
        'counter_resolution': resolution.value,
        'timestamp': stamp.value,
    }
2023-02-26 20:57:22 -05:00
def amdsmi_set_gpu_clk_range(
    processor_handle: processor_handle_t,
    min_clk_value: int,
    max_clk_value: int,
    clk_type: AmdSmiClkType,
) -> None:
    """Pass a min/max clock pair to ``amdsmi_wrapper.amdsmi_set_gpu_clk_range``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        min_clk_value: Must be an ``int``; sent as ``ctypes.c_uint64``.
        max_clk_value: Must be an ``int``; sent as ``ctypes.c_uint64``.
        clk_type: Must be an ``AmdSmiClkType``; sent unchanged.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type. The
            exception carries the argument that actually failed the check.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(min_clk_value, int):
        raise AmdSmiParameterException(min_clk_value, int)
    if not isinstance(max_clk_value, int):
        # Report the offending max value (previously min_clk_value was reported here).
        raise AmdSmiParameterException(max_clk_value, int)
    if not isinstance(clk_type, AmdSmiClkType):
        raise AmdSmiParameterException(clk_type, AmdSmiClkType)

    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_clk_range(
            processor_handle,
            ctypes.c_uint64(min_clk_value),
            ctypes.c_uint64(max_clk_value),
            clk_type,
        )
    )
2024-09-12 13:54:18 -05:00
def amdsmi_set_gpu_clk_limit(
    processor_handle: processor_handle_t,
    clk_type: str,
    limit_type: str,
    value: int
) -> None:
    """Translate string selectors and call ``amdsmi_wrapper.amdsmi_set_gpu_clk_limit``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        clk_type: ``"sclk"`` or ``"mclk"`` (case-insensitive); mapped to
            ``AMDSMI_CLK_TYPE_SYS`` / ``AMDSMI_CLK_TYPE_MEM``.
        limit_type: ``"min"`` or ``"max"`` (case-insensitive); mapped to
            ``CLK_LIMIT_MIN`` / ``CLK_LIMIT_MAX``.
        value: Must be an ``int``; sent as ``ctypes.c_uint64``.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
        AmdSmiLibraryException: With ``AMDSMI_STATUS_INVAL`` when ``clk_type``
            or ``limit_type`` is a string outside the accepted set.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(clk_type, str):
        raise AmdSmiParameterException(clk_type, str)
    if not isinstance(limit_type, str):
        raise AmdSmiParameterException(limit_type, str)
    if not isinstance(value, int):
        raise AmdSmiParameterException(value, int)

    clk_key = clk_type.lower()
    if clk_key == "sclk":
        clk_type_conversion = amdsmi_wrapper.AMDSMI_CLK_TYPE_SYS
    elif clk_key == "mclk":
        clk_type_conversion = amdsmi_wrapper.AMDSMI_CLK_TYPE_MEM
    else:
        # Without this branch an unknown selector surfaced as UnboundLocalError.
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    limit_key = limit_type.lower()
    if limit_key == "min":
        limit_type_conversion = amdsmi_wrapper.CLK_LIMIT_MIN
    elif limit_key == "max":
        limit_type_conversion = amdsmi_wrapper.CLK_LIMIT_MAX
    else:
        # Same guard as above for the limit selector.
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_clk_limit(
            processor_handle,
            clk_type_conversion,
            limit_type_conversion,
            ctypes.c_uint64(value),
        )
    )
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_memory_total(processor_handle: processor_handle_t, mem_type: AmdSmiMemoryType):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_memory_total``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        mem_type: Must be an ``AmdSmiMemoryType``; passed to the wrapper unchanged.

    Returns:
        The ``.value`` of the ``ctypes.c_uint64`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(mem_type, AmdSmiMemoryType):
        raise AmdSmiParameterException(mem_type, AmdSmiMemoryType)

    total_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_total(
        processor_handle, mem_type, ctypes.byref(total_out)
    )
    _check_res(status)
    return total_out.value
2023-02-27 04:08:29 -05:00
def amdsmi_set_gpu_od_clk_info(
    processor_handle: processor_handle_t,
    level: AmdSmiFreqInd,
    value: int,
    clk_type: AmdSmiClkType,
) -> None:
    """Pass a level/value/clock-type triple to ``amdsmi_wrapper.amdsmi_set_gpu_od_clk_info``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        level: Must be an ``AmdSmiFreqInd``; sent unchanged.
        value: Must be an ``int``; sent as ``ctypes.c_uint64``.
        clk_type: Must be an ``AmdSmiClkType``; sent unchanged.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
    """
    # (argument, required type) pairs, checked in the original order.
    checks = (
        (processor_handle, amdsmi_wrapper.amdsmi_processor_handle),
        (level, AmdSmiFreqInd),
        (value, int),
        (clk_type, AmdSmiClkType),
    )
    for candidate, required in checks:
        if not isinstance(candidate, required):
            raise AmdSmiParameterException(candidate, required)

    status = amdsmi_wrapper.amdsmi_set_gpu_od_clk_info(
        processor_handle, level, ctypes.c_uint64(value), clk_type
    )
    _check_res(status)
2025-10-06 14:50:00 -05:00
def amdsmi_get_gpu_memory_usage(processor_handle: processor_handle_t, mem_type: AmdSmiMemoryType):
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_memory_usage``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        mem_type: Must be an ``AmdSmiMemoryType``; passed to the wrapper unchanged.

    Returns:
        The ``.value`` of the ``ctypes.c_uint64`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(mem_type, AmdSmiMemoryType):
        raise AmdSmiParameterException(mem_type, AmdSmiMemoryType)

    used_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_usage(
        processor_handle, mem_type, ctypes.byref(used_out)
    )
    _check_res(status)
    return used_out.value
2023-02-27 04:10:34 -05:00
def amdsmi_set_gpu_od_volt_info(
    processor_handle: processor_handle_t,
    vpoint: int,
    clk_value: int,
    volt_value: int,
) -> None:
    """Pass a point/clock/voltage triple to ``amdsmi_wrapper.amdsmi_set_gpu_od_volt_info``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        vpoint: Must be an ``int``; sent as ``ctypes.c_uint32``.
        clk_value: Must be an ``int``; sent as ``ctypes.c_uint64``.
        volt_value: Must be an ``int``; sent as ``ctypes.c_uint64``.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    # Integer arguments are checked in declaration order.
    for candidate in (vpoint, clk_value, volt_value):
        if not isinstance(candidate, int):
            raise AmdSmiParameterException(candidate, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_od_volt_info(
        processor_handle,
        ctypes.c_uint32(vpoint),
        ctypes.c_uint64(clk_value),
        ctypes.c_uint64(volt_value),
    )
    _check_res(status)
2023-02-25 08:13:46 -05:00
def amdsmi_get_gpu_fan_rpms(
    processor_handle: processor_handle_t, sensor_idx: int
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_fan_rpms``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Must be an ``int``; passed to the wrapper unchanged.

    Returns:
        The ``.value`` of the signed ``ctypes.c_int64`` passed by reference
        to the wrapper.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    rpm_out = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_gpu_fan_rpms(
        processor_handle, sensor_idx, ctypes.byref(rpm_out)
    )
    _check_res(status)
    return rpm_out.value
2023-02-25 08:15:36 -05:00
def amdsmi_get_gpu_fan_speed(
    processor_handle: processor_handle_t, sensor_idx: int
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_fan_speed``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Must be an ``int``; passed to the wrapper unchanged.

    Returns:
        The ``.value`` of the signed ``ctypes.c_int64`` passed by reference
        to the wrapper.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    speed_out = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_gpu_fan_speed(
        processor_handle, sensor_idx, ctypes.byref(speed_out)
    )
    _check_res(status)
    return speed_out.value
2023-02-25 08:15:36 -05:00
def amdsmi_get_gpu_fan_speed_max(
    processor_handle: processor_handle_t, sensor_idx: int
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_fan_speed_max``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Must be an ``int``; passed to the wrapper unchanged.

    Returns:
        The ``.value`` of the unsigned ``ctypes.c_uint64`` passed by reference
        to the wrapper.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    max_speed_out = ctypes.c_uint64()
    status = amdsmi_wrapper.amdsmi_get_gpu_fan_speed_max(
        processor_handle, sensor_idx, ctypes.byref(max_speed_out)
    )
    _check_res(status)
    return max_speed_out.value
2025-11-13 21:51:31 -06:00
def amdsmi_get_node_handle(processor_handle):
    """Return the node handle filled in by ``amdsmi_wrapper.amdsmi_get_node_handle``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``amdsmi_wrapper.amdsmi_node_handle`` instance passed by reference
        to the wrapper (the ctypes object itself, not its ``.value``).

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    node_out = amdsmi_wrapper.amdsmi_node_handle()
    status = amdsmi_wrapper.amdsmi_get_node_handle(
        processor_handle, ctypes.byref(node_out)
    )
    _check_res(status)
    return node_out
def amdsmi_get_npm_info(node_handle: processor_handle_t) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_npm_info`` for a node handle.

    Args:
        node_handle: Must be an ``amdsmi_wrapper.amdsmi_node_handle``.

    Returns:
        A dict with the ``limit`` and ``status`` members of the
        ``amdsmi_npm_info_t`` structure filled in by the wrapper.

    Raises:
        AmdSmiParameterException: If ``node_handle`` has the wrong type.
    """
    # NOTE(review): the annotation says processor_handle_t but the runtime check
    # requires amdsmi_node_handle -- confirm which one the annotation should name.
    node_cls = amdsmi_wrapper.amdsmi_node_handle
    if not isinstance(node_handle, node_cls):
        raise AmdSmiParameterException(node_handle, node_cls)

    info = amdsmi_wrapper.amdsmi_npm_info_t()
    status = amdsmi_wrapper.amdsmi_get_npm_info(node_handle, ctypes.byref(info))
    _check_res(status)

    return {"limit": info.limit, "status": info.status}
2023-02-25 08:23:10 -05:00
def amdsmi_get_temp_metric(
    processor_handle: processor_handle_t,
    sensor_type: AmdSmiTemperatureType,
    metric: AmdSmiTemperatureMetric,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_temp_metric``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_type: Must be an ``AmdSmiTemperatureType``; sent unchanged.
        metric: Must be an ``AmdSmiTemperatureMetric``; sent unchanged.

    Returns:
        The ``.value`` of the signed ``ctypes.c_int64`` passed by reference
        to the wrapper.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(sensor_type, AmdSmiTemperatureType):
        raise AmdSmiParameterException(sensor_type, AmdSmiTemperatureType)
    if not isinstance(metric, AmdSmiTemperatureMetric):
        raise AmdSmiParameterException(metric, AmdSmiTemperatureMetric)

    reading = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_temp_metric(
        processor_handle, sensor_type, metric, ctypes.byref(reading)
    )
    _check_res(status)
    return reading.value
2023-02-25 08:25:12 -05:00
def amdsmi_get_gpu_volt_metric(
    processor_handle: processor_handle_t,
    sensor_type: AmdSmiVoltageType,
    metric: AmdSmiVoltageMetric,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_volt_metric``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_type: Must be an ``AmdSmiVoltageType``; sent unchanged.
        metric: Must be an ``AmdSmiVoltageMetric``; sent unchanged.

    Returns:
        The ``.value`` of the signed ``ctypes.c_int64`` passed by reference
        to the wrapper.

    Raises:
        AmdSmiParameterException: If any argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(sensor_type, AmdSmiVoltageType):
        raise AmdSmiParameterException(sensor_type, AmdSmiVoltageType)
    if not isinstance(metric, AmdSmiVoltageMetric):
        raise AmdSmiParameterException(metric, AmdSmiVoltageMetric)

    reading = ctypes.c_int64()
    status = amdsmi_wrapper.amdsmi_get_gpu_volt_metric(
        processor_handle, sensor_type, metric, ctypes.byref(reading)
    )
    _check_res(status)
    return reading.value
2022-12-15 08:17:34 -06:00
def amdsmi_get_utilization_count(
    processor_handle: processor_handle_t,
    counter_types: List[AmdSmiUtilizationCounterType]
) -> List[Dict[str, Any]]:
    """Query ``amdsmi_wrapper.amdsmi_get_utilization_count`` for a set of counters.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        counter_types: A list of ``AmdSmiUtilizationCounterType`` values. A
            non-list argument is wrapped in a one-element list; duplicates are
            removed through a ``set`` before the call.

    Returns:
        A list whose first element is ``{"timestamp": ...}`` followed by one
        ``{"type": <name>, "value": ...}`` dict per returned counter.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` or any counter type
            has the wrong type.
        AmdSmiLibraryException: ``AMDSMI_STATUS_INVAL`` when no counter types
            are given; ``AMDSMI_STATUS_API_FAILED`` when the count written
            back by the wrapper differs from the number requested.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    # Normalise to a de-duplicated list.
    requested = counter_types if isinstance(counter_types, list) else [counter_types]
    requested = list(set(requested))
    if not requested:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_INVAL)

    # Build one request structure per counter type, validating as we go.
    requests = []
    for wanted in requested:
        if not isinstance(wanted, AmdSmiUtilizationCounterType):
            raise AmdSmiParameterException(wanted, AmdSmiUtilizationCounterType)
        entry = amdsmi_wrapper.amdsmi_utilization_counter_t()
        entry.type = wanted
        requests.append(entry)

    n_requested = len(requests)
    count = ctypes.c_uint32(n_requested)
    timestamp = ctypes.c_uint64()
    array_type = amdsmi_wrapper.amdsmi_utilization_counter_t * n_requested
    util_counter_list = array_type(*requests)

    status = amdsmi_wrapper.amdsmi_get_utilization_count(
        processor_handle, util_counter_list, count, ctypes.byref(timestamp)
    )
    _check_res(status)

    if count.value != n_requested:
        raise AmdSmiLibraryException(amdsmi_wrapper.AMDSMI_STATUS_API_FAILED)

    # The FIRST/LAST names coming out of the enum-value table are reported
    # under the specific counter names below.
    renamed = {
        "AMDSMI_UTILIZATION_COUNTER_FIRST": "AMDSMI_COARSE_GRAIN_GPU_ACTIVITY",
        "AMDSMI_UTILIZATION_COUNTER_LAST": "AMDSMI_FINE_DECODER_ACTIVITY",
    }
    enum_names = amdsmi_wrapper.amdsmi_utilization_counter_type_t__enumvalues

    result = [{"timestamp": timestamp.value}]
    for position in range(count.value):
        type_name = enum_names[util_counter_list[position].type]
        type_name = renamed.get(type_name, type_name)
        result.append({"type": type_name, "value": util_counter_list[position].value})
    return result
2023-02-25 08:41:13 -05:00
def amdsmi_get_gpu_perf_level(
    processor_handle: processor_handle_t,
) -> str:
    """Return the name of the level reported by ``amdsmi_wrapper.amdsmi_get_gpu_perf_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The name looked up in ``amdsmi_dev_perf_level_t__enumvalues``; the
        ``..._FIRST`` and ``..._LAST`` names are replaced by
        ``AMDSMI_DEV_PERF_LEVEL_AUTO`` and ``AMDSMI_DEV_PERF_LEVEL_DETERMINISM``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    level = amdsmi_wrapper.amdsmi_dev_perf_level_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_perf_level(
        processor_handle, ctypes.byref(level)
    )
    _check_res(status)

    level_name = amdsmi_wrapper.amdsmi_dev_perf_level_t__enumvalues[level.value]
    renamed = {
        "AMDSMI_DEV_PERF_LEVEL_FIRST": "AMDSMI_DEV_PERF_LEVEL_AUTO",
        "AMDSMI_DEV_PERF_LEVEL_LAST": "AMDSMI_DEV_PERF_LEVEL_DETERMINISM",
    }
    return renamed.get(level_name, level_name)
2023-02-25 08:43:44 -05:00
def amdsmi_set_gpu_perf_determinism_mode(
    processor_handle: processor_handle_t, clkvalue: int
) -> None:
    """Forward ``clkvalue`` to ``amdsmi_wrapper.amdsmi_set_gpu_perf_determinism_mode``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        clkvalue: Must be an ``int``; passed to the wrapper unchanged.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(clkvalue, int):
        raise AmdSmiParameterException(clkvalue, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_perf_determinism_mode(
        processor_handle, clkvalue
    )
    _check_res(status)
2023-02-25 05:26:14 -05:00
2023-02-27 04:05:11 -05:00
def amdsmi_get_gpu_overdrive_level(
    processor_handle: processor_handle_t,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_overdrive_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``.value`` of the ``ctypes.c_uint32`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    level_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_overdrive_level(
        processor_handle, ctypes.byref(level_out)
    )
    _check_res(status)
    return level_out.value
2024-08-29 10:21:06 -05:00
def amdsmi_get_gpu_mem_overdrive_level(
    processor_handle: processor_handle_t,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_mem_overdrive_level``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``.value`` of the ``ctypes.c_uint32`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    level_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_mem_overdrive_level(
        processor_handle, ctypes.byref(level_out)
    )
    _check_res(status)
    return level_out.value
2023-02-26 20:54:22 -05:00
def amdsmi_get_clk_freq(
    processor_handle: processor_handle_t, clk_type: AmdSmiClkType
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_clk_freq`` for one clock type.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.
        clk_type: Must be an ``AmdSmiClkType``; passed to the wrapper unchanged.

    Returns:
        A dict with ``"num_supported"``, ``"current"`` and ``"frequency"``,
        where ``"frequency"`` is the structure's ``frequency`` array cut down
        to its first ``num_supported`` entries.

    Raises:
        AmdSmiParameterException: If either argument has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)
    if not isinstance(clk_type, AmdSmiClkType):
        raise AmdSmiParameterException(clk_type, AmdSmiClkType)

    freq_info = amdsmi_wrapper.amdsmi_frequencies_t()
    status = amdsmi_wrapper.amdsmi_get_clk_freq(
        processor_handle, clk_type, ctypes.byref(freq_info)
    )
    _check_res(status)

    supported = freq_info.num_supported
    active = freq_info.current
    # Only the first num_supported slots of the array are reported.
    levels = list(freq_info.frequency)[:freq_info.num_supported]
    return {
        "num_supported": supported,
        "current": active,
        "frequency": levels,
    }
2022-11-09 17:32:55 +01:00
2024-04-08 10:35:24 -05:00
2024-05-23 10:31:37 -05:00
def amdsmi_get_soc_pstate(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_soc_pstate`` and return the policy table.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with ``"num_supported"``, ``"current_id"`` and ``"policies"``.
        ``"policies"`` holds one ``{'policy_id', 'policy_description'}`` dict
        for each of the first ``num_supported`` entries. ``"current_id"`` is
        the ``policy_id`` of the entry at index ``policy.current``, or ``0``
        when that index is not below ``num_supported`` (the same fallback
        ``amdsmi_get_xgmi_plpd`` uses).

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    policy = amdsmi_wrapper.amdsmi_dpm_policy_t()
    _check_res(
        amdsmi_wrapper.amdsmi_get_soc_pstate(
            processor_handle, ctypes.byref(policy)
        )
    )

    policies = []
    for i in range(policy.num_supported):
        entry = policy.policies[i]
        policies.append({
            'policy_id': entry.policy_id,
            'policy_description': entry.policy_description.decode()
        })

    # Guard the lookup: an out-of-range ``current`` previously indexed the
    # array unchecked.
    if policy.current < policy.num_supported:
        current_id = policy.policies[policy.current].policy_id
    else:
        current_id = 0  # Fallback, consistent with amdsmi_get_xgmi_plpd

    return {
        "num_supported": policy.num_supported,
        "current_id": current_id,
        "policies": policies,
    }
2022-11-09 17:32:55 +01:00
2024-04-08 10:35:24 -05:00
2024-03-20 12:06:24 -05:00
def amdsmi_get_xgmi_plpd(
    processor_handle: processor_handle_t
) -> Dict[str, Any]:
    """Query ``amdsmi_wrapper.amdsmi_get_xgmi_plpd`` and return the policy table.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dict with ``"num_supported"``, ``"current_id"``, and the same policy
        list under both ``"plpds"`` (marked for deprecation) and ``"policies"``.
        Entries that fail to decode are reported with id ``0`` and an empty
        description; ``"current_id"`` falls back to ``0`` when ``current`` is
        not below ``num_supported``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    policy = amdsmi_wrapper.amdsmi_dpm_policy_t()
    status = amdsmi_wrapper.amdsmi_get_xgmi_plpd(processor_handle, ctypes.byref(policy))
    _check_res(status)

    policies = []
    for slot_index in range(policy.num_supported):
        try:
            slot = policy.policies[slot_index]
            slot_id = slot.policy_id
            raw_desc = slot.policy_description
            # An empty description skips the decode step entirely.
            if not raw_desc:
                text = ""
            else:
                text = ctypes.string_at(raw_desc).decode('utf-8').rstrip('\x00')
            record = {'policy_id': slot_id, 'policy_description': text}
        except (UnicodeDecodeError, AttributeError, ValueError):
            # Problematic entries are replaced by a placeholder record.
            record = {'policy_id': 0, 'policy_description': ""}
        policies.append(record)

    in_range = policy.current < policy.num_supported
    current_id = policy.policies[policy.current].policy_id if in_range else 0

    return {
        "num_supported": policy.num_supported,
        "current_id": current_id,
        "plpds": policies,  # Marked for deprecation
        "policies": policies,  # Correct field name
    }
2024-04-08 10:35:24 -05:00
def amdsmi_get_gpu_process_isolation(
    processor_handle: processor_handle_t,
) -> int:
    """Return the value written by ``amdsmi_wrapper.amdsmi_get_gpu_process_isolation``.

    Args:
        processor_handle: Must be an ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        The ``.value`` of the ``ctypes.c_uint32`` passed by reference to the wrapper.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` has the wrong type.
    """
    handle_cls = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_cls):
        raise AmdSmiParameterException(processor_handle, handle_cls)

    isolation_out = ctypes.c_uint32()
    status = amdsmi_wrapper.amdsmi_get_gpu_process_isolation(
        processor_handle, ctypes.byref(isolation_out)
    )
    _check_res(status)
    return isolation_out.value
2023-02-27 04:06:56 -05:00
def amdsmi_get_gpu_od_volt_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the overdrive voltage/frequency data of a processor.

    Parameters:
        processor_handle: Handle of the processor to query. Must be an
            instance of ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dictionary with the keys ``curr_sclk_range``, ``curr_mclk_range``,
        ``sclk_freq_limits`` and ``mclk_freq_limits`` (each a dictionary with
        ``lower_bound`` / ``upper_bound``), ``curve.vc_points`` (a list of
        three ``frequency`` / ``voltage`` dictionaries) and ``num_regions``.
        In the two ``curr_*`` ranges a bound equal to the maximum 64-bit
        unsigned value is replaced by the string ``"N/A"``; the limit ranges
        are returned unmodified.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not a
            processor handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    od_data = amdsmi_wrapper.amdsmi_od_volt_freq_data_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_od_volt_info(
        processor_handle, ctypes.byref(od_data)
    )
    # NOTE(review): _check_res is defined elsewhere; presumably it raises on
    # a non-success status code.
    _check_res(status)

    def current_range(clk_range):
        # Only the current ranges map the all-ones 64-bit value to "N/A".
        lower = clk_range.lower_bound
        upper = clk_range.upper_bound
        return {
            "lower_bound": "N/A" if lower == MaxUIntegerTypes.UINT64_T else lower,
            "upper_bound": "N/A" if upper == MaxUIntegerTypes.UINT64_T else upper,
        }

    def limit_range(clk_range):
        return {
            "lower_bound": clk_range.lower_bound,
            "upper_bound": clk_range.upper_bound,
        }

    # The original code reports exactly the first three curve points.
    curve_points = [
        {
            "frequency": od_data.curve.vc_points[point_index].frequency,
            "voltage": od_data.curve.vc_points[point_index].voltage,
        }
        for point_index in range(3)
    ]

    return {
        "curr_sclk_range": current_range(od_data.curr_sclk_range),
        "curr_mclk_range": current_range(od_data.curr_mclk_range),
        "sclk_freq_limits": limit_range(od_data.sclk_freq_limits),
        "mclk_freq_limits": limit_range(od_data.mclk_freq_limits),
        "curve.vc_points": curve_points,
        "num_regions": od_data.num_regions,
    }
2023-02-27 01:01:17 -05:00
def _gpu_metrics_struct_to_dict(gpu_metrics) -> Dict[str, Any]:
    """Flatten a populated ``amdsmi_gpu_metrics_t`` into the public dictionary.

    Shared by ``amdsmi_get_gpu_metrics_info`` and
    ``amdsmi_get_gpu_partition_metrics_info``, which previously carried two
    identical copies of this conversion.

    Every scalar and array field is passed through ``_validate_if_max_uint``
    together with the maximum value of the field's unsigned width (and the
    ``isActivity`` / ``isBool`` flags where the original code set them).
    The ``xcp_stats.*`` entries are 2-D lists: one inner list per element of
    ``gpu_metrics.xcp_stats``, holding that element's validated values for
    the named field.

    Parameters:
        gpu_metrics: The metrics structure already filled in by the library.

    Returns:
        The metrics dictionary, with keys in the same order as before.
    """
    check = _validate_if_max_uint
    u8 = MaxUIntegerTypes.UINT8_T
    u16 = MaxUIntegerTypes.UINT16_T
    u32 = MaxUIntegerTypes.UINT32_T
    u64 = MaxUIntegerTypes.UINT64_T
    header = gpu_metrics.common_header

    gpu_metrics_output = {
        "common_header.structure_size": check(header.structure_size, u16),
        "common_header.format_revision": check(header.format_revision, u8),
        "common_header.content_revision": check(header.content_revision, u8),
        "temperature_edge": check(gpu_metrics.temperature_edge, u16),
        "temperature_hotspot": check(gpu_metrics.temperature_hotspot, u16),
        "temperature_mem": check(gpu_metrics.temperature_mem, u16),
        "temperature_vrgfx": check(gpu_metrics.temperature_vrgfx, u16),
        "temperature_vrsoc": check(gpu_metrics.temperature_vrsoc, u16),
        "temperature_vrmem": check(gpu_metrics.temperature_vrmem, u16),
        "average_gfx_activity": check(gpu_metrics.average_gfx_activity, u16, isActivity=True),
        "average_umc_activity": check(gpu_metrics.average_umc_activity, u16, isActivity=True),
        "average_mm_activity": check(gpu_metrics.average_mm_activity, u16, isActivity=True),
        "average_socket_power": check(gpu_metrics.average_socket_power, u16),
        "energy_accumulator": check(gpu_metrics.energy_accumulator, u64),
        "system_clock_counter": check(gpu_metrics.system_clock_counter, u64),
        "average_gfxclk_frequency": check(gpu_metrics.average_gfxclk_frequency, u16),
        "average_socclk_frequency": check(gpu_metrics.average_socclk_frequency, u16),
        "average_uclk_frequency": check(gpu_metrics.average_uclk_frequency, u16),
        "average_vclk0_frequency": check(gpu_metrics.average_vclk0_frequency, u16),
        "average_dclk0_frequency": check(gpu_metrics.average_dclk0_frequency, u16),
        "average_vclk1_frequency": check(gpu_metrics.average_vclk1_frequency, u16),
        "average_dclk1_frequency": check(gpu_metrics.average_dclk1_frequency, u16),
        "current_gfxclk": check(gpu_metrics.current_gfxclk, u16),
        "current_socclk": check(gpu_metrics.current_socclk, u16),
        "current_uclk": check(gpu_metrics.current_uclk, u16),
        "current_vclk0": check(gpu_metrics.current_vclk0, u16),
        "current_dclk0": check(gpu_metrics.current_dclk0, u16),
        "current_vclk1": check(gpu_metrics.current_vclk1, u16),
        "current_dclk1": check(gpu_metrics.current_dclk1, u16),
        "throttle_status": check(gpu_metrics.throttle_status, u32, isBool=True),
        "current_fan_speed": check(gpu_metrics.current_fan_speed, u16),
        "pcie_link_width": check(gpu_metrics.pcie_link_width, u16),
        "pcie_link_speed": check(gpu_metrics.pcie_link_speed, u16),
        "gfx_activity_acc": check(gpu_metrics.gfx_activity_acc, u32),
        "mem_activity_acc": check(gpu_metrics.mem_activity_acc, u32),
        "temperature_hbm": check(list(gpu_metrics.temperature_hbm), u16),
        "firmware_timestamp": check(gpu_metrics.firmware_timestamp, u64),
        "voltage_soc": check(gpu_metrics.voltage_soc, u16),
        "voltage_gfx": check(gpu_metrics.voltage_gfx, u16),
        "voltage_mem": check(gpu_metrics.voltage_mem, u16),
        "indep_throttle_status": check(gpu_metrics.indep_throttle_status, u64, isBool=True),
        "current_socket_power": check(gpu_metrics.current_socket_power, u16),
        "vcn_activity": check(list(gpu_metrics.vcn_activity), u16, isActivity=True),
        "gfxclk_lock_status": check(gpu_metrics.gfxclk_lock_status, u32),
        "xgmi_link_width": check(gpu_metrics.xgmi_link_width, u16),
        "xgmi_link_speed": check(gpu_metrics.xgmi_link_speed, u16),
        "pcie_bandwidth_acc": check(gpu_metrics.pcie_bandwidth_acc, u64),
        "pcie_bandwidth_inst": check(gpu_metrics.pcie_bandwidth_inst, u64),
        "pcie_l0_to_recov_count_acc": check(gpu_metrics.pcie_l0_to_recov_count_acc, u64),
        "pcie_replay_count_acc": check(gpu_metrics.pcie_replay_count_acc, u64),
        "pcie_replay_rover_count_acc": check(gpu_metrics.pcie_replay_rover_count_acc, u64),
        "xgmi_read_data_acc": check(list(gpu_metrics.xgmi_read_data_acc), u64),
        "xgmi_write_data_acc": check(list(gpu_metrics.xgmi_write_data_acc), u64),
        "current_gfxclks": check(list(gpu_metrics.current_gfxclks), u16),
        "current_socclks": check(list(gpu_metrics.current_socclks), u16),
        "current_vclk0s": check(list(gpu_metrics.current_vclk0s), u16),
        "current_dclk0s": check(list(gpu_metrics.current_dclk0s), u16),
        "jpeg_activity": check(list(gpu_metrics.jpeg_activity), u16, isActivity=True),
        "pcie_nak_sent_count_acc": check(gpu_metrics.pcie_nak_sent_count_acc, u32),
        "pcie_nak_rcvd_count_acc": check(gpu_metrics.pcie_nak_rcvd_count_acc, u32),
        "accumulation_counter": check(gpu_metrics.accumulation_counter, u64),
        "prochot_residency_acc": check(gpu_metrics.prochot_residency_acc, u64),
        "ppt_residency_acc": check(gpu_metrics.ppt_residency_acc, u64),
        "socket_thm_residency_acc": check(gpu_metrics.socket_thm_residency_acc, u64),
        "vr_thm_residency_acc": check(gpu_metrics.vr_thm_residency_acc, u64),
        "hbm_thm_residency_acc": check(gpu_metrics.hbm_thm_residency_acc, u64),
        "num_partition": check(gpu_metrics.num_partition, u16),
    }

    # (xcp_stats field, max value of its element type, extra keyword args).
    # The extra kwargs are forwarded verbatim so that fields the original
    # code validated without ``isActivity`` still rely on the helper's own
    # default for that flag.
    xcp_fields = (
        ("gfx_busy_inst", u32, {"isActivity": True}),
        ("jpeg_busy", u16, {"isActivity": True}),
        ("vcn_busy", u16, {"isActivity": True}),
        ("gfx_busy_acc", u64, {}),
        ("gfx_below_host_limit_acc", u64, {}),
        # new for gpu metrics v1.8
        ("gfx_below_host_limit_ppt_acc", u64, {}),
        ("gfx_below_host_limit_thm_acc", u64, {}),
        ("gfx_low_utilization_acc", u64, {}),
        ("gfx_below_host_limit_total_acc", u64, {}),
    )
    # Create a 2d array with each XCD's stats: outer index is the xcp_stats
    # element, inner list holds that element's validated values.
    for field_name, max_value, extra_kwargs in xcp_fields:
        gpu_metrics_output["xcp_stats." + field_name] = [
            [check(val, max_value, **extra_kwargs) for val in getattr(xcp_metrics, field_name)]
            for xcp_metrics in gpu_metrics.xcp_stats
        ]

    # These keys follow the xcp_stats entries to keep the original key order.
    gpu_metrics_output["pcie_lc_perf_other_end_recovery"] = check(
        gpu_metrics.pcie_lc_perf_other_end_recovery, u32
    )
    gpu_metrics_output["vram_max_bandwidth"] = check(gpu_metrics.vram_max_bandwidth, u64)
    gpu_metrics_output["xgmi_link_status"] = check(list(gpu_metrics.xgmi_link_status), u16)
    return gpu_metrics_output


def amdsmi_get_gpu_metrics_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the GPU metrics of a processor as a flat dictionary.

    Parameters:
        processor_handle: Handle of the processor to query. Must be an
            instance of ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dictionary keyed by metric name (for example ``temperature_edge``,
        ``current_gfxclk``, ``xcp_stats.gfx_busy_inst``). Values are the
        results of ``_validate_if_max_uint`` for the corresponding field;
        ``xcp_stats.*`` entries are lists of lists.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not a
            processor handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    gpu_metrics = amdsmi_wrapper.amdsmi_gpu_metrics_t()
    # NOTE(review): _check_res is defined elsewhere; presumably it raises on
    # a non-success status code.
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_metrics_info(
            processor_handle, ctypes.byref(gpu_metrics)
        )
    )
    return _gpu_metrics_struct_to_dict(gpu_metrics)


def amdsmi_get_gpu_partition_metrics_info(
    processor_handle: processor_handle_t,
) -> Dict[str, Any]:
    """Return the partition GPU metrics of a processor as a flat dictionary.

    Identical in shape to ``amdsmi_get_gpu_metrics_info``; the only
    difference is that the structure is filled by
    ``amdsmi_wrapper.amdsmi_get_gpu_partition_metrics_info``.

    Parameters:
        processor_handle: Handle of the processor to query. Must be an
            instance of ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        A dictionary keyed by metric name; ``xcp_stats.*`` entries are lists
        of lists.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not a
            processor handle.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    gpu_metrics = amdsmi_wrapper.amdsmi_gpu_metrics_t()
    # NOTE(review): _check_res is defined elsewhere; presumably it raises on
    # a non-success status code.
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_partition_metrics_info(
            processor_handle, ctypes.byref(gpu_metrics)
        )
    )
    return _gpu_metrics_struct_to_dict(gpu_metrics)
2022-11-09 17:32:55 +01:00
2023-02-27 04:12:11 -05:00
def amdsmi_get_gpu_od_volt_curve_regions(
    processor_handle: processor_handle_t, num_regions: int
) -> List[Dict[str, Any]]:
    """Return the overdrive voltage-curve regions of a processor.

    Parameters:
        processor_handle: Handle of the processor to query. Must be an
            instance of ``amdsmi_wrapper.amdsmi_processor_handle``.
        num_regions: Number of region entries to allocate for the library
            call; also passed in as the initial region count.

    Returns:
        One dictionary per region reported back by the library (the count the
        call leaves in its in/out argument), each with ``freq_range`` and
        ``volt_range`` sub-dictionaries holding ``lower_bound`` and
        ``upper_bound``.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not a processor
            handle or ``num_regions`` is not an ``int``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(num_regions, int):
        raise AmdSmiParameterException(num_regions, int)

    # In/out count: starts as the buffer capacity, read back after the call.
    region_count = ctypes.c_uint32(num_regions)
    regions = (amdsmi_wrapper.amdsmi_freq_volt_region_t * num_regions)()
    status = amdsmi_wrapper.amdsmi_get_gpu_od_volt_curve_regions(
        processor_handle, ctypes.byref(region_count), regions
    )
    # NOTE(review): _check_res is defined elsewhere; presumably it raises on
    # a non-success status code.
    _check_res(status)

    def bounds(value_range):
        return {
            "lower_bound": value_range.lower_bound,
            "upper_bound": value_range.upper_bound,
        }

    return [
        {
            "freq_range": bounds(regions[region_index].freq_range),
            "volt_range": bounds(regions[region_index].volt_range),
        }
        for region_index in range(region_count.value)
    ]
2023-02-27 01:04:25 -05:00
def amdsmi_get_gpu_power_profile_presets(
    processor_handle: processor_handle_t, sensor_idx: int
) -> Dict[str, Any]:
    """Return the power profile preset status of a processor.

    Parameters:
        processor_handle: Handle of the processor to query. Must be an
            instance of ``amdsmi_wrapper.amdsmi_processor_handle``.
        sensor_idx: Sensor index forwarded unchanged to the library call.

    Returns:
        A dictionary with the ``available_profiles``, ``current`` and
        ``num_profiles`` fields of the status structure filled in by the
        library.

    Raises:
        AmdSmiParameterException: If ``processor_handle`` is not a processor
            handle or ``sensor_idx`` is not an ``int``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(sensor_idx, int):
        raise AmdSmiParameterException(sensor_idx, int)

    profile_status = amdsmi_wrapper.amdsmi_power_profile_status_t()
    ret_code = amdsmi_wrapper.amdsmi_get_gpu_power_profile_presets(
        processor_handle, sensor_idx, ctypes.byref(profile_status)
    )
    # NOTE(review): _check_res is defined elsewhere; presumably it raises on
    # a non-success status code.
    _check_res(ret_code)

    field_names = ("available_profiles", "current", "num_profiles")
    return {field: getattr(profile_status, field) for field in field_names}
2023-02-27 10:39:42 -05:00
def amdsmi_get_gpu_ecc_count(
    processor_handle: processor_handle_t, block: AmdSmiGpuBlock
) -> Dict[str, int]:
    """
    Read the ECC error counters of one GPU block.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.
        block (AmdSmiGpuBlock): The GPU block whose counters are requested.

    Returns:
        Dict[str, int]: ``correctable_count``, ``uncorrectable_count`` and
        ``deferred_count`` as filled in by the library.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(block, AmdSmiGpuBlock):
        raise AmdSmiParameterException(block, AmdSmiGpuBlock)

    error_count = amdsmi_wrapper.amdsmi_error_count_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_ecc_count(
        processor_handle, block, ctypes.byref(error_count)
    )
    _check_res(ret)

    counter_names = ("correctable_count", "uncorrectable_count", "deferred_count")
    return {name: getattr(error_count, name) for name in counter_names}
2023-02-27 10:39:42 -05:00
def amdsmi_get_gpu_ecc_enabled(
    processor_handle: processor_handle_t,
) -> int:
    """
    Read the ECC-enabled value of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        int: The unsigned 64-bit value written by the library.
        NOTE(review): presumably a bitmask of ECC-enabled blocks; confirm
        against the C API documentation.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    enabled_blocks = ctypes.c_uint64(0)
    ret = amdsmi_wrapper.amdsmi_get_gpu_ecc_enabled(
        processor_handle, ctypes.byref(enabled_blocks)
    )
    _check_res(ret)
    return enabled_blocks.value
2023-02-27 10:39:42 -05:00
def amdsmi_get_gpu_ecc_status(
    processor_handle: processor_handle_t, block: AmdSmiGpuBlock
) -> AmdSmiRasErrState:
    """
    Read the ECC (RAS error) state of one GPU block.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.
        block (AmdSmiGpuBlock): The GPU block whose state is requested.

    Returns:
        AmdSmiRasErrState: The state reported by the library, converted to
        the Python enum.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(block, AmdSmiGpuBlock):
        raise AmdSmiParameterException(block, AmdSmiGpuBlock)

    raw_state = amdsmi_wrapper.amdsmi_ras_err_state_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_ecc_status(
        processor_handle, block, ctypes.byref(raw_state)
    )
    _check_res(ret)
    return AmdSmiRasErrState(raw_state.value)
2025-06-11 17:19:02 -05:00
def amdsmi_status_code_to_string(status: amdsmi_wrapper.amdsmi_status_t) -> Union[str, bytes, None]:
    """
    Translate a library status code into its message.

    Parameters:
        status (amdsmi_status_t): The status code to translate.

    Returns:
        Union[str, bytes, None]: Whatever ``amdsmi_wrapper.string_cast``
        produces for the message pointer filled in by the library.

    Raises:
        AmdSmiParameterException: If ``status`` is not an ``amdsmi_status_t``.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    status_type = amdsmi_wrapper.amdsmi_status_t
    if not isinstance(status, status_type):
        raise AmdSmiParameterException(status, status_type)

    # char** out-parameter; the inner pointer is read back after the call.
    message_p = ctypes.pointer(ctypes.c_char())
    message_p_p = ctypes.pointer(message_p)

    ret = amdsmi_wrapper.amdsmi_status_code_to_string(status, message_p_p)
    _check_res(ret)

    return amdsmi_wrapper.string_cast(message_p_p.contents)
2022-11-10 10:30:10 +01:00
2023-02-27 01:35:46 -05:00
def amdsmi_get_gpu_compute_process_info() -> List[Dict[str, int]]:
    """
    List the compute processes reported by the library.

    The library is called twice: first with a null buffer to learn how many
    entries exist, then with a buffer of that size to fetch them.

    Returns:
        List[Dict[str, int]]: One dictionary per reported process with the
        keys ``process_id``, ``vram_usage``, ``sdma_usage``, ``cu_occupancy``
        and ``evicted_time``.

    Raises:
        AmdSmiLibraryException: If either underlying library call fails.
    """
    num_items = ctypes.c_uint32(0)
    nullptr = POINTER(amdsmi_wrapper.amdsmi_process_info_t)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_info(
            nullptr, ctypes.byref(num_items)
        )
    )

    procs = (amdsmi_wrapper.amdsmi_process_info_t * num_items.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_info(
            procs, ctypes.byref(num_items)
        )
    )

    # num_items is an in/out count: after the second call it holds the number
    # of entries actually written, which can be smaller than the buffer if
    # processes exited between the two calls. Only report written entries so
    # the result never contains zero-filled placeholders.
    written = min(num_items.value, len(procs))
    return [
        {
            "process_id": proc.process_id,
            "vram_usage": proc.vram_usage,
            "sdma_usage": proc.sdma_usage,
            "cu_occupancy": proc.cu_occupancy,
            "evicted_time": proc.evicted_time,
        }
        for proc in procs[:written]
    ]
2023-02-27 01:35:46 -05:00
def amdsmi_get_gpu_compute_process_info_by_pid(pid: int) -> Dict[str, int]:
    """
    Fetch the compute process information for a single process id.

    Parameters:
        pid (int): Process id, passed to the library as an unsigned 32-bit value.

    Returns:
        Dict[str, int]: ``process_id``, ``vram_usage``, ``sdma_usage``,
        ``cu_occupancy`` and ``evicted_time`` of the process.

    Raises:
        AmdSmiParameterException: If ``pid`` is not an int.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    if not isinstance(pid, int):
        raise AmdSmiParameterException(pid, int)

    process_info = amdsmi_wrapper.amdsmi_process_info_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_compute_process_info_by_pid(
        ctypes.c_uint32(pid), ctypes.byref(process_info)
    )
    _check_res(ret)

    reported_fields = (
        "process_id",
        "vram_usage",
        "sdma_usage",
        "cu_occupancy",
        "evicted_time",
    )
    return {name: getattr(process_info, name) for name in reported_fields}
2023-02-27 01:44:06 -05:00
def amdsmi_get_gpu_compute_process_gpus(pid: int) -> List[int]:
    """
    List the device indices a compute process is using.

    The library is called twice: first with a null buffer to learn how many
    devices are involved, then with a buffer of that size to fetch the indices.

    Parameters:
        pid (int): Process id to look up.

    Returns:
        List[int]: The device indices written by the library.

    Raises:
        AmdSmiParameterException: If ``pid`` is not an int.
        AmdSmiLibraryException: If either underlying library call fails.
    """
    if not isinstance(pid, int):
        raise AmdSmiParameterException(pid, int)

    num_devices = ctypes.c_uint32(0)
    nullptr = POINTER(ctypes.c_uint32)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_gpus(
            pid, nullptr, ctypes.byref(num_devices)
        )
    )

    dv_indices = (ctypes.c_uint32 * num_devices.value)()
    _check_res(
        amdsmi_wrapper.amdsmi_get_gpu_compute_process_gpus(
            pid, dv_indices, ctypes.byref(num_devices)
        )
    )

    # Elements of a ctypes array of a simple type (c_uint32) are returned as
    # plain Python ints, so they have no ``.value`` attribute. Slicing yields a
    # list of ints and limits the result to the count reported by the library.
    written = min(num_devices.value, len(dv_indices))
    return dv_indices[:written]
2023-02-27 02:10:20 -05:00
def amdsmi_gpu_xgmi_error_status(
    processor_handle: processor_handle_t,
) -> AmdSmiXgmiStatus:
    """
    Read the XGMI error status of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        AmdSmiXgmiStatus: The status reported by the library, converted to
        the Python enum (the enum member itself, not its integer value).

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    raw_status = amdsmi_wrapper.amdsmi_xgmi_status_t()
    ret = amdsmi_wrapper.amdsmi_gpu_xgmi_error_status(
        processor_handle, ctypes.byref(raw_status)
    )
    _check_res(ret)
    return AmdSmiXgmiStatus(raw_status.value)
2022-11-10 10:30:10 +01:00
2023-02-28 01:59:12 -05:00
def amdsmi_reset_gpu_xgmi_error(
    processor_handle: processor_handle_t,
) -> None:
    """
    Ask the library to reset the XGMI error status of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to reset.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_reset_gpu_xgmi_error(processor_handle)
    _check_res(ret)
2022-11-10 16:18:27 +01:00
2023-02-25 08:11:53 -05:00
def amdsmi_get_gpu_memory_reserved_pages(
    processor_handle: processor_handle_t,
) -> Union[list, str]:
    """
    Fetch the reserved (retired) memory page records of a processor.

    The library is called once with a null buffer to obtain the page count
    and, if that count is non-zero, a second time with a buffer of that size.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        Union[list, str]: An empty list when no pages are reported, otherwise
        the result of ``_format_bad_page_info`` for the fetched records.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If an underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    record_type = amdsmi_wrapper.amdsmi_retired_page_record_t
    page_count = ctypes.c_uint32()

    # First pass: null record buffer, only the count is filled in.
    no_records = POINTER(record_type)()
    ret = amdsmi_wrapper.amdsmi_get_gpu_memory_reserved_pages(
        processor_handle, ctypes.byref(page_count), no_records
    )
    _check_res(ret)
    if not page_count.value:
        return []

    # Second pass: buffer sized from the count reported above.
    page_records = (record_type * page_count.value)()
    ret = amdsmi_wrapper.amdsmi_get_gpu_memory_reserved_pages(
        processor_handle, ctypes.byref(page_count), page_records
    )
    _check_res(ret)

    return _format_bad_page_info(page_records, page_count)
2023-11-22 03:32:15 -06:00
def amdsmi_get_gpu_metrics_header_info(
    processor_handle: processor_handle_t,
) -> Dict[str, int]:
    """
    Read the GPU metrics table header of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        Dict[str, int]: ``structure_size``, ``format_revision`` and
        ``content_revision`` of the header filled in by the library.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    table_header = amdsmi_wrapper.amd_metrics_table_header_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_metrics_header_info(
        processor_handle, ctypes.byref(table_header)
    )
    _check_res(ret)

    header_fields = ("structure_size", "format_revision", "content_revision")
    return {name: getattr(table_header, name) for name in header_fields}
2024-08-26 05:29:24 -04:00
2025-02-26 05:45:58 -06:00
2024-08-26 05:29:24 -04:00
def amdsmi_get_link_topology_nearest(
    processor_handle: processor_handle_t,
    link_type: AmdSmiLinkType,
) -> Dict[str, Any]:
    """
    Fetch the processors the library reports as nearest for a link type.

    Parameters:
        processor_handle (amdsmi_processor_handle): The reference processor.
        link_type (AmdSmiLinkType): The link type to search by.

    Returns:
        Dict[str, Any]: A dictionary with the single key ``processor_list``,
        holding the first ``count`` entries of the library's processor list.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(link_type, AmdSmiLinkType):
        raise AmdSmiParameterException(link_type, AmdSmiLinkType)

    nearest = amdsmi_wrapper.amdsmi_topology_nearest_t()
    ret = amdsmi_wrapper.amdsmi_get_link_topology_nearest(
        processor_handle, link_type, ctypes.byref(nearest)
    )
    _check_res(ret)

    # Only the first `count` slots of the fixed-size list are reported.
    nearest_processors = [
        nearest.processor_list[position] for position in range(nearest.count)
    ]
    return {'processor_list': nearest_processors}
2025-01-30 19:12:03 -05:00
2025-04-01 16:57:23 -05:00
def amdsmi_get_gpu_virtualization_mode(
    processor_handle: processor_handle_t
) -> Dict[str, AmdSmiVirtualizationMode]:
    """
    Read the virtualization mode of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        Dict[str, AmdSmiVirtualizationMode]: A dictionary with the single key
        ``mode`` holding the reported mode as a Python enum member.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Output value the library writes the mode into.
    raw_mode = amdsmi_wrapper.amdsmi_virtualization_mode_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_virtualization_mode(
        processor_handle, ctypes.byref(raw_mode)
    )
    _check_res(ret)

    virtualization_mode = AmdSmiVirtualizationMode(raw_mode.value)
    return {"mode": virtualization_mode}
2025-02-26 05:45:58 -06:00
2025-11-26 08:33:27 -06:00
def amdsmi_get_gpu_ptl_state(
    processor_handle: processor_handle_t
) -> bool:
    """
    Read the PTL state of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        bool: The boolean value written by the library.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ptl_enabled = ctypes.c_bool()
    ret = amdsmi_wrapper.amdsmi_get_gpu_ptl_state(
        processor_handle, ctypes.byref(ptl_enabled)
    )
    _check_res(ret)
    return ptl_enabled.value
def amdsmi_set_gpu_ptl_state(
    processor_handle: processor_handle_t,
    state: int
) -> None:
    """
    Set the PTL state of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to modify.
        state (int): Requested state, forwarded to the library unchanged.
            NOTE(review): this argument is not type-checked here.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_set_gpu_ptl_state(processor_handle, state)
    _check_res(ret)
def amdsmi_get_gpu_ptl_formats(
    processor_handle: processor_handle_t
) -> Tuple[int, int]:
    """
    Read the two PTL data formats of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        Tuple[int, int]: The first and second data format values written by
        the library, as plain integers.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    first_format = amdsmi_wrapper.amdsmi_ptl_data_format_t()
    second_format = amdsmi_wrapper.amdsmi_ptl_data_format_t()
    ret = amdsmi_wrapper.amdsmi_get_gpu_ptl_formats(
        processor_handle,
        ctypes.byref(first_format),
        ctypes.byref(second_format),
    )
    _check_res(ret)

    return int(first_format.value), int(second_format.value)
def amdsmi_set_gpu_ptl_formats(
    processor_handle: processor_handle_t,
    fmt1: AmdSmiPtlData,
    fmt2: AmdSmiPtlData,
) -> None:
    """
    Set the two PTL data formats of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to modify.
        fmt1 (AmdSmiPtlData): First data format; must not be ``INVALID``.
        fmt2 (AmdSmiPtlData): Second data format; must not be ``INVALID``.

    Raises:
        AmdSmiParameterException: If the processor handle or a format has the
            wrong type, or if a format is ``AmdSmiPtlData.INVALID``.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    # Both formats are validated (type first, then value) before any
    # conversion to the C type happens.
    requested_formats = (fmt1, fmt2)
    for requested in requested_formats:
        if not isinstance(requested, AmdSmiPtlData):
            raise AmdSmiParameterException(requested, AmdSmiPtlData)
        if requested is AmdSmiPtlData.INVALID:
            raise AmdSmiParameterException(requested, "A valid PTL data format (not INVALID)")

    c_first, c_second = [
        amdsmi_wrapper.amdsmi_ptl_data_format_t(int(requested))
        for requested in requested_formats
    ]
    ret = amdsmi_wrapper.amdsmi_set_gpu_ptl_formats(processor_handle, c_first, c_second)
    _check_res(ret)
2025-02-26 05:45:58 -06:00
### Non C-Lib APIs ###
def amdsmi_get_rocm_version() -> Tuple[bool, str]:
    """
    Get the ROCm version for the rocm-core library.

    This function attempts to retrieve the ROCm version by loading the `librocm-core.so` shared library
    from a prioritized list of locations and calling its `getROCmVersion` function. The version is
    returned as a string in the format "major.minor.patch".

    Returns:
        Tuple[bool, str]: A tuple containing a boolean and a string.
            - The boolean indicates whether the operation was successful.
            - The string contains the ROCm version if successful, or an error message if not.

    This function does not raise: a library that cannot be loaded is skipped, and any other
    error is reported through the returned (False, message) tuple.

    Example:
        rocm_lib_status, version_message = amdsmi_get_rocm_version()
        if rocm_lib_status:
            print(f"ROCm version: {version_message}")
        else:
            print(f"Error: {version_message}")
    """
    # librocm-core.so can be found in several places.
    # Candidates are tried in the order below:
    # 0. Relative to amdsmi_interface.py in TheRock:
    #    `amdsmi_interface.py` is located in
    #    `_rocm_sdk_core/share/amd_smi/amdsmi`, libraries are in
    #    `_rocm_sdk_core/lib`.
    # 1. ROCM_HOME/ROCM_PATH environment variables
    #    - ROCM_HOME/lib
    #    - ROCM_PATH/lib (usually set to /opt/rocm/)
    #    - /opt/rocm/lib, when the library exists there
    # 2. Decided by the linker
    #    - LD_LIBRARY_PATH env var
    #    - defined path in /etc/ld.so.conf.d/
    # 3. Relative to this file: three directories up, in `lib`
    try:
        this_file = Path(__file__).resolve()
        possible_locations = []

        # 0.
        possible_locations.append(this_file.parent.parent.parent.parent / "lib/librocm-core.so.1")

        # 1. Honor both variables, ROCM_HOME first. Unset or empty variables
        #    are ignored so an empty ROCM_HOME no longer hides ROCM_PATH.
        for env_name in ("ROCM_HOME", "ROCM_PATH"):
            rocm_root = os.getenv(env_name)
            if rocm_root:
                possible_locations.append(os.path.join(rocm_root, "lib/librocm-core.so"))
        # Check if /opt/rocm/lib/librocm-core.so exists and add it to the list
        if os.path.exists("/opt/rocm/lib/librocm-core.so"):
            possible_locations.append("/opt/rocm/lib/librocm-core.so")

        # 2.
        possible_locations.append("librocm-core.so")

        # 3.
        possible_locations.append(this_file.parent.parent.parent / "lib" / "librocm-core.so")

        uint32_p = ctypes.POINTER(ctypes.c_uint32)
        for librocm_core_file_path in possible_locations:
            try:
                # os.fspath(): ctypes.CDLL only documents path-like names from
                # Python 3.12 on, so always hand it a plain string.
                librocm_core = ctypes.CDLL(os.fspath(librocm_core_file_path))
            except OSError:
                # Not loadable from this location; try the next candidate.
                continue

            get_rocm_core_version = librocm_core.getROCmVersion
            get_rocm_core_version.restype = ctypes.c_uint32
            get_rocm_core_version.argtypes = [uint32_p, uint32_p, uint32_p]

            # call the function
            major = ctypes.c_uint32()
            minor = ctypes.c_uint32()
            patch = ctypes.c_uint32()
            status = get_rocm_core_version(
                ctypes.byref(major), ctypes.byref(minor), ctypes.byref(patch)
            )
            if status == 0:
                return True, f"{major.value}.{minor.value}.{patch.value}"
            return False, "Failed to unpack ROCm version"

        # If we hit here, we were unable to find the librocm-core.so file
        return False, "Could not find librocm-core.so"
    except Exception as e:
        # Deliberate catch-all: version detection is best-effort and must
        # never propagate an exception to the caller.
        return False, f"Unable to detect ROCm installation, Unknown Error: {e}"
2025-07-15 19:35:54 -05:00
2025-10-06 14:50:00 -05:00
def amdsmi_get_cpu_handles() -> Dict[str, Any]:
    """
    Return the CPU socket handles together with their count.

    Returns:
        Dict[str, Any]: ``cpu_count`` (number of handles) and
        ``processor_handles`` (the value returned by
        ``amdsmi_get_cpusocket_handles``).
    """
    socket_handles = amdsmi_get_cpusocket_handles()
    handle_count = len(socket_handles)
    return {'cpu_count': handle_count, 'processor_handles': socket_handles}
def amdsmi_get_esmi_err_msg(status: AmdSmiStatus) -> str:
    """
    Get the E-SMI error message for a status code.

    Parameters:
        status (AmdSmiStatus): The status whose message is requested.

    Returns:
        str: The message filled in by the library, or ``"Unknown error"`` when
        the library leaves the message pointer null or ``string_cast`` does
        not produce a ``str``.

    Raises:
        AmdSmiParameterException: If ``status`` is not an ``AmdSmiStatus``.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    if not isinstance(status, AmdSmiStatus):
        raise AmdSmiParameterException(status, AmdSmiStatus)

    # char** out-parameter; the inner pointer is read back after the call.
    message_p_p = ctypes.pointer(ctypes.pointer(ctypes.c_char()))
    ret = amdsmi_wrapper.amdsmi_get_esmi_err_msg(status.value, message_p_p)
    _check_res(ret)

    message_p = message_p_p.contents
    if not message_p:
        # Null pointer: there is no message to report.
        return "Unknown error"

    if not hasattr(amdsmi_wrapper, 'string_cast'):
        # No helper in the wrapper: extract the C string manually.
        return ctypes.string_at(message_p).decode('utf-8')

    error_msg = amdsmi_wrapper.string_cast(message_p)
    return error_msg if isinstance(error_msg, str) else "Unknown error"
def amdsmi_get_gpu_event_notification(
    timeout_ms: int = 1000
) -> Dict[str, Any]:
    """
    Collect GPU event notifications from the library.

    Parameters:
        timeout_ms (int): Timeout passed to the library as a signed 32-bit
            value. Defaults to 1000.

    Returns:
        Dict[str, Any]: ``num_elem`` (the element count reported by the
        library) and ``data``, a list with one dictionary per element holding
        ``processor_handle``, ``event`` and ``message`` (decoded as UTF-8, or
        an empty string when the message is empty).

    Raises:
        AmdSmiParameterException: If ``timeout_ms`` is not an int.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    if not isinstance(timeout_ms, int):
        raise AmdSmiParameterException(timeout_ms, int)

    c_timeout = ctypes.c_int32(timeout_ms)

    # In/out element count: starts at the buffer capacity and is read back
    # after the call.
    element_count = ctypes.c_uint32(MAX_NUM_PROCESSES)
    element_count_p = ctypes.pointer(element_count)
    events = (amdsmi_wrapper.amdsmi_evt_notification_data_t * MAX_NUM_PROCESSES)()

    ret = amdsmi_wrapper.amdsmi_get_gpu_event_notification(
        c_timeout, element_count_p, events
    )
    _check_res(ret)

    reported = element_count.value
    data = []
    for record in (events[position] for position in range(reported)):
        raw_message = record.message
        data.append({
            'processor_handle': record.processor_handle,
            'event': record.event,
            'message': raw_message.decode('utf-8') if raw_message else "",
        })

    return {'num_elem': reported, 'data': data}
def amdsmi_get_gpu_revision(processor_handle: processor_handle_t) -> str:
    """
    Return the revision of a GPU as a padded hexadecimal string.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor handle for the GPU.

    Returns:
        str: The 16-bit revision reported by the library, formatted with
        ``hex`` and passed through ``_pad_hex_value`` with a width of 2.

    Raises:
        AmdSmiParameterException: If the processor handle is invalid.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    revision = ctypes.c_uint16()
    ret = amdsmi_wrapper.amdsmi_get_gpu_revision(processor_handle, ctypes.byref(revision))
    _check_res(ret)

    revision_hex = hex(revision.value)
    return _pad_hex_value(revision_hex, 2)
def amdsmi_get_processor_count_from_handles(processor_handles_list) -> Dict[str, int]:
    """
    Count CPU sockets, CPU cores and GPUs among a list of processor handles.

    Parameters:
        processor_handles_list (list): Processor handles to classify.

    Returns:
        Dict[str, int]: ``nr_cpusockets``, ``nr_cpucores`` and ``nr_gpus`` as
        written by the library.

    Raises:
        AmdSmiParameterException: If ``processor_handles_list`` is not a list.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    if not isinstance(processor_handles_list, list):
        raise AmdSmiParameterException(processor_handles_list, list)

    # Build the C array directly from the Python list.
    handle_total = len(processor_handles_list)
    handle_array_type = amdsmi_wrapper.amdsmi_processor_handle * handle_total
    c_handles = handle_array_type(*processor_handles_list)

    handle_total_p = ctypes.pointer(ctypes.c_uint32(handle_total))
    # One zero-initialised output counter per processor category.
    socket_count_p, core_count_p, gpu_count_p = (
        ctypes.pointer(ctypes.c_uint32(0)) for _ in range(3)
    )

    ret = amdsmi_wrapper.amdsmi_get_processor_count_from_handles(
        c_handles, handle_total_p, socket_count_p, core_count_p, gpu_count_p
    )
    _check_res(ret)

    return {
        'nr_cpusockets': socket_count_p.contents.value,
        'nr_cpucores': core_count_p.contents.value,
        'nr_gpus': gpu_count_p.contents.value,
    }
def amdsmi_get_processor_handles_by_type(socket_handle: socket_handle_t, processor_type: AmdSmiProcessorType):
    """
    Fetch the processor handles of a given type on a socket.

    Parameters:
        socket_handle (amdsmi_socket_handle): The socket to search.
        processor_type (AmdSmiProcessorType): The processor type to return.

    Returns:
        dict: ``processor_handles`` (list of the handles written by the
        library) and ``processor_count`` (the count it reported).

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    socket_type = amdsmi_wrapper.amdsmi_socket_handle
    if not isinstance(socket_handle, socket_type):
        raise AmdSmiParameterException(socket_handle, socket_type)
    if not isinstance(processor_type, AmdSmiProcessorType):
        raise AmdSmiParameterException(processor_type, AmdSmiProcessorType)

    # Fixed-capacity output buffer plus the count the library fills in.
    handle_buffer = (amdsmi_wrapper.amdsmi_processor_handle * MAX_NUM_PROCESSES)()
    found_count = ctypes.c_uint32(0)
    found_count_p = ctypes.pointer(found_count)

    ret = amdsmi_wrapper.amdsmi_get_processor_handles_by_type(
        socket_handle, processor_type, handle_buffer, found_count_p
    )
    _check_res(ret)

    reported = found_count.value
    found_handles = [handle_buffer[position] for position in range(reported)]
    return {
        'processor_handles': found_handles,
        'processor_count': reported,
    }
def amdsmi_gpu_validate_ras_eeprom(processor_handle: processor_handle_t):
    """
    Ask the library to validate the RAS EEPROM of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to validate.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_gpu_validate_ras_eeprom(processor_handle)
    _check_res(ret)
def amdsmi_init_gpu_event_notification(processor_handle: processor_handle_t):
    """
    Initialise event notification for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to set up.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_init_gpu_event_notification(processor_handle)
    _check_res(ret)
def amdsmi_set_gpu_event_notification_mask(processor_handle: processor_handle_t, mask: int):
    """
    Set the event notification mask of a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to configure.
        mask (int): Mask passed to the library as an unsigned 64-bit value.

    Raises:
        AmdSmiParameterException: If an argument has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(mask, int):
        raise AmdSmiParameterException(mask, int)

    c_mask = ctypes.c_uint64(mask)
    ret = amdsmi_wrapper.amdsmi_set_gpu_event_notification_mask(processor_handle, c_mask)
    _check_res(ret)
def amdsmi_stop_gpu_event_notification(
    processor_handle: processor_handle_t
):
    """
    Stop event notification for a processor.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to stop
            notifications for.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    ret = amdsmi_wrapper.amdsmi_stop_gpu_event_notification(processor_handle)
    _check_res(ret)
def amdsmi_get_gpu_busy_percent(processor_handle: processor_handle_t):
    """
    Read the busy percentage of a GPU.

    Parameters:
        processor_handle (amdsmi_processor_handle): The processor to query.

    Returns:
        int: The unsigned 32-bit value written by the library.

    Raises:
        AmdSmiParameterException: If the processor handle has the wrong type.
        AmdSmiLibraryException: If the underlying library call fails.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    busy_percent = ctypes.c_uint32(0)
    ret = amdsmi_wrapper.amdsmi_get_gpu_busy_percent(
        processor_handle, ctypes.byref(busy_percent)
    )
    _check_res(ret)
    return busy_percent.value