Revert "Added AMD-SMI Linux Baremetal"

This reverts commit 013400bee7.

Reason for revert: Branch is still WIP

Change-Id: I75eec813b3d81049f033fe0a534251bd69eeca0e
This commit is contained in:
Maisam Arif
2023-01-19 11:41:42 -05:00
committed by Dmitrii Galantsev
parent ed8f865341
commit 6aa91da74c
11 changed files with 0 additions and 2023 deletions
-96
View File
@@ -1,96 +0,0 @@
import logging
import platform
import re
class BDF(object):
    """PCI Bus/Device/Function address usable with Python's built-in operators.

    Validates a BDF string of the form ``[SSSS:]BB:DD.F`` (all fields are
    hexadecimal) and exposes its parts as integers, so BDFs can be compared,
    concatenated and iterated in a pythonic way.

    Attributes:
        segment: PCI segment (domain), 0-65535; 0 when omitted from the string.
        bus: PCI bus number, 0-255.
        device: PCI device number, 0-31.
        function: PCI function number, 0-7.
        operating_system: Value of ``platform.system()`` at construction time.
    """

    # Textual form "[SSSS:]BB:DD.F"; raw string so that "\." is a regex escape,
    # not an (invalid) Python string escape.
    _BDF_REGEX = r"(?:[0-6]?[0-9a-fA-F]{1,4}:)?[0-2]?[0-9a-fA-F]{1,2}:[0-9a-fA-F]{1,2}\.[0-7]"

    def __init__(self, bdf):
        """Init a BDF object from another BDF or from a BDF string.

        Args:
            bdf: A ``BDF`` instance (copied), or a string such as
                ``"0000:23:00.0"``, ``"23:00.0"`` or ``"BDF(0000:23:00.0)"``.

        Raises:
            ValueError: If a field is not hexadecimal, the string does not have
                3 or 4 fields, or a field is outside its valid range.
        """
        # Tell if this is baremetal vs Virtualization
        self.operating_system = platform.system()

        if isinstance(bdf, BDF):
            # Copy construction: the source instance was validated already.
            self.segment, self.bus, self.device, self.function = tuple(bdf)
            return

        if bdf.startswith("BDF("):
            # Accept our own repr() so that BDF(repr(x)) round-trips.
            bdf = bdf.replace('BDF(', '').replace(')', '')

        try:
            bdf_components = [int(x, 16) for x in re.split('[:.]', bdf)]
        except ValueError:
            logging.error(f"Invalid string passed: {bdf}")
            raise

        if len(bdf_components) not in (3, 4):
            raise ValueError(f"BDF must have 3 or 4 components, got {len(bdf_components)}: {bdf}")
        if min(bdf_components) < 0:
            raise ValueError(f"BDF components can't be negative: {bdf}")

        self.segment = bdf_components[0] if len(bdf_components) == 4 else 0
        self.bus, self.device, self.function = bdf_components[-3:]

        if self.segment > 65535:
            raise ValueError("BDF Segment can't be greater than 65535")
        if self.bus > 255:
            raise ValueError("BDF Bus can't be greater than 255")
        if self.device > 31:
            raise ValueError("BDF Device can't be greater than 31")
        if self.function > 7:
            raise ValueError("BDF Function can't be greater than 7")

    def __eq__(self, passed_bdf):
        """Overrides the == operator and allows for BDF objects to be compared to BDF strings"""
        # Only accept strings and BDF objects
        if isinstance(passed_bdf, str):
            if passed_bdf == '':
                return False
            # A malformed string still raises ValueError, as before.
            passed_bdf = BDF(passed_bdf)
        elif not isinstance(passed_bdf, BDF):
            return False
        return tuple(self) == tuple(passed_bdf)

    def __ne__(self, passed_bdf):
        """Overrides the != operator and allows for BDF objects to be compared to BDF strings"""
        # Since == is overridden we can use that to make this simple
        return not self == passed_bdf

    def __add__(self, passed_bdf):
        """Overrides the + operator and allows for concatenation (BDF + str)"""
        return str(self) + passed_bdf

    def __radd__(self, passed_bdf):
        """Overrides the + operator and allows for concatenation (str + BDF)"""
        return passed_bdf + str(self)

    def __str__(self):
        """Cast BDF object to its canonical ``SSSS:BB:DD.F`` string"""
        # The function is separated by '.', matching _BDF_REGEX and the strings
        # this class is constructed from.
        return "{:04X}:{:02X}:{:02X}.{}".format(self.segment, self.bus, self.device, self.function)

    def __repr__(self):
        """How the BDF object is represented"""
        return f"BDF({self})"

    def __iter__(self):
        """Make the BDF object iterable over its 4 values"""
        yield from (self.segment, self.bus, self.device, self.function)

    def __contains__(self, passed_bdf):
        """Overrides the 'in' operator: True when passed_bdf names this BDF.

        passed_bdf is first normalised through ``BDF(...)``, so an unparsable
        value raises ValueError.
        """
        passed_bdf = str(BDF(passed_bdf))
        return any(self == match for match in re.findall(self._BDF_REGEX, passed_bdf))
-1
View File
@@ -1 +0,0 @@
# Version string of the AMD-SMI tool.
__version__ = "0.0.1"
-31
View File
@@ -1,31 +0,0 @@
#!/usr/bin/python3
# from amd_smi_init import *
from amd_smi_commands import AMD_SMI_Commands
from amd_smi_parser import AMD_SMI_Parser
# sudo /src/out/ubuntu-20.04/20.04/bin/rocm-smi -bc --json | python -m json.tool
if __name__ == "__main__":
    amd_smi_commands = AMD_SMI_Commands()
    # Hand every subcommand callback to the parser by keyword so the pairing
    # between subcommand and handler is explicit.
    amd_smi_parser = AMD_SMI_Parser(
        version=amd_smi_commands.version,
        discovery=amd_smi_commands.discovery,
        static=amd_smi_commands.static,
        firmware=amd_smi_commands.firmware,
        bad_pages=amd_smi_commands.bad_pages,
        metric=amd_smi_commands.metric,
        process=amd_smi_commands.process,
        profile=amd_smi_commands.profile,
        event=amd_smi_commands.event,
        topology=amd_smi_commands.topology,
        set_value=amd_smi_commands.set_value,
        reset=amd_smi_commands.reset,
        misc=amd_smi_commands.misc,
        gpu_v=amd_smi_commands.gpu_v,
    )
    args = amd_smi_parser.parse_args()
    # Dispatch to the handler stored by the selected subparser; this needs to
    # be here to handle subparsers with no subcommands.
    args.func(args)
# AMDSMI logger print out json, csv, or string
-91
View File
@@ -1,91 +0,0 @@
#!/usr/bin/python3
import platform
import signal
import subprocess
import sys
import time
import traceback
from pathlib import Path
# from amd_smi_init import *
from BDF import BDF
from _version import __version__
from amd_smi_logger import AMD_SMI_Logger
class AMD_SMI_Commands(object):
    """Handlers for the amd-smi subcommands.

    Every method takes the parsed argparse namespace of its subcommand. All
    handlers are currently placeholders that only print a marker line.
    """

    # Planned design (not implemented yet):
    # def __init__(self, amd_smi_logger) -> None:
    #     logger = amd_smi_logger
    #     # Make an AMD-SMI-Object-Logger only with the commands object on init
    #     # Call the logger when we want to store a print:
    #     # self.logger.store_output(gpu_id, string) # store in ordered dict
    # Every function prints the logger at the end
    # logger.printoutput(args.json, args.csv) # Which in Logger handles and checks for json or csv
    # Check if init can accept args given, if so then init can be used to call watch functions for looping

    def version(self, args):
        """Print the tool version and a (currently hard-coded) kernel version."""
        kernel_version = 123  # placeholder value, not read from the system
        print(f'AMD-SMI version: {__version__} | Kernel version: {kernel_version}')

    def discovery(self, args):
        """Placeholder handler for the ``discovery`` subcommand."""
        print('discovery test')

    def static(self, args):
        """Placeholder handler for ``static``; echoes the asic/bus/driver flags."""
        # This is where the arg handling comes through
        print(args.asic)
        print(args.bus)
        print(args.driver)
        print('static test')

    def firmware(self, args):
        """Placeholder handler for the ``firmware`` subcommand."""
        print('firmware test')

    def bad_pages(self, args):
        """Placeholder handler for the ``bad_pages`` (retired pages) subcommand."""
        print('Bad Pages test')

    def metric(self, args):
        """Placeholder handler for the ``metric`` subcommand."""
        print('Metric test')

    def process(self, args):
        """Placeholder handler for the ``process`` subcommand."""
        print('Process Test')

    def profile(self, args):
        """Placeholder handler for the ``profile`` subcommand."""
        print('Profile test')

    def event(self, args):
        """Placeholder handler for the ``event`` subcommand."""
        print('event test')

    def topology(self, args):
        """Placeholder handler for the ``topology`` subcommand."""
        print('topology test')

    def set_value(self, args):
        """Placeholder handler for the ``set_value`` subcommand."""
        print('set_value test')

    def reset(self, args):
        """Placeholder handler for the ``reset`` subcommand."""
        print('reset test')

    def misc(self, args):
        """Placeholder handler for the ``misc`` subcommand."""
        print('misc test')

    def gpu_v(self, args):
        """Placeholder handler for the ``gpu_v`` subcommand."""
        # Previously printed the copy-pasted 'misc test', which made the two
        # handlers indistinguishable in the output.
        print('gpu_v test')
-151
View File
@@ -1,151 +0,0 @@
#!/usr/bin/python3
import argparse
import platform
import signal
import subprocess
import sys
import time
import traceback
import logging
from pathlib import Path
from BDF import BDF
from amd_smi_init import *
class AMD_SMI_Helpers(object):
    """Platform probe used to decide which CLI options apply.

    On construction the host is classified from ``platform.system()`` and, on
    Linux, from the DMI product name. The result is exposed through the
    ``is_*`` accessors. The device-lookup methods are stubs for now.
    """

    def __init__(self) -> None:
        # implement basic config for debug logs
        self.operating_system = platform.system()
        self._is_hypervisor = False
        self._is_virtual_os = False
        self._is_baremetal = False
        self._is_linux = False
        self._is_windows = False
        self.virtual_operating_systems_product_names = ['KVM', 'VirtualBox', 'VMware'] #@TODO get KVM example

        os_name = self.operating_system
        if os_name.startswith('Linux'):
            self._is_linux = True
            # KVM hypervisor check @TODO
            dmi_product_file = Path('/sys/class/dmi/id/product_name')
            product_name = dmi_product_file.read_text().strip() if dmi_product_file.exists() else ''
            if product_name:
                # A product name starting with a known VM vendor marks a virtual OS
                self._is_virtual_os = any(
                    product_name.startswith(vm_prefix)
                    for vm_prefix in self.virtual_operating_systems_product_names)
                # Baremetal is determined by deduction from the other two flags
                self._is_baremetal = not self._is_hypervisor and not self._is_virtual_os
            else:
                # Unable to determine product_name, default to baremetal
                self._is_baremetal = True

        if os_name.startswith('VMkernel'):
            self._is_hypervisor = True

        if os_name.startswith('Window'):
            # Check Powershell for Hyper-V enablement
            # Get-CimInstance -ClassName Win32_ComputerSystem Manufacturer
            self._is_windows = True

    def os_info(self):
        """Stub for returning OS info; currently always returns True."""
        return True

    def is_virtual_os(self):
        """Return True when the Linux DMI product name matched a known VM vendor."""
        return self._is_virtual_os

    def is_hypervisor(self):
        """Return True if hypervisor is enabled on the system."""
        return self._is_hypervisor

    def is_baremetal(self):
        """Return True if system is baremetal; a hypervisor should yield False."""
        return self._is_baremetal

    def is_linux(self):
        """Return True when ``platform.system()`` reported Linux."""
        return self._is_linux

    def is_windows(self):
        """Return True when ``platform.system()`` reported a Windows system."""
        return self._is_windows

    def get_gpu_choices(self):
        """Return the selectable GPUs as ``{gpu_index: (BDF, UUID)}``.

        The single entry is hard-coded placeholder data.
        """
        placeholder_entry = (BDF('0000:23:00.0'), '1234')
        return {'1': placeholder_entry}

    def get_devices(self):
        """Stub; not implemented yet."""
        return None

    def get_device_from_socket(self):
        """Stub; not implemented yet."""
        return None

    def get_amd_gpu_bdfs(self):
        """Stub; not implemented yet."""
        return None

    def get_amd_cpu_bdfs(self):
        """Stub; not implemented yet."""
        return None
# def getBus(device):
# """ Return the bus identifier of a given device
# @param device: DRM device identifier
# """
# bdfid = c_uint64(0)
# ret = rocmsmi.rsmi_dev_pci_id_get(device, byref(bdfid))
# # BDFID = ((DOMAIN & 0xffffffff) << 32) | ((BUS & 0xff) << 8) |((DEVICE & 0x1f) <<3 ) | (FUNCTION & 0x7)
# domain = (bdfid.value >> 32) & 0xffffffff
# bus = (bdfid.value >> 8) & 0xff
# device = (bdfid.value >> 3) & 0x1f
# function = bdfid.value & 0x7
# pic_id = '{:04X}:{:02X}:{:02X}.{:0X}'.format(domain, bus, device, function)
# if rsmi_ret_ok(ret, device):
# return pic_id
-71
View File
@@ -1,71 +0,0 @@
#!/usr/bin/python3
### Handle init singularly
# Python does not re-execute a module's code when the module is imported again
import atexit
import logging
import signal
import sys
from pathlib import Path
# Handle bindings for Windows, Hyper-V and KVM separately
from amdsmiBindings import *
# Using basic python logging for user errors and development.
# The root logger is configured as a side effect of importing this module.
# logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) # Logging for Development
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.ERROR) # User level logging
# On initial import set initialized variable; it is flipped to True by the
# module-level initialization code at the bottom of this file.
amd_smi_initialized = False
def check_return(return_code, error_statment): #@TODO would raising an exception be better?
    """Exit the process when an AMD-SMI call did not report success.

    Args:
        return_code: Status value returned by an AMD-SMI library call.
        error_statment: Message logged at ERROR level before exiting.

    On any status other than AMDSMI_STATUS_SUCCESS the message is logged and
    the interpreter exits with ``return_code`` as the exit status.
    """
    if return_code == amdsmi_status.AMDSMI_STATUS_SUCCESS:
        return
    logging.error(error_statment)
    sys.exit(return_code)
def check_amdgpu_driver(): #@TODO Handle KVM logic
    """Return True if the amdgpu kernel module reports the 'live' init state.

    Reads /sys/module/amdgpu/initstate; a missing file or any other content
    yields False.
    """
    initstate_path = Path("/sys/module/amdgpu/initstate")
    return initstate_path.exists() and initstate_path.read_text().strip() == 'live'
def init_amd_smi(flag=amdsmi_init_flags.AMD_SMI_INIT_AMD_GPUS):
    """Initialize AMD-SMI, exiting the process when that is not possible.

    Args:
        flag: Init flag handed to ``amdsmi_init``. Only AMD GPUs are
            initialized by default, waiting for future support for AMD CPUs.

    Exits with status -1 when the amdgpu driver is not live, and with the
    library's status code (via ``check_return``) when ``amdsmi_init`` fails.
    """
    # Check if amdgpu driver is up
    if not check_amdgpu_driver():
        logging.error('Driver not initialized (amdgpu not found in modules)')
        # sys.exit rather than the site-provided exit(): the latter is an
        # interactive helper that is absent under `python -S` or frozen builds.
        sys.exit(-1)

    init_status = amdsmi.amdsmi_init(flag)
    # NOTE(review): the message reads the success value from `amdsmi_status_t`
    # while check_return compares against `amdsmi_status` - confirm which name
    # the bindings actually export.
    check_return(return_code=init_status, error_statment=f'AMD SMI initialization returned {init_status} (the expected value is {amdsmi_status_t.AMDSMI_STATUS_SUCCESS})')
    logging.info('amd-smi initialized successfully')
def amdsmi_shut_down():
    """Shut down AMD-SMI and exit through check_return if the call fails."""
    status = amdsmi.amdsmi_shut_down()
    failure_message = f'AMD SMI Shutdown code returned {status} (the expected value is {amdsmi_status_t.AMDSMI_STATUS_SUCCESS})'
    check_return(return_code=status, error_statment=failure_message)
    logging.debug('amd-smi shutdown successfully')
def signal_handler(sig, frame):
    """Signal callback: log the signal number and terminate with status 0.

    Args:
        sig: Number of the received signal.
        frame: Current stack frame supplied by the signal module (unused).
    """
    logging.debug(f'Handling signal: {sig}')
    raise SystemExit(0)
if not amd_smi_initialized:
    # First import of this module: bring the library up exactly once.
    init_amd_smi()
    amd_smi_initialized = True
    # Route both termination signals through the same handler.
    for _handled_signal in (signal.SIGINT, signal.SIGTERM):
        signal.signal(_handled_signal, signal_handler)
    # Shut the library down when the interpreter exits.
    atexit.register(amdsmi_shut_down)
-46
View File
@@ -1,46 +0,0 @@
#!/usr/bin/python3
# import orderedDict
import json
class AMD_SMI_Logger(object):
    """Skeleton of the AMD-SMI output logger; every method is a no-op stub."""

    def __init__(self) -> None:
        """Create the logger. No state is kept yet."""
        # self.card = {}
        # backwards compatibility

    def store_output(self, target_device, log):
        """Stub: accepts a target device and a log entry, stores nothing."""

    def print_output(self, format=''):
        """Stub for emitting the stored output.

        Intended formats per the original notes: JSON, CSV, text (split into
        three different formats, pretty-printing each stored element).
        """

    def print_json(self):
        """Stub for JSON output.

        Sketch from the original notes: load the data with ``json.loads``,
        re-serialise with ``json.dumps(..., indent=2)`` and print the result.
        """

    def print_csv(self):
        """Stub for CSV output.

        Sketch from the original notes: load the JSON data, open a CSV file
        for writing and feed the records to a ``csv.writer``.
        """
-97
View File
@@ -1,97 +0,0 @@
#!/usr/bin/python3
import argparse
import platform
import signal
import subprocess
import sys
import time
import traceback
from pathlib import Path
import BDF
from amd_smi_init import *
class AMD_SMI_Modules(object):
    """Thin ctypes wrappers around the AMD-SMI library queries.

    Every wrapper checks the library status through ``check_return``, which
    logs the error and exits the process on failure.
    """

    def __init__(self) -> None:
        pass

    def get_socket_handles(self):
        """Return ``(socket_count, socket_handles)`` for the system.

        The library is called twice: once with no buffer to learn the count,
        then with a ``c_void_p`` array of that size to receive the handles.
        """
        socket_count = c_uint32(0)
        return_code = amdsmi.amdsmi_get_socket_handles(byref(socket_count), None)
        check_return(return_code=return_code, error_statment="Invalid get_socket_handles request")

        # `c_void_p * n` builds an array TYPE (not a multiplication); the
        # instance is zero-initialized.
        socket_handles = (c_void_p * socket_count.value)()
        return_code = amdsmi.amdsmi_get_socket_handles(byref(socket_count), socket_handles)
        check_return(return_code=return_code, error_statment=f"Invalid get_socket_handles with {socket_count.value} sockets")
        return (socket_count.value, socket_handles)

    def get_device_handles(self, socket_handle):
        """Gets the Device Handles that are in the current socket.

        Returns ``(device_count, device_handles)``, using the same
        count-then-fill sequence as ``get_socket_handles``.
        """
        device_count = c_uint32(0)
        return_code = amdsmi.amdsmi_get_device_handles(socket_handle, byref(device_count), None)
        check_return(return_code=return_code, error_statment="Invalid get_device_handles request")

        device_handles = (c_void_p * device_count.value)()
        return_code = amdsmi.amdsmi_get_device_handles(socket_handle, byref(device_count), byref(device_handles))
        check_return(return_code=return_code, error_statment=f"Invalid get_device_handles with {device_count.value} devices")
        return (device_count.value, device_handles)

    def get_socket_info(self, socket_handle):
        """Given a socket_handle, return the socket_info, which is just a BDF object."""
        buffer_size = 128
        socket_info = create_string_buffer(buffer_size)
        return_code = amdsmi.amdsmi_get_socket_info(socket_handle, byref(socket_info), c_size_t(buffer_size))
        check_return(return_code=return_code, error_statment="Invalid get_socket_info request")
        return BDF.BDF(socket_info.value.decode())

    def get_device_type(self, device_handle, format=True):
        """Return the device type of device_handle.

        Args:
            device_handle: Handle obtained from ``get_device_handles``.
            format: True to return the type as a string, False for the raw int.
        """
        dev_type = c_int(0)
        return_code = amdsmi.amdsmi_get_device_type(device_handle, byref(dev_type))
        check_return(return_code=return_code, error_statment="Invalid get_device_type request")
        if format:  # Return string
            return device_type__enumvalues[dev_type.value]
        return dev_type.value  # Return int

    def get_device_bdf(self, device_handle):
        """Return the ``amdsmi_bdf_t`` filled in by the library for device_handle.

        amdsmi_bdf_t is a Union of ``bdf_submodule`` and ``as_uint`` (c_uint64).
        """
        bdf = amdsmi_bdf_t()
        # bdf is an out-parameter: pass it by reference, like every other
        # getter in this class, so the library writes into this instance
        # rather than into a by-value copy.
        return_code = amdsmi.amdsmi_get_device_bdf(device_handle, byref(bdf))
        check_return(return_code=return_code, error_statment="Invalid amdsmi_get_device_bdf request")
        return bdf

    def get_device_handle_from_bdf(self, bdf):
        """Stub; not implemented yet."""
        pass

    def get_fan_speed(self, bdf):
        """Stub; not implemented yet."""
        pass

    def show_retired_pages(self):
        """Stub; not implemented yet."""
        # num_pages = c_uint32()
        # records = rsmi_retired_page_record_t()
        pass
-619
View File
@@ -1,619 +0,0 @@
#!/usr/bin/python3
import argparse
import platform
from _version import __version__
from amd_smi_helpers import AMD_SMI_Helpers
# sudo /src/out/ubuntu-20.04/20.04/bin/rocm-smi -bc --json | python -m json.tool
class AMD_SMI_Parser(argparse.ArgumentParser):
def __init__(self, version, discovery, static, firmware, bad_pages, metric,
             process, profile, event, topology, set_value, reset, misc, gpu_v):
    """Build the ``amd-smi`` argument parser and register every subcommand.

    Each positional argument is the callable that handles the subcommand of
    the same name; it is stored on the matching subparser as ``func`` so the
    caller can dispatch with ``args.func(args)``.

    ``set_value`` is accepted but not registered yet (its subparser is
    disabled below).
    """
    # Helper variables
    self.amd_smi_helpers = AMD_SMI_Helpers()
    self.gpu_choices = self.amd_smi_helpers.get_gpu_choices()
    self.vf_choices = ['3','2','1']

    # Adjust argument parser options
    super().__init__(
        formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90),
        # formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=f'AMD System Management Interface | Version: {__version__}', #@TODO add the environment
        add_help=True,
        prog='amd-smi')

    # Setup subparsers
    subparsers = self.add_subparsers(
        title="AMD-SMI Commands",
        parser_class=argparse.ArgumentParser,
        required=True,
        help='Descriptions:',
        # dest='cmd',
        metavar="")

    # Add all subparsers
    # Add --json, --csv,--file,--loglevel, watch, watch_time, & iterations && backwards compatibility --gpuvsmi --rocmsmi
    self.add_version_parser(subparsers, version)
    self.add_discovery_parser(subparsers, discovery)
    self.add_static_parser(subparsers, static)
    self.add_firmware_parser(subparsers, firmware)
    self.add_bad_pages_parser(subparsers, bad_pages)
    self.add_metric_parser(subparsers, metric)
    self.add_process_parser(subparsers, process)
    self.add_profile_parser(subparsers, profile)
    self.add_event_parser(subparsers, event)
    self.add_topology_parser(subparsers, topology)
    # self.add_set_value_parser(subparsers, set_value)
    self.add_reset_parser(subparsers, reset)
    self.add_misc_parser(subparsers, misc)
    # Bind the gpu_v subcommand to its own handler; it was previously wired
    # to `misc`, leaving the gpu_v callback unused.
    self.add_gpu_v_parser(subparsers, gpu_v)
def add_version_parser(self, subparsers, func):
    """Register the ``version`` subcommand and bind it to ``func``.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    def wide_help_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    parser = subparsers.add_parser('version', help="Display version information", description=None)
    # The subcommand has no options of its own, so drop the section title.
    parser._optionals.title = None
    parser.formatter_class = wide_help_formatter
    parser.set_defaults(func=func)
def add_discovery_parser(self, subparsers, func):
    """Register the ``discovery`` subcommand and bind it to ``func``.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    def wide_help_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    long_description = """Lists all the devices on the system and the links between devices.
Lists all the sockets and for each socket, GPUs and/or CPUs associated to
that socket alongside some basic information for each device.
In virtualization environment, it can also list VFs associated to each
GPU with some basic information for each VF."""

    parser = subparsers.add_parser(
        'discovery',
        help="Display discovery information",
        description=long_description)
    parser.formatter_class = wide_help_formatter
    parser.set_defaults(func=func)
def add_static_parser(self, subparsers, func):
    """Register the ``static`` subcommand with its platform-dependent flags.

    ``--gpu`` and six boolean flags are always added. ``--ras`` is added on
    hypervisors and baremetal (plus ``--board`` on Linux). ``--vf`` and the
    DFC/FB/VF-count flags are added on hypervisors only.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    helpers = self.amd_smi_helpers

    static_parser = subparsers.add_parser(
        'static',
        help="Gets static information about the specified GPU",
        description="""If no argument is provided, return static information for all GPUs on the system.
If no static argument is specified all static information will be displayed.""")
    static_parser._optionals.title = "Static Arguments"
    static_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
    static_parser.set_defaults(func=func)

    def add_flags(flag_rows):
        # Every row is (short option, long option, help text) of a boolean flag.
        for short_opt, long_opt, help_text in flag_rows:
            static_parser.add_argument(short_opt, long_opt, action='store_true', required=False, help=help_text)

    # Device selectors are mutually exclusive within the subparser
    device_args = static_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help="Select a GPU from the possible choices", choices=self.gpu_choices)

    # Flags available on every platform
    add_flags([
        ('-a', '--asic', "All asic information"),
        ('-b', '--bus', "All bus information"),
        ('-v', '--vbios', "All video bios information (if available)"),
        ('-l', '--limit', "All limit metric values (i.e. power and thermal limits)"),
        ('-d', '--driver', "Displays driver version"),
        ('-c', '--caps', "All caps information"),
    ])

    # Flags for Hypervisors and Baremetal
    if helpers.is_hypervisor() or helpers.is_baremetal():
        add_flags([('-r', '--ras', "Displays RAS features information")])
        if helpers.is_linux(): #@TODO Check if applicable to Azure
            add_flags([('-B', '--board', "All board information")])

    # Flags only for Hypervisors
    if helpers.is_hypervisor():
        vf_help = """Gets general information about the specified VF (timeslice, fb info, …).
Available only on virtualization OSs"""
        device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices)
        add_flags([
            ('-du', '--dfc-ucode', "All DFC FW table information"),
            ('-f', '--fb-info', "Displays Frame Buffer information"),
            ('-n', '--num-vf', "Displays number of supported and enabled VFs"),
        ])
def add_firmware_parser(self, subparsers, func):
    """Register the ``firmware`` subcommand.

    ``--gpu`` and ``--fw-list`` are always added; ``--vf`` and
    ``--error-records`` are added on hypervisors only.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    def wide_help_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    firmware_parser = subparsers.add_parser(
        'firmware',
        help="Gets firmware information about the specified GPU",
        description="If no argument is provided, return firmware information for all GPUs on the system.")
    firmware_parser._optionals.title = "Firmware Arguments"
    firmware_parser.formatter_class = wide_help_formatter
    firmware_parser.set_defaults(func=func)

    # Device selectors are mutually exclusive within the subparser
    device_args = firmware_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help="Select a GPU from the possible choices", choices=self.gpu_choices)

    firmware_parser.add_argument('-f', '--fw-list', action='store_true', required=False, help="All FW list information") # Redundant?

    if not self.amd_smi_helpers.is_hypervisor():
        return

    # Options to only display on a Hypervisor
    vf_help = """Gets general information about the specified VF (timeslice, fb info, …).
Available only on virtualization OSs"""
    device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices)
    firmware_parser.add_argument('-e', '--error-records', action='store_true', required=False, help="All error records information")
def add_bad_pages_parser(self, subparsers, func): #@TODO Retired pages?
    """Register the ``bad_pages`` subcommand on Linux baremetal systems.

    Nothing is registered on any other platform.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    helpers = self.amd_smi_helpers
    on_linux_baremetal = helpers.is_baremetal() and helpers.is_linux()
    if not on_linux_baremetal:
        # The bad_pages subcommand is only applicable to Linux Baremetal systems
        return

    bad_pages_parser = subparsers.add_parser(
        'bad_pages',
        help="Gets bad page information about the specified GPU",
        description="If no argument is provided, return bad page information for all GPUs on the system.")
    bad_pages_parser._optionals.title = "Bad pages Arguments"
    bad_pages_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
    bad_pages_parser.set_defaults(func=func)

    # Device selectors are mutually exclusive within the subparser
    device_args = bad_pages_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help="Select a GPU from the possible choices", choices=self.gpu_choices)

    # Boolean flags as (short option, long option, help text)
    page_flags = (
        ('-p', '--pending', "Displays all pending retired pages"),
        ('-r', '--retired', "Displays retired pages"), #@TODO Wording
        ('-u', '--un-res', "Displays unreservable pages"),
    )
    for short_opt, long_opt, help_text in page_flags:
        bad_pages_parser.add_argument(short_opt, long_opt, action='store_true', required=False, help=help_text)
def add_metric_parser(self, subparsers, func):
    """Register the ``metric`` subcommand with its platform-dependent flags.

    ``--gpu`` and ``--usage`` are always added. The remaining flags depend on
    whether the helpers report a virtual OS, a hypervisor, baremetal and/or
    Linux.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    helpers = self.amd_smi_helpers

    metric_parser = subparsers.add_parser(
        'metric',
        help="Gets metric/performance information about the specified GPU",
        description="""If no argument is provided, return metric information for all GPUs on the system.
If no metric argument is specified all metric information will be displayed.""")
    metric_parser._optionals.title = "Metric arguments"
    metric_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
    metric_parser.set_defaults(func=func)

    def add_flags(flag_rows):
        # Every row is (short option, long option, help text) of a boolean flag.
        for short_opt, long_opt, help_text in flag_rows:
            metric_parser.add_argument(short_opt, long_opt, action='store_true', required=False, help=help_text)

    # Device selectors are mutually exclusive within the subparser
    device_args = metric_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help="Select a GPU from the possible choices", choices=self.gpu_choices)

    # Available everywhere
    add_flags([('-u', '--usage', "All metrics usage information")])

    # Virtual OS and Baremetal systems
    if helpers.is_virtual_os() or helpers.is_baremetal():
        add_flags([('-b', '--fb-usage', "Total and used framebuffer")])

    # Hypervisors and Baremetal systems
    if helpers.is_hypervisor() or helpers.is_baremetal():
        add_flags([
            ('-p', '--power', "Current power usage"),
            ('-c', '--clock', "Average, max, and current clock frequencies"),
            ('-t', '--temperature', "Current temperatures"),
            ('-e', '--ecc', "Number of ECC errors"),
            ('-P', '--pcie', "Current PCIe speed and width"),
            ('-v', '--voltage', "Current GPU voltages"),
        ])

    # Linux Baremetal systems #@TODO Discuss logic if Linux Hypervisors would be allowed to have this
    if helpers.is_baremetal() and helpers.is_linux():
        add_flags([
            ('-f', '--fan', "Current fan speed"),
            ('-s', '--pcie-usage', "Estimated PCIe link usage"),
            ('-V', '--voltage-curve', "Display voltage curve"),
            ('-o', '--overdrive', "Current GPU clock overdrive level"),
            ('-m', '--mem-overdrive', "Current memory clock overdrive level"),
            ('-l', '--perf-level', "Current DPM performance level"),
            ('-r', '--replay-count', "PCIe replay count"),
            ('-x', '--xgmi-err', "XGMI error information since last read"),
            ('-E', '--energy', "Amount of energy consumed"), #@TODO ? Available only on host Linux Baremetal platforms
        ])

    # Hypervisors only
    if helpers.is_hypervisor():
        vf_help = """Gets general information about the specified VF (timeslice, fb info, …).
Available only on virtualization OSs"""
        device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices)
        add_flags([
            ('-s', '--schedule', "All scheduling information"),
            ('-g', '--guard', "All guard information"),
            ('-G', '--guest', "All guest data information"),
        ])
def add_process_parser(self, subparsers, func):
    """Register the ``process`` subcommand everywhere except on hypervisors.

    Args:
        subparsers: Object returned by ``add_subparsers()``.
        func: Callable stored as the ``func`` default of the subparser.
    """
    if self.amd_smi_helpers.is_hypervisor():
        # Don't add this subparser on Hypervisors
        return

    def wide_help_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    name_help = """Gets all process information about the specified process based on Process Name.
If multiple processes have the same name information is returned for all of them.""" #@TODO wording

    process_parser = subparsers.add_parser(
        'process',
        help="Lists general process information running on the specified GPU",
        description="""If no argument is provided, returns information for all GPUs on the system.
If no argument is provided all process information will be displayed.""")
    process_parser._optionals.title = "Process arguments"
    process_parser.formatter_class = wide_help_formatter
    process_parser.set_defaults(func=func)

    # Device selectors are mutually exclusive within the subparser
    device_args = process_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help="Select a GPU from the possible choices", choices=self.gpu_choices)

    # Boolean flags first, then the two value-taking filters
    for short_opt, long_opt, help_text in (
            ('-g', '--general', "pid, process name, memory usage"),
            ('-e', '--engine', "All engine usages")):
        process_parser.add_argument(short_opt, long_opt, action='store_true', required=False, help=help_text)
    for short_opt, long_opt, help_text in (
            ('-p', '--pid', "Gets all process information about the specified process based on Process ID"),
            ('-n', '--name', name_help)):
        process_parser.add_argument(short_opt, long_opt, action='store', required=False, help=help_text)
def add_profile_parser(self, subparsers, func):
    """Register the ``profile`` subcommand on ``subparsers``.

    The subcommand is only added when both ``is_windows()`` and
    ``is_hypervisor()`` of ``self.amd_smi_helpers`` report true.

    Args:
        subparsers: object returned by ``ArgumentParser.add_subparsers()``.
        func: callable stored as the ``func`` default of the subcommand.
    """
    helpers = self.amd_smi_helpers
    # This subparser only applies to Azure Hyper-V systems
    if not helpers.is_windows() or not helpers.is_hypervisor():
        return

    def wide_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    parser = subparsers.add_parser(
        'profile',
        help="Displays information about all profiles and current profile",
        description="If no argument is provided, returns information for all GPUs on the system.",
    )
    parser._optionals.title = "Profile Arguments"
    parser.formatter_class = wide_formatter
    parser.set_defaults(func=func)

    # Device selection lives in its own mutually exclusive group
    gpu_group = parser.add_mutually_exclusive_group()
    gpu_group.add_argument('--gpu', action='store', choices=self.gpu_choices,
                           help="Select a GPU from the possible choices")
def add_event_parser(self, subparsers, func):
    """Register the ``event`` subcommand on ``subparsers``.

    The subcommand is added only when ``is_linux()`` is true and
    ``is_virtual_os()`` is false on ``self.amd_smi_helpers``.

    Args:
        subparsers: object returned by ``ArgumentParser.add_subparsers()``.
        func: callable stored as the ``func`` default of the subcommand.
    """
    # The original guard lacked the ``not`` and therefore skipped exactly the
    # platforms the comment below names; the sibling builders all use the
    # ``if not (<applicable>): return`` form.
    if not (self.amd_smi_helpers.is_linux() and not self.amd_smi_helpers.is_virtual_os()):
        # This subparser only applies to Linux BareMetal & Linux Hypervisors
        return

    # Subparser help text
    event_help = "Displays event information for the given GPU"
    event_subcommand_help = "If no argument is provided, returns event information for all GPUs on the system."
    event_optionals_title = "Event Arguments"

    # Required arguments help text
    gpu_help = "Select a GPU from the possible choices"

    # Create event subparser
    event_parser = subparsers.add_parser('event', help=event_help, description=event_subcommand_help)
    event_parser._optionals.title = event_optionals_title
    event_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
    event_parser.set_defaults(func=func)

    # Mutually Exclusive Args within the subparser
    device_args = event_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)
def add_topology_parser(self, subparsers, func):
    """Register the ``topology`` subcommand on ``subparsers``.

    The subcommand is only added when both ``is_baremetal()`` and
    ``is_linux()`` of ``self.amd_smi_helpers`` report true.

    Args:
        subparsers: object returned by ``ArgumentParser.add_subparsers()``.
        func: callable stored as the ``func`` default of the subcommand.
    """
    helpers = self.amd_smi_helpers
    # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
    if not helpers.is_baremetal() or not helpers.is_linux():
        return

    def wide_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    parser = subparsers.add_parser(
        'topology',
        help="Displays topology information of the devices.",
        description="If no argument is provided, returns information for all GPUs on the system.",
    )
    parser._optionals.title = "Topology arguments"
    parser.formatter_class = wide_formatter
    parser.set_defaults(func=func)

    # Device selection lives in its own mutually exclusive group
    gpu_group = parser.add_mutually_exclusive_group()
    gpu_group.add_argument('--gpu', action='store', choices=self.gpu_choices,
                           help="Select a GPU from the possible choices")

    # Boolean switches selecting which topology view to display
    switches = (
        ('-a', '--topo-access', "Displays link accessibility between GPUs"),
        ('-w', '--topo-weight', "Displays relative weight between GPUs"),
        ('-o', '--topo-hops', "Displays the number of hops between GPUs"),
        ('-t', '--topo-type', "Displays the link type between GPUs."),
        ('-n', '--topo-numa', "Displays the numa nodes."),
    )
    for short_flag, long_flag, text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', required=False, help=text)
def add_set_value_parser(self, subparsers, func):
    """Register the ``set`` subcommand on ``subparsers``.

    The subcommand is only added when both ``is_baremetal()`` and
    ``is_linux()`` of ``self.amd_smi_helpers`` report true.  ``--gpu`` is
    required; every other option is optional and takes the operands listed
    in the option table below.

    Args:
        subparsers: object returned by ``ArgumentParser.add_subparsers()``.
        func: callable stored as the ``func`` default of the subcommand.
    """
    if not (self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()):
        # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
        return

    # Subparser help text
    set_value_help = "Set options for devices."
    set_value_subcommand_help = "The user must specify one of the options for the set configuration."
    set_value_optionals_title = "Set Arguments"

    # Required arguments help text
    gpu_help = "Select a GPU from the possible choices"

    # (short flag, long flag, operand names, help text).
    # The previous implementation passed the topology help variables (not
    # defined in this method) and registered '--topo-numa' repeatedly, which
    # fails before any option is usable.  The flags and operand lists below
    # follow the option specification that accompanied that code.
    # NOTE(review): '--setmclk' is listed there as <type> <level>; confirm
    # whether it should take only <level> like '--setsclk'.
    option_table = (
        ('-c', '--setclk', ('TYPE', 'LEVEL'), "Sets clock frequency levels for specified clocks"),
        ('-s', '--setsclk', ('LEVEL',), "Sets GPU clock frequency levels"),
        ('-m', '--setmclk', ('TYPE', 'LEVEL'), "Sets memory clock frequency levels"),
        ('-p', '--setpcie', ('LEVEL',), "Sets PCIe clock frequency levels"),
        ('-S', '--setslevel', ('SCLK_LEVEL', 'SCLK', 'SVOLT'),
         "Change GPU clock frequency and voltage for a specific level"),
        ('-M', '--setmlevel', ('MCLK_LEVEL', 'MCLK', 'MVOLT'),
         "Change GPU memory frequency and voltage for a specific level"),
        ('-v', '--setvc', ('POINT', 'SCLK', 'SVOLT'), "Change SCLK voltage curve for a specified point"),
        ('-r', '--setsrange', ('SCLK_MIN', 'SCLK_MAX'), "Sets min and max SCLK speed"),
        ('-R', '--setmrange', ('MCLK_MIN', 'MCLK_MAX'), "Sets min and max MCLK speed"),
        # argparse %-formats help text, so the literal percent sign is escaped.
        ('-f', '--setfan', ('LEVEL',), "Sets GPU fan speed (level or %%)"),
        ('-pl', '--setperflevel', ('LEVEL',), "Sets performance level"),
        ('-o', '--setoverdrive', ('PERCENT',), "Set GPU overdrive level"),
        ('-O', '--setmemoverdrive', ('PERCENT',), "Set memory overclock overdrive level"),
        ('-po', '--setpoweroverdrive', ('POWER',),
         "Set the maximum GPU power using power overdrive in Watts"),
        ('-P', '--setprofile', ('PROFILE',),
         "Set power profile level (#) or a quoted string of custom profile attributes"),
        ('-pd', '--setperfdet', ('SCLK',),
         "Set GPU clock frequency limit to get minimal performance variation"),
        ('-re', '--rasenable', ('BLOCK', 'ERR_TYPE'), "Enable RAS for specified block and error type"),
        ('-rd', '--rasdisable', ('BLOCK', 'ERR_TYPE'), "Disable RAS for specified block and error type."),
        ('-ri', '--rasinject', ('BLOCK',), "Inject RAS poison for specified block"),
    )

    # Create set_value subparser
    set_value_parser = subparsers.add_parser('set', help=set_value_help, description=set_value_subcommand_help)
    set_value_parser._optionals.title = set_value_optionals_title
    set_value_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
    set_value_parser.set_defaults(func=func)

    # Mutually Exclusive Args within the subparser
    device_args = set_value_parser.add_mutually_exclusive_group(required=True)
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)

    # Optional Args
    for short_flag, long_flag, operands, help_text in option_table:
        if len(operands) == 1:
            # Single operand: stored as a plain string.
            set_value_parser.add_argument(short_flag, long_flag, action='store', required=False,
                                          metavar=operands[0], help=help_text)
        else:
            # Several operands: stored as a list of strings, one per operand.
            set_value_parser.add_argument(short_flag, long_flag, action='store', required=False,
                                          nargs=len(operands), metavar=operands, help=help_text)
def add_reset_parser(self, subparsers, func):
    """Register the ``reset`` subcommand on ``subparsers``.

    The subcommand is only added when both ``is_baremetal()`` and
    ``is_linux()`` of ``self.amd_smi_helpers`` report true.  ``--gpu`` is
    required; the remaining options are boolean switches.

    Args:
        subparsers: object returned by ``ArgumentParser.add_subparsers()``.
        func: callable stored as the ``func`` default of the subcommand.
    """
    helpers = self.amd_smi_helpers
    # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
    if not helpers.is_baremetal() or not helpers.is_linux():
        return

    def wide_formatter(prog):
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    parser = subparsers.add_parser(
        'reset',
        help="Reset options for devices.",
        description="The user must specify one of the options to reset devices.",
    )
    parser._optionals.title = "Reset Arguments"
    parser.formatter_class = wide_formatter
    parser.set_defaults(func=func)

    # Device selection is mandatory for this subcommand
    gpu_group = parser.add_mutually_exclusive_group(required=True)
    gpu_group.add_argument('--gpu', action='store', choices=self.gpu_choices,
                           help="Select a GPU from the possible choices")

    # Boolean switches, one per resettable item
    switches = (
        ('-g', '--gpureset', "Reset the specified GPU"),
        ('-c', '--resetclk', "Reset clocks and overdrive to default"),
        ('-f', '--resetfans', "Reset fans to automatic (driver) control"),
        ('-p', '--resetprofile', "Reset power profile back to default"),
        ('-o', '--resetpoweroverdrive', "Set the maximum GPU power back to the device default state"),
        ('-x', '--resetxgmierr', "Reset XGMI error counts"),
        ('-d', '--resetperfdet', "Disable performance determinism"),
    )
    for short_flag, long_flag, text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true', required=False, help=text)
def add_misc_parser(self, subparsers, func):
    """Register the ``misc`` subcommand on ``subparsers``.

    The subcommand is only added when both ``is_baremetal()`` and
    ``is_linux()`` of ``self.amd_smi_helpers`` report true.  ``--gpu`` is
    required.  ``--load`` yields a file object opened for reading and
    ``--save`` a file object opened for writing.

    Args:
        subparsers: object returned by ``ArgumentParser.add_subparsers()``.
        func: callable stored as the ``func`` default of the subcommand.
    """
    if not (self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()):
        # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
        return

    def open_for_writing(path):
        # '--save' names an output file that usually does not exist yet; the
        # plain ``open`` converter used before opened it read-only and raised
        # FileNotFoundError for any new file.
        return open(path, 'w')

    # Subparser help text
    misc_help = "The miscellaneous options"
    # The previous description was copied from the reset subcommand.
    misc_subcommand_help = "The user must specify one of the options to load or save device settings."
    misc_optionals_title = "Misc Arguments"

    # Optional arguments help text
    gpu_help = "Select a GPU from the possible choices"
    load_help = "Load clock, fan, performance, and profile settings from a given file."
    save_help = "Save clock, fan, performance, and profile settings to a given file."

    # Create misc subparser
    misc_parser = subparsers.add_parser('misc', help=misc_help, description=misc_subcommand_help)
    misc_parser._optionals.title = misc_optionals_title
    misc_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
    misc_parser.set_defaults(func=func)

    # Mutually Exclusive Args within the subparser
    device_args = misc_parser.add_mutually_exclusive_group(required=True)
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)

    # Optional Args
    misc_parser.add_argument('-l', '--load', action='store', type=open, required=False, help=load_help)
    misc_parser.add_argument('-s', '--save', action='store', type=open_for_writing, required=False, help=save_help)
# def add_gpu_v_parser(self, subparsers, func):
# if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()):
# # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
# return
# # Subparser help text
# gpu_v_help = "The gpu_v options"
# gpu_v_subcommand_help = "The user must specify one of the options to reset devices."
# gpu_v_optionals_title = "gpu_v Arguments"
# # Optional arguments help text
# gpu_help = "Select a GPU from the possible choices"
# load_help = "Load clock, fan, performance, and profile settings from a given file."
# save_help = "Save clock, fan, performance, and profile settings to a given file."
# # Create gpu_v subparser
# gpu_v_parser = subparsers.add_parser('gpu_v', help=gpu_v_help, description=gpu_v_subcommand_help)
# gpu_v_parser._optionals.title = gpu_v_optionals_title
# gpu_v_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
# gpu_v_parser.set_defaults(func=func)
# # Mutually Exclusive Args within the subparser
# device_args = gpu_v_parser.add_mutually_exclusive_group(required=True)
# device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)
# # Optional Args
# gpu_v_parser.add_argument('-l', '--load', action='store', type=open, required=False, help=load_help)
# gpu_v_parser.add_argument('-s', '--save', action='store', type=open, required=False, help=save_help)
-818
View File
@@ -1,818 +0,0 @@
#!/usr/bin/env python3
"""! @brief AMD_SMI FFI"""
from ctypes import *
from enum import Enum
import os
# Location of the AMD SMI shared library.
path_amdsmi = '/opt/rocm/lib/libamd_smi64.so' #@TODO make this dynamic
try:
    # CDLL loads the shared object and returns the handle in one step; the
    # separate cdll.LoadLibrary() call that used to precede it loaded the same
    # file a second time and discarded the result.
    amdsmi = CDLL(path_amdsmi)
except OSError:
    print("Unable to load libamd_smi64.so library\n")
    # Raise SystemExit directly: the exit() helper is injected by the site
    # module and is not guaranteed to exist (e.g. under ``python -S``).
    raise SystemExit(1) from None
##
# @brief Initialization flags
#
# may be OR'd together and passed to smi.amdsmi_init()
##
class amdsmi_init_flags(c_int):
    """Initialization flag constants, exposed on a ``c_int`` subclass."""

    AMD_SMI_INIT_ALL_DEVICES = 0x0   # Default option
    AMD_SMI_INIT_AMD_CPUS = 0x1      # bit 0
    AMD_SMI_INIT_AMD_GPUS = 0x2      # bit 1
    AMD_SMI_INIT_NON_AMD_CPUS = 0x4  # bit 2
    AMD_SMI_INIT_NON_AMD_GPUS = 0x8  # bit 3
# Maximum size definitions GPUVSMI
# These values are used below as array lengths in the ctypes structure
# definitions (e.g. ``c_char * AMDSMI_MAX_STRING_LENGTH``).
AMDSMI_MAX_MM_IP_COUNT = 8
AMDSMI_MAX_DATE_LENGTH = 32 # date string format: YYYY-MM-DD:HH:MM:SS.MSC
AMDSMI_MAX_STRING_LENGTH = 64
AMDSMI_NORMAL_STRING_LENGTH = 32
AMDSMI_MAX_DEVICES = 32
AMDSMI_MAX_NAME = 32
AMDSMI_MAX_DRIVER_VERSION_LENGTH = 80
AMDSMI_PRODUCT_NAME_LENGTH = 128
AMDSMI_MAX_CONTAINER_TYPE = 2
AMDSMI_GPU_UUID_SIZE = 38
class amdsmi_mm_ip(c_int):
    """``amdsmi_mm_ip`` identifiers, exposed on a ``c_int`` subclass."""

    MM_UVD = 0
    MM_VCE = 1
    MM_VCN = 2
    MM__MAX = 3  # one past the last identifier above
class amdsmi_container_types(c_int):
    """Container type identifiers, exposed on a ``c_int`` subclass."""

    CONTAINER_LXC = 0
    CONTAINER_DOCKER = 1
# Opaque handles that point to the underlying implementation.
# Both are declared as the ctypes pointer type POINTER(c_uint).
amdsmi_device_handle = POINTER(c_uint)
amdsmi_socket_handle = POINTER(c_uint)
class device_type(c_int):
    """Device type identifiers, exposed on a ``c_int`` subclass."""

    UNKNOWN = 0
    AMD_GPU = 1
    AMD_CPU = 2
    NON_AMD_GPU = 3
    NON_AMD_CPU = 4
# Maps each device_type integer value to its symbolic name.
device_type__enumvalues = dict(enumerate((
    'UNKNOWN',
    'AMD_GPU',
    'AMD_CPU',
    'NON_AMD_GPU',
    'NON_AMD_CPU',
)))
# Error codes returned by amd_smi_lib functions
class amdsmi_status(c_int):
    """Status code constants, exposed on a ``c_int`` subclass."""

    AMDSMI_STATUS_SUCCESS = 0               # Call succeeded
    AMDSMI_STATUS_INVAL = 1                 # Invalid parameters
    AMDSMI_STATUS_NOT_SUPPORTED = 2         # Command not supported
    AMDSMI_STATUS_FILE_ERROR = 3            # Problem accessing a file.
    AMDSMI_STATUS_NO_PERM = 4               # Permission Denied
    AMDSMI_STATUS_OUT_OF_RESOURCES = 5      # Not enough memory
    AMDSMI_STATUS_INTERNAL_EXCEPTION = 6    # An internal exception was caught
    AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS = 7   # The provided input is out of allowable or safe range
    AMDSMI_STATUS_INIT_ERROR = 8            # An error occurred when initializing internal data structures
    AMDSMI_STATUS_NOT_YET_IMPLEMENTED = 9   # Not implemented yet
    AMDSMI_STATUS_NOT_FOUND = 10            # Device Not found
    AMDSMI_STATUS_INSUFFICIENT_SIZE = 11    # Not enough resources were available for the operation
    AMDSMI_STATUS_INTERRUPT = 12            # An interrupt occurred during execution of function
    AMDSMI_STATUS_UNEXPECTED_SIZE = 13      # An unexpected amount of data was read
    AMDSMI_STATUS_NO_DATA = 14              # No data was found for a given input
    AMDSMI_STATUS_UNEXPECTED_DATA = 15      # The data read or provided to function is not what was expected
    AMDSMI_STATUS_BUSY = 16                 # Device busy
    AMDSMI_STATUS_REFCOUNT_OVERFLOW = 17    # An internal reference counter exceeded INT32_MAX

    # Codes from AMDSMI_LIB_START upwards, written as offsets from that base
    AMDSMI_LIB_START = 1000
    AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START      # Fail to load lib
    AMDSMI_STATUS_FAIL_LOAD_SYMBOL = AMDSMI_LIB_START + 1
    AMDSMI_STATUS_DRM_ERROR = AMDSMI_LIB_START + 2         # Error when call libdrm
    AMDSMI_STATUS_IO = AMDSMI_LIB_START + 3                # Error
    AMDSMI_STATUS_FAULT = AMDSMI_LIB_START + 4             # Bad address
    AMDSMI_STATUS_API_FAILED = AMDSMI_LIB_START + 5        # API call failed
    AMDSMI_STATUS_TIMEOUT = AMDSMI_LIB_START + 6           # Timeout in API call
    AMDSMI_STATUS_NO_SLOT = AMDSMI_LIB_START + 7           # No more free slot
    AMDSMI_STATUS_RETRY = AMDSMI_LIB_START + 8             # Retry operation
    AMDSMI_STATUS_NOT_INIT = AMDSMI_LIB_START + 9          # Device not initialized

    AMDSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF  # An unknown error occurred


# Alias with the ``_t`` suffix
amdsmi_status_t = amdsmi_status
# Clock types
class amdsmi_clk_type(c_int):
    """Clock type identifiers, exposed on a ``c_int`` subclass.

    Every member is a plain ``int``.  ``CLK_TYPE_SYS`` previously carried a
    C-style trailing comma, which made it (and its aliases ``CLK_TYPE_FIRST``
    and ``CLK_TYPE_GFX``) the tuple ``(0,)`` instead of ``0``.
    """

    CLK_TYPE_SYS = 0x0    # System clock
    CLK_TYPE_FIRST = CLK_TYPE_SYS
    CLK_TYPE_GFX = CLK_TYPE_SYS
    CLK_TYPE_DF = 0x1     # Data Fabric clock (for ASICs running on a separate clock)
    CLK_TYPE_DCEF = 0x2   # Display Controller Engine clock
    CLK_TYPE_SOC = 0x3
    CLK_TYPE_MEM = 0x4
    CLK_TYPE_PCIE = 0x5
    CLK_TYPE_VCLK0 = 0x6
    CLK_TYPE_VCLK1 = 0x7
    CLK_TYPE_DCLK0 = 0x8
    CLK_TYPE_DCLK1 = 0x9
    CLK_TYPE__MAX = CLK_TYPE_DCLK1


# Alias with the ``_t`` suffix
amdsmi_clk_type_t = amdsmi_clk_type
# This enumeration is used to indicate from which part of the device a
# temperature reading should be obtained
class amdsmi_temperature_type(c_int):
    """Temperature sensor location identifiers, exposed on a ``c_int`` subclass."""

    TEMPERATURE_TYPE_EDGE = 0
    TEMPERATURE_TYPE_FIRST = TEMPERATURE_TYPE_EDGE
    TEMPERATURE_TYPE_JUNCTION = 1
    TEMPERATURE_TYPE_VRAM = 2
    TEMPERATURE_TYPE_HBM_0 = 3
    TEMPERATURE_TYPE_HBM_1 = 4
    TEMPERATURE_TYPE_HBM_2 = 5
    TEMPERATURE_TYPE_HBM_3 = 6
    TEMPERATURE_TYPE_PLX = 7
    TEMPERATURE_TYPE__MAX = TEMPERATURE_TYPE_PLX
# The values of this enum are used to identify the various firmware
# blocks.
class amdsmi_fw_block_t(c_int):
    """Firmware block identifiers, exposed on a ``c_int`` subclass."""

    FW_ID_SMU = 1
    FW_ID_FIRST = FW_ID_SMU
    FW_ID_CP_CE = 2
    FW_ID_CP_PFP = 3
    FW_ID_CP_ME = 4
    FW_ID_CP_MEC_JT1 = 5
    FW_ID_CP_MEC_JT2 = 6
    FW_ID_CP_MEC1 = 7
    FW_ID_CP_MEC2 = 8
    FW_ID_RLC = 9
    FW_ID_SDMA0 = 10
    FW_ID_SDMA1 = 11
    FW_ID_SDMA2 = 12
    FW_ID_SDMA3 = 13
    FW_ID_SDMA4 = 14
    FW_ID_SDMA5 = 15
    FW_ID_SDMA6 = 16
    FW_ID_SDMA7 = 17
    FW_ID_VCN = 18
    FW_ID_UVD = 19
    FW_ID_VCE = 20
    FW_ID_ISP = 21
    FW_ID_DMCU_ERAM = 22  # eRAM
    FW_ID_DMCU_ISR = 23   # ISR
    FW_ID_RLC_RESTORE_LIST_GPM_MEM = 24
    FW_ID_RLC_RESTORE_LIST_SRM_MEM = 25
    FW_ID_RLC_RESTORE_LIST_CNTL = 26
    FW_ID_RLC_V = 27
    FW_ID_MMSCH = 28
    FW_ID_PSP_SYSDRV = 29
    FW_ID_PSP_SOSDRV = 30
    FW_ID_PSP_TOC = 31
    FW_ID_PSP_KEYDB = 32
    FW_ID_DFC = 33
    FW_ID_PSP_SPL = 34
    FW_ID_DRV_CAP = 35
    FW_ID_MC = 36
    FW_ID_PSP_BL = 37
    FW_ID_CP_PM4 = 38
    FW_ID_ASD = 39
    FW_ID_TA_RAS = 40
    FW_ID_XGMI = 41
    FW_ID_RLC_SRLG = 42
    FW_ID_RLC_SRLS = 43
    FW_ID_SMC = 44
    FW_ID_DMCU = 45
    FW_ID__MAX = 46  # one past the last identifier above
# This structure represents a range (e.g., frequencies or voltages)
class amdsmi_range_t(Structure):
    """ctypes layout of a range: two 64-bit unsigned bounds."""

    _fields_ = [
        ('lower_bound', c_uint64),
        ('upper_bound', c_uint64),
    ]
class amdsmi_xgmi_info_t(Structure):
    """ctypes layout of XGMI information (lanes, hive id, node id, index)."""

    _fields_ = [
        ('xgmi_lanes', c_uint8),
        ('xgmi_hive_id', c_uint64),
        ('xgmi_node_id', c_uint64),
        ('index', c_uint32),
    ]
# GPU Capability info
class gfx(Structure):
    """ctypes layout of the ``gfx`` capability record."""

    _fields_ = [
        ('gfxip_major', c_uint32),
        ('gfxip_minor', c_uint32),
        ('gfxip_cu_count', c_uint16),
    ]
class mm(Structure):
    """ctypes layout of the ``mm`` capability record.

    ``mm_ip_list`` is a byte array of length ``AMDSMI_MAX_MM_IP_COUNT``.
    """

    _fields_ = [
        ('mm_ip_count', c_uint8),
        ('mm_ip_list', c_uint8 * AMDSMI_MAX_MM_IP_COUNT),
    ]
class amdsmi_gpu_caps_t(Structure):
    """ctypes layout of GPU capabilities; embeds the ``gfx`` and ``mm`` records."""

    _fields_ = [
        ('gfx', gfx),
        ('mm', mm),
        ('ras_supported', c_bool),
        ('max_vf_num', c_uint8),
        ('gfx_ip_count', c_uint32),
        ('dma_ip_count', c_uint32),
    ]
class amdsmi_vram_info(Structure):
    """ctypes layout of VRAM totals: two 32-bit unsigned counters."""

    _fields_ = [
        ('vram_total', c_uint32),
        ('vram_used', c_uint32),
    ]
class amdsmi_frequency_range_t(Structure):
    """ctypes layout holding a supported and a current ``amdsmi_range_t``."""

    _fields_ = [
        ('supported_freq_range', amdsmi_range_t),
        ('current_freq_range', amdsmi_range_t),
    ]
class bdf_submodule(Structure):
    """Bit-field layout of a BDF value inside a ``c_uint64``.

    Field widths are 3 + 5 + 8 + 48 = 64 bits.
    """

    _fields_ = [
        ('function_number', c_uint64, 3),
        ('device_number', c_uint64, 5),
        ('bus_number', c_uint64, 8),
        ('domain_number', c_uint64, 48),
    ]
class amdsmi_bdf_t(Union):
    """Union exposing a BDF either as ``bdf_submodule`` bit fields or as one ``c_uint64``."""

    _fields_ = [
        ('bdf_submodule', bdf_submodule),
        ('as_uint', c_uint64),
    ]
class amdsmi_power_cap_info_t(Structure):
    """ctypes layout of power cap information: five 64-bit unsigned fields."""

    _fields_ = [
        ('power_cap', c_uint64),
        ('default_power_cap', c_uint64),
        ('dpm_cap', c_uint64),
        ('min_power_cap', c_uint64),
        ('max_power_cap', c_uint64),
    ]
class amdsmi_vbios_info_t(Structure):
    """ctypes layout of VBIOS information.

    String fields are fixed-size ``c_char`` arrays sized by the module-level
    ``AMDSMI_*_LENGTH`` constants.
    """

    _fields_ = [
        ('name', c_char * AMDSMI_MAX_STRING_LENGTH),
        ('vbios_version', c_uint32),
        ('build_date', c_char * AMDSMI_MAX_DATE_LENGTH),
        ('part_number', c_char * AMDSMI_MAX_STRING_LENGTH),
        ('vbios_version_string', c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]
class fw_info_list(Structure):
    """ctypes layout of one firmware entry: block id plus 64-bit version."""

    _fields_ = [
        ('fw_id', amdsmi_fw_block_t),
        ('fw_version', c_uint64),
    ]
class amdsmi_fw_info_t(Structure):
    """ctypes layout of firmware information.

    ``fw_info_list`` is an array of ``amdsmi_fw_block_t.FW_ID__MAX`` entries.
    """

    _fields_ = [
        ('num_fw_info', c_uint8),
        ('fw_info_list', fw_info_list * amdsmi_fw_block_t.FW_ID__MAX),
    ]
class amdsmi_asic_info_t(Structure):
    """ctypes layout of ASIC information.

    String fields are fixed-size ``c_char`` arrays sized by the module-level
    ``AMDSMI_*_LENGTH`` constants.
    """

    _fields_ = [
        ('market_name', c_char * AMDSMI_MAX_STRING_LENGTH),
        ('family', c_uint32),
        ('vendor_id', c_uint32),
        ('subvendor_id', c_uint32),
        ('device_id', c_uint64),
        ('rev_id', c_uint32),
        ('asic_serial', c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]
class amdsmi_board_info(Structure):
    """ctypes layout of board information.

    String fields are fixed-size ``c_char`` arrays sized by the module-level
    ``AMDSMI_*_LENGTH`` constants.
    """

    _fields_ = [
        ('serial_number', c_uint64),
        ('is_master', c_bool),
        ('model_number', c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ('product_serial', c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ('fru_id', c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ('product_name', c_char * AMDSMI_PRODUCT_NAME_LENGTH),
        ('manufacturer_name', c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]
class amdsmi_temperature_t(Structure):
    """ctypes layout holding a single 32-bit unsigned ``cur_temp``."""

    _fields_ = [
        ('cur_temp', c_uint32),
    ]
class amdsmi_temperature_limit_t(Structure):
    """ctypes layout holding a single 32-bit unsigned ``limit``."""

    _fields_ = [
        ('limit', c_uint32),
    ]
class amdsmi_power_limit_t(Structure):
    """ctypes layout holding a single 32-bit unsigned ``limit``."""

    _fields_ = [
        ('limit', c_uint32),
    ]
class amdsmi_power_measure(Structure):
    """ctypes layout of a power measurement (socket power, energy, three voltages)."""

    _fields_ = [
        ('average_socket_power', c_uint32),
        ('energy_accumulator', c_uint64),
        ('voltage_gfx', c_uint32),
        ('voltage_soc', c_uint32),
        ('voltage_mem', c_uint32),
    ]
class amdsmi_clk_measure_t(Structure):
    """ctypes layout of a clock measurement: current, average, min and max."""

    _fields_ = [
        ('cur_clk', c_uint32),
        ('avg_clk', c_uint32),
        ('min_clk', c_uint32),
        ('max_clk', c_uint32),
    ]
class amdsmi_engine_usage_t(Structure):
    """ctypes layout of engine usage.

    ``mm_activity`` is an array of ``AMDSMI_MAX_MM_IP_COUNT`` 32-bit values.
    """

    _fields_ = [
        ('gfx_activity', c_uint32),
        ('umc_activity', c_uint32),
        ('mm_activity', c_uint32 * AMDSMI_MAX_MM_IP_COUNT),
    ]
# Process handle type: an alias of c_uint32 (used as the type of the 'pid'
# field in amdsmi_proc_info_t).
amdsmi_process_handle = c_uint32
class memory_usage(Structure):
    """ctypes layout of per-process memory usage: GTT, CPU and VRAM counters."""

    _fields_ = [
        ('gtt_mem', c_uint64),
        ('cpu_mem', c_uint64),
        ('vram_mem', c_uint64),
    ]
class engine_usage(Structure):
    """ctypes layout of per-process engine usage.

    Every field is an array of ``AMDSMI_MAX_MM_IP_COUNT`` 16-bit values.
    """

    _fields_ = [
        ('gfx', c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ('compute', c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ('sdma', c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ('enc', c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ('dec', c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
    ]
class amdsmi_proc_info_t(Structure):
    """ctypes layout of process information.

    Embeds the ``engine_usage`` and ``memory_usage`` records; the name fields
    are ``c_char`` arrays of ``AMDSMI_NORMAL_STRING_LENGTH`` bytes.
    """

    _fields_ = [
        ('name', c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ('pid', amdsmi_process_handle),
        ('mem', c_uint64),
        ('engine_usage', engine_usage),
        ('memory_usage', memory_usage),
        ('container_name', c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]


# Second name for the same structure
amdsmi_process_info = amdsmi_proc_info_t
# Guaranteed maximum possible number of supported frequencies
AMDSMI_MAX_NUM_FREQUENCIES = 32
# The number of points that make up a voltage-frequency curve definition
AMDSMI_NUM_VOLTAGE_CURVE_POINTS = 3
class amdsmi_dev_perf_level_t(c_int):
    """Performance level identifiers, exposed on a ``c_int`` subclass."""

    AMDSMI_DEV_PERF_LEVEL_AUTO = 0  # Performance level is "auto"
    AMDSMI_DEV_PERF_LEVEL_FIRST = AMDSMI_DEV_PERF_LEVEL_AUTO
    AMDSMI_DEV_PERF_LEVEL_HIGH = 1  # Keep PowerPlay levels "high", regardless of workload
    AMDSMI_DEV_PERF_LEVEL_MANUAL = 2  # Only use values defined by manually setting the AMDSMI_CLK_TYPE_SYS speed
    AMDSMI_DEV_PERF_LEVEL_STABLE_STD = 3  # Stable power state with profiling clocks
    AMDSMI_DEV_PERF_LEVEL_STABLE_PEAK = 4  # Stable power state with peak clocks
    AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK = 5  # Stable power state with minimum memory clock
    AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK = 6  # Stable power state with minimum system clock
    AMDSMI_DEV_PERF_LEVEL_DETERMINISM = 7  # Performance determinism state
    AMDSMI_DEV_PERF_LEVEL_LAST = AMDSMI_DEV_PERF_LEVEL_DETERMINISM
    AMDSMI_DEV_PERF_LEVEL_UNKNOWN = 256  # Unknown performance level (0x100)


# Alias without the ``_t`` suffix
amdsmi_dev_perf_level = amdsmi_dev_perf_level_t
class amdsmi_sw_component_t(c_int):
    """Software component identifiers, exposed on a ``c_int`` subclass."""

    AMDSMI_SW_COMP_FIRST = 0
    AMDSMI_SW_COMP_DRIVER = AMDSMI_SW_COMP_FIRST  # Driver
    AMDSMI_SW_COMP_LAST = AMDSMI_SW_COMP_DRIVER
# Event handle type: an alias of c_uint64.
amdsmi_event_handle_t = c_uint64
# Event Groups
# Enum denoting an event group. The value of the enum is the
# base value for all the event enums in the group.
class amdsmi_event_group_t(c_int):
    """Event group base values, exposed on a ``c_int`` subclass."""

    AMDSMI_EVNT_GRP_XGMI = 0             # Data Fabric(XGMI) related events
    AMDSMI_EVNT_GRP_XGMI_DATA_OUT = 10   # XGMI Outbound data
    AMDSMI_EVNT_GRP_INVALID = 0xFFFFFFFF
# Event types
# Event type enum. Events belonging to a particular event group
# ::amdsmi_event_group_t should begin enumerating at the ::amdsmi_event_group_t
# value for that group.
class amdsmi_event_type_t(c_int):
    """Event type identifiers, exposed on a ``c_int`` subclass."""

    AMDSMI_EVNT_FIRST = amdsmi_event_group_t.AMDSMI_EVNT_GRP_XGMI

    # XGMI group
    AMDSMI_EVNT_XGMI_FIRST = amdsmi_event_group_t.AMDSMI_EVNT_GRP_XGMI
    AMDSMI_EVNT_XGMI_0_NOP_TX = AMDSMI_EVNT_XGMI_FIRST  # NOPs sent to neighbor 0
    AMDSMI_EVNT_XGMI_0_REQUEST_TX = 1
    AMDSMI_EVNT_XGMI_0_RESPONSE_TX = 2
    AMDSMI_EVNT_XGMI_0_BEATS_TX = 3
    AMDSMI_EVNT_XGMI_1_NOP_TX = 4
    AMDSMI_EVNT_XGMI_1_REQUEST_TX = 5
    AMDSMI_EVNT_XGMI_1_RESPONSE_TX = 6
    AMDSMI_EVNT_XGMI_1_BEATS_TX = 7
    AMDSMI_EVNT_XGMI_LAST = 7

    # XGMI data-out group
    AMDSMI_EVNT_XGMI_DATA_OUT_FIRST = 10
    AMDSMI_EVNT_XGMI_DATA_OUT_0 = 10
    AMDSMI_EVNT_XGMI_DATA_OUT_1 = 11
    AMDSMI_EVNT_XGMI_DATA_OUT_2 = 12
    AMDSMI_EVNT_XGMI_DATA_OUT_3 = 13
    AMDSMI_EVNT_XGMI_DATA_OUT_4 = 14
    AMDSMI_EVNT_XGMI_DATA_OUT_5 = 15
    AMDSMI_EVNT_XGMI_DATA_OUT_LAST = AMDSMI_EVNT_XGMI_DATA_OUT_5

    AMDSMI_EVNT_LAST = AMDSMI_EVNT_XGMI_DATA_OUT_LAST
class amdsmi_counter_command_t(c_int):
    """Counter command identifiers, exposed on a ``c_int`` subclass."""

    AMDSMI_CNTR_CMD_START = 0
    AMDSMI_CNTR_CMD_STOP = 1
class amdsmi_counter_value_t(Structure):
    """ctypes layout of a counter reading: value plus enabled/running times."""

    _fields_ = [
        ('value', c_uint64),
        ('time_enabled', c_uint64),
        ('time_running', c_uint64),
    ]
class amdsmi_evt_notification_type_t(c_int):
    """Event notification type identifiers, exposed on a ``c_int`` subclass.

    Every member is a plain ``int``.  Four members previously carried C-style
    trailing commas, which made them one-element tuples (``AMDSMI_EVT_NOTIF_LAST``
    was unaffected only because it had no comma of its own but aliased a tuple).
    """

    AMDSMI_EVT_NOTIF_VMFAULT = 1  # VM page fault
    AMDSMI_EVT_NOTIF_FIRST = AMDSMI_EVT_NOTIF_VMFAULT
    AMDSMI_EVT_NOTIF_THERMAL_THROTTLE = 2
    AMDSMI_EVT_NOTIF_GPU_PRE_RESET = 3
    AMDSMI_EVT_NOTIF_GPU_POST_RESET = 4
    AMDSMI_EVT_NOTIF_LAST = AMDSMI_EVT_NOTIF_GPU_POST_RESET
# function to generate event bitmask from event id
def AMDSMI_EVENT_MASK_FROM_INDEX(i):
    """Return a ``c_ulonglong`` with only bit ``i - 1`` set."""
    bit_position = i - 1
    mask = 1 << bit_position
    return c_ulonglong(mask)
# Length of the 'message' character buffer in amdsmi_evt_notification_data_t.
MAX_EVENT_NOTIFICATION_MSG_SIZE = 64
# Event notification data returned from event notification API
class amdsmi_evt_notification_data_t(Structure):
    """ctypes layout of one event notification record."""

    _fields_ = [
        # Handler of device that corresponds to the event
        ('device_handle', c_void_p),
        # Event type
        ('event', amdsmi_evt_notification_type_t),
        # Event message
        ('message', c_char * MAX_EVENT_NOTIFICATION_MSG_SIZE),
    ]
# Temperature Metrics. This enum is used to identify various
# temperature metrics. Corresponding values will be in millidegrees Celsius.
class amdsmi_temperature_metric_t(c_int):
    """Temperature metric identifiers, exposed on a ``c_int`` subclass."""

    # Temperature current value
    AMDSMI_TEMP_CURRENT = 0
    AMDSMI_TEMP_FIRST = AMDSMI_TEMP_CURRENT
    # Temperature max value
    AMDSMI_TEMP_MAX = 1
    # Temperature min value
    AMDSMI_TEMP_MIN = 2
    # Temperature hysteresis value for max limit (This is an absolute temperature, not a delta)
    AMDSMI_TEMP_MAX_HYST = 3
    # Temperature hysteresis value for min limit (not a delta)
    AMDSMI_TEMP_MIN_HYST = 4
    # Temperature critical max value, typically greater than corresponding temp_max values.
    AMDSMI_TEMP_CRITICAL = 5
    # Temperature hysteresis value for critical limit. (not a delta)
    AMDSMI_TEMP_CRITICAL_HYST = 6
    # Temperature emergency max value, for chips supporting more than two upper temperature
    # limits. Must be equal or greater than corresponding temp_crit values.
    AMDSMI_TEMP_EMERGENCY = 7
    # Temperature hysteresis value for emergency limit. (not a delta).
    AMDSMI_TEMP_EMERGENCY_HYST = 8
    # Temperature critical min value, typically lower than corresponding temperature min values
    AMDSMI_TEMP_CRIT_MIN = 9
    # Temperature hysteresis value for critical minimum limit. (not a delta)
    AMDSMI_TEMP_CRIT_MIN_HYST = 10
    # Temperature offset which is added to the temperature reading by the chip.
    AMDSMI_TEMP_OFFSET = 11
    # Historical minimum temperature.
    AMDSMI_TEMP_LOWEST = 12
    # Historical maximum temperature.
    AMDSMI_TEMP_HIGHEST = 13
    AMDSMI_TEMP_LAST = AMDSMI_TEMP_HIGHEST
class amdsmi_voltage_metric_t(c_int):
    """Voltage metric identifiers, exposed on a ``c_int`` subclass."""

    AMDSMI_VOLT_CURRENT = 0   # Voltage current value.
    AMDSMI_VOLT_FIRST = AMDSMI_VOLT_CURRENT
    AMDSMI_VOLT_MAX = 1       # Voltage max value.
    AMDSMI_VOLT_MIN_CRIT = 2  # Voltage critical min value.
    AMDSMI_VOLT_MIN = 3       # Voltage min value.
    AMDSMI_VOLT_MAX_CRIT = 4  # Voltage critical max value.
    AMDSMI_VOLT_AVERAGE = 5   # Average voltage.
    AMDSMI_VOLT_LOWEST = 6    # Historical minimum voltage.
    AMDSMI_VOLT_HIGHEST = 7   # Historical maximum voltage.
    AMDSMI_VOLT_LAST = AMDSMI_VOLT_HIGHEST
# This enumeration is used to indicate which type of
# voltage reading should be obtained.
class amdsmi_voltage_type_t(c_int):
    """Voltage type identifiers, exposed on a ``c_int`` subclass."""

    AMDSMI_VOLT_TYPE_FIRST = 0
    AMDSMI_VOLT_TYPE_VDDGFX = AMDSMI_VOLT_TYPE_FIRST  # Vddgfx GPU voltage
    AMDSMI_VOLT_TYPE_LAST = AMDSMI_VOLT_TYPE_VDDGFX
    AMDSMI_VOLT_TYPE_INVALID = 0xFFFFFFFF             # Invalid type
# Pre-set Profile Selections. These bitmasks can be AND'd with the
# ::amdsmi_power_profile_status_t.available_profiles returned from
# ::amdsmi_dev_power_profile_presets_get to determine which power profiles
# are supported by the system.
class amdsmi_power_profile_preset_masks_t(c_int):
    """Power profile preset bitmasks, exposed on a ``c_int`` subclass."""

    AMDSMI_PWR_PROF_PRST_CUSTOM_MASK = 1 << 0        # Custom Power Profile
    AMDSMI_PWR_PROF_PRST_VIDEO_MASK = 1 << 1         # Video Power Profile
    AMDSMI_PWR_PROF_PRST_POWER_SAVING_MASK = 1 << 2  # Power Saving Profile
    AMDSMI_PWR_PROF_PRST_COMPUTE_MASK = 1 << 3       # Compute Saving Profile
    AMDSMI_PWR_PROF_PRST_VR_MASK = 1 << 4            # VR Power Profile
    AMDSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 1 << 5   # 3D Full Screen Power Profile
    AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT = 1 << 6     # Default Boot Up Profile
    AMDSMI_PWR_PROF_PRST_LAST = AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT
    # Invalid power profile
    AMDSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF
class amdsmi_gpu_block_t(c_int):
    """GPU block bitmasks, exposed on a ``c_int`` subclass.

    Each block occupies one bit; values are written as shifts.
    """

    AMDSMI_GPU_BLOCK_INVALID = 0                     # Used to indicate an invalid block
    AMDSMI_GPU_BLOCK_FIRST = 1 << 0
    AMDSMI_GPU_BLOCK_UMC = AMDSMI_GPU_BLOCK_FIRST    # UMC block
    AMDSMI_GPU_BLOCK_SDMA = 1 << 1                   # SDMA block
    AMDSMI_GPU_BLOCK_GFX = 1 << 2                    # GFX block
    AMDSMI_GPU_BLOCK_MMHUB = 1 << 3                  # MMHUB block
    AMDSMI_GPU_BLOCK_ATHUB = 1 << 4                  # ATHUB block
    AMDSMI_GPU_BLOCK_PCIE_BIF = 1 << 5               # PCIE_BIF block
    AMDSMI_GPU_BLOCK_HDP = 1 << 6                    # HDP block
    AMDSMI_GPU_BLOCK_XGMI_WAFL = 1 << 7              # XGMI block
    AMDSMI_GPU_BLOCK_DF = 1 << 8                     # DF block
    AMDSMI_GPU_BLOCK_SMN = 1 << 9                    # SMN block
    AMDSMI_GPU_BLOCK_SEM = 1 << 10                   # SEM block
    AMDSMI_GPU_BLOCK_MP0 = 1 << 11                   # MP0 block
    AMDSMI_GPU_BLOCK_MP1 = 1 << 12                   # MP1 block
    AMDSMI_GPU_BLOCK_FUSE = 1 << 13                  # Fuse block
    AMDSMI_GPU_BLOCK_LAST = AMDSMI_GPU_BLOCK_FUSE    # The highest bit position for supported blocks
    AMDSMI_GPU_BLOCK_RESERVED = 1 << 63
class amdsmi_ras_err_state_t(c_uint32):
    """RAS/ECC error states.

    The base is unsigned so that AMDSMI_RAS_ERR_STATE_INVALID (0xFFFFFFFF)
    round-trips through ``.value``; a signed ``c_int`` reads it back as -1,
    which never compares equal to the constant.
    """

    # No current errors
    AMDSMI_RAS_ERR_STATE_NONE = 0
    # ECC is disabled
    AMDSMI_RAS_ERR_STATE_DISABLED = 1
    # ECC errors present, but type unknown
    AMDSMI_RAS_ERR_STATE_PARITY = 2
    # Single correctable error
    AMDSMI_RAS_ERR_STATE_SING_C = 3
    # Multiple uncorrectable errors
    AMDSMI_RAS_ERR_STATE_MULT_UC = 4
    # Firmware detected error and isolated page. Treat as uncorrectable.
    AMDSMI_RAS_ERR_STATE_POISON = 5
    # ECC is enabled
    AMDSMI_RAS_ERR_STATE_ENABLED = 6
    AMDSMI_RAS_ERR_STATE_LAST = AMDSMI_RAS_ERR_STATE_ENABLED
    AMDSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF
class amdsmi_memory_type_t(c_int):
    """Memory type identifiers (VRAM, visible VRAM, GTT)."""

    AMDSMI_MEM_TYPE_FIRST = 0
    # VRAM memory
    AMDSMI_MEM_TYPE_VRAM = AMDSMI_MEM_TYPE_FIRST
    # VRAM memory that is visible
    AMDSMI_MEM_TYPE_VIS_VRAM = 1
    # GTT memory
    AMDSMI_MEM_TYPE_GTT = 2
    AMDSMI_MEM_TYPE_LAST = AMDSMI_MEM_TYPE_GTT
class amdsmi_freq_ind_t(c_uint32):
    """Indices selecting the minimum or maximum frequency value.

    The base is unsigned so that AMDSMI_FREQ_IND_INVALID (0xFFFFFFFF)
    round-trips through ``.value``; a signed ``c_int`` reads it back as -1.
    """

    # Index used for the minimum frequency value
    AMDSMI_FREQ_IND_MIN = 0
    # Index used for the maximum frequency value
    AMDSMI_FREQ_IND_MAX = 1
    # An invalid frequency index
    AMDSMI_FREQ_IND_INVALID = 0xFFFFFFFF
class amdsmi_xgmi_status_t(c_int):
    """XGMI status codes: no errors, one error, or multiple errors."""

    AMDSMI_XGMI_STATUS_NO_ERRORS = 0
    AMDSMI_XGMI_STATUS_ERROR = 1
    AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS = 2
# Bit-field type alias. This must be the ctypes *type*, not an instance:
# `c_uint64()` would bind one shared mutable object that cannot be used as a
# field type or called to create new values. sizeof() works on the type as well.
amdsmi_bit_field_t = c_uint64
amdsmi_bit_field = amdsmi_bit_field_t
# Reserved Memory Page States
class amdsmi_memory_page_status_t(c_int):
    """States a reserved GPU memory page can be in."""

    # Reserved. This gpu page is reserved and not available for use
    AMDSMI_MEM_PAGE_STATUS_RESERVED = 0
    # Pending. This gpu page is marked as bad
    # and will be marked reserved at the next window.
    AMDSMI_MEM_PAGE_STATUS_PENDING = 1
    # Unable to reserve this page
    AMDSMI_MEM_PAGE_STATUS_UNRESERVABLE = 2
# Types for IO Link
class AMDSMI_IO_LINK_TYPE(c_uint32):
    """IO link types.

    The base is unsigned so that AMDSMI_IOLINK_TYPE_SIZE (0xFFFFFFFF)
    round-trips through ``.value``; a signed ``c_int`` reads it back as -1.
    """

    # unknown type.
    AMDSMI_IOLINK_TYPE_UNDEFINED = 0
    # PCI Express
    AMDSMI_IOLINK_TYPE_PCIEXPRESS = 1
    # XGMI
    AMDSMI_IOLINK_TYPE_XGMI = 2
    # Number of IO Link types
    AMDSMI_IOLINK_TYPE_NUMIOLINKTYPES = 3
    # Max of IO Link types
    AMDSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF
# The utilization counter type
class AMDSMI_UTILIZATION_COUNTER_TYPE(c_int):
    """Identifiers for the utilization counters (GFX and memory activity)."""

    AMDSMI_UTILIZATION_COUNTER_FIRST = 0
    # GFX Activity
    AMDSMI_COARSE_GRAIN_GFX_ACTIVITY = AMDSMI_UTILIZATION_COUNTER_FIRST
    # Memory Activity
    AMDSMI_COARSE_GRAIN_MEM_ACTIVITY = 1
    AMDSMI_UTILIZATION_COUNTER_LAST = AMDSMI_COARSE_GRAIN_MEM_ACTIVITY
# The utilization counter data
class amdsmi_utilization_counter_t(Structure):
    """One utilization counter: its type and its value.

    The previous definition carried the retired-page-record fields under this
    name, which looks like two adjacent C structs merged by accident.
    NOTE(review): the {type, value} layout follows amdsmi.h's
    amdsmi_utilization_counter_t -- confirm against the header in use.
    """

    _fields_ = [
        # Which utilization counter this entry describes
        ('type', AMDSMI_UTILIZATION_COUNTER_TYPE),
        # Counter value
        ('value', c_uint64),
    ]


# Reserved Memory Page Record
class amdsmi_retired_page_record_t(Structure):
    """Record describing one reserved (retired) memory page."""

    _fields_ = [
        ('page_address', c_uint64),
        ('page_size', c_uint64),
        ('status', amdsmi_memory_page_status_t),
    ]
# Upper bound on the number of power profiles a system could support:
# one per bit of amdsmi_bit_field_t.
AMDSMI_MAX_NUM_POWER_PROFILES = 8 * sizeof(amdsmi_bit_field_t)
# This structure contains information about which power profiles are
# supported by the system for a given device, and which power profile is currently active.
class amdsmi_power_profile_status_t(Structure):
    """Power-profile support and the currently active profile for a device."""

    _fields_ = [
        # Which profiles are supported by this system
        ("available_profiles", c_uint64),
        # Which power profile is currently active
        ("current", amdsmi_power_profile_preset_masks_t),
        # How many power profiles are available
        ("num_profiles", c_uint32),
    ]
# This structure holds information about clock frequencies.
class amdsmi_frequencies_t(Structure):
    """Clock frequency information: supported count, current index, values."""

    _fields_ = [
        # The number of supported frequencies
        ("num_supported", c_uint32),
        # The current frequency index
        ("current", c_uint32),
        # List of frequencies.
        # Only the first num_supported frequencies are valid.
        ("frequency", c_uint64 * AMDSMI_MAX_NUM_FREQUENCIES),
    ]
#This structure holds information about the possible PCIe
#bandwidths. Specifically, the possible transfer rates and their
#associated numbers of lanes are stored here.
class amdsmi_pcie_bandwidth_t(Structure):
    """Possible PCIe transfer rates and their associated lane counts."""

    _fields_ = [
        # Transfer rates (T/s) that are possible
        ("transfer_rate", amdsmi_frequencies_t),
        # List of lanes for corresponding transfer rate.
        # Only the first num_supported bandwidths are valid.
        ("lanes", c_uint32 * AMDSMI_MAX_NUM_FREQUENCIES),
    ]
# This structure holds version information.
class amdsmi_version_t(Structure):
    """Version information: major, minor, patch and a build string."""

    _fields_ = [
        # Major version
        ("major", c_uint32),
        # Minor version
        ("minor", c_uint32),
        # Patch, build or stepping version
        ("patch", c_uint32),
        # Build string
        ("build", c_char_p),
    ]
# This structure represents a point on the frequency-voltage plane.
class amdsmi_od_vddc_point_t(Structure):
    """A single point on the frequency-voltage plane."""

    _fields_ = [
        # Frequency coordinate (in Hz)
        ("frequency", c_uint64),
        # Voltage coordinate (in mV)
        ("voltage", c_uint64),
    ]
# This structure holds 2 ::amdsmi_range_t's, one for frequency and one for
# voltage. These 2 ranges indicate the range of possible values for the
# corresponding ::amdsmi_od_vddc_point_t.
class amdsmi_freq_volt_region_t(Structure):
    """Pair of ranges (frequency and voltage) for one VDDC curve point."""

    _fields_ = [
        # The frequency range for this VDDC Curve point
        ("freq_range", amdsmi_range_t),
        # The voltage range for this VDDC Curve point
        ("volt_range", amdsmi_range_t),
    ]
# Array of ::AMDSMI_NUM_VOLTAGE_CURVE_POINTS ::amdsmi_od_vddc_point_t's that
# make up the voltage frequency curve points.
class amdsmi_od_volt_curve_t(Structure):
    """Voltage-frequency curve, stored as a fixed-size array of points."""

    _fields_ = [
        # Array of ::AMDSMI_NUM_VOLTAGE_CURVE_POINTS ::amdsmi_od_vddc_point_t's
        # that make up the voltage frequency curve points.
        ("vc_points", amdsmi_od_vddc_point_t * AMDSMI_NUM_VOLTAGE_CURVE_POINTS),
    ]
# This structure holds the frequency-voltage values for a device.
class amdsmi_od_volt_freq_data_t(Structure):
    """Frequency-voltage values for a device."""

    _fields_ = [
        # The current SCLK frequency range
        ("curr_sclk_range", amdsmi_range_t),
        # The current MCLK frequency range; (upper bound only)
        ("curr_mclk_range", amdsmi_range_t),
        # The range possible of SCLK values
        ("sclk_freq_limits", amdsmi_range_t),
        # The range possible of MCLK values
        ("mclk_freq_limits", amdsmi_range_t),
        # The current voltage curve
        ("curve", amdsmi_od_volt_curve_t),
        # The number of voltage curve regions
        ("num_regions", c_uint32),
    ]
# The following structures hold the gpu metrics values for a device.
# Size and version information of metrics data
class amd_metrics_table_header_t(Structure):
    """Size and version header that precedes the GPU metrics data."""

    _fields_ = [
        ("structure_size", c_uint16),
        ("format_revision", c_ubyte),
        ("content_revision", c_ubyte),
    ]
# GPU metrics API format version and the known content versions
AMDSMI_GPU_METRICS_API_FORMAT_VER = 1
AMDSMI_GPU_METRICS_API_CONTENT_VER_1 = 1
AMDSMI_GPU_METRICS_API_CONTENT_VER_2 = 2
AMDSMI_GPU_METRICS_API_CONTENT_VER_3 = 3

# This should match NUM_HBM_INSTANCES
AMDSMI_NUM_HBM_INSTANCES = 4

# Unit conversion factor for HBM temperatures
CENTRIGRADE_TO_MILLI_CENTIGRADE = 1000
class amdsmi_gpu_metrics_t(Structure):
    """GPU metrics table: header, temperatures, utilization, power, clocks,
    throttle status, fan speed and PCIe link data."""

    _fields_ = [
        ("common_header", amd_metrics_table_header_t),

        # Temperature
        ("temperature_edge", c_uint16),
        ("temperature_hotspot", c_uint16),
        ("temperature_mem", c_uint16),
        ("temperature_vrgfx", c_uint16),
        ("temperature_vrsoc", c_uint16),
        ("temperature_vrmem", c_uint16),

        # Utilization
        ("average_gfx_activity", c_uint16),
        ("average_umc_activity", c_uint16),
        ("average_mm_activity", c_uint16),

        # Power/Energy
        ("average_socket_power", c_uint16),
        ("energy_accumulator", c_uint64),

        # Driver attached timestamp (in ns)
        ("system_clock_counter", c_uint64),

        # Average clocks
        ("average_gfxclk_frequency", c_uint16),
        ("average_socclk_frequency", c_uint16),
        ("average_uclk_frequency", c_uint16),
        ("average_vclk0_frequency", c_uint16),
        ("average_dclk0_frequency", c_uint16),
        ("average_vclk1_frequency", c_uint16),
        ("average_dclk1_frequency", c_uint16),

        # Current clocks
        ("current_gfxclk", c_uint16),
        ("current_socclk", c_uint16),
        ("current_uclk", c_uint16),
        ("current_vclk0", c_uint16),
        ("current_dclk0", c_uint16),
        ("current_vclk1", c_uint16),
        ("current_dclk1", c_uint16),

        # Throttle status
        ("throttle_status", c_uint32),

        # Fans
        ("current_fan_speed", c_uint16),

        # Link width/speed
        # v1 mod.(8->16)
        ("pcie_link_width", c_uint16),
        # in 0.1 GT/s; v1 mod. (8->16)
        ("pcie_link_speed", c_uint16),

        # The remaining fields are new in v1
        ("padding", c_uint16),
        ("gfx_activity_acc", c_uint32),
        ("mem_actvity_acc", c_uint32),
        ("temperature_hbm", c_uint16 * AMDSMI_NUM_HBM_INSTANCES),
    ]
# This structure holds error counts.
class amdsmi_error_count_t(Structure):
    """Accumulated correctable and uncorrectable error counts."""

    _fields_ = [
        # Accumulated correctable errors
        ("correctable_count", c_uint64),
        # Accumulated uncorrectable errors
        ("uncorrectable_count", c_uint64),
    ]
# This structure holds pcie info.
class amdsmi_pcie_info_t(Structure):
    """PCIe information: lane count and speed."""

    _fields_ = [
        ("pcie_lanes", c_uint16),
        ("pcie_speed", c_uint16),
    ]
class amdsmi_process_info_t(Structure):
    """Per-process usage information."""

    _fields_ = [
        # Process ID
        ("process_id", c_uint32),
        # PASID
        ("pasid", c_uint32),
        # VRAM usage
        ("vram_usage", c_uint64),
        # SDMA usage in microseconds
        ("sdma_usage", c_uint64),
        # Compute Unit usage in percent
        ("cu_occupancy", c_uint32),
    ]
# Opaque handle to function-support object
class amdsmi_func_id_iter_handle(Structure):
    """Opaque function-support object; no fields are declared on it."""


# Pointer type used to pass the opaque handle around
amdsmi_func_id_iter_handle_t = POINTER(amdsmi_func_id_iter_handle)
# Place-holder "variant" for functions that don't have any variants,
# but do have monitors or sensors.
AMDSMI_DEFAULT_VARIANT = (1 << 64) - 1  # 0xFFFFFFFFFFFFFFFF
class submodule_union(Union):
    """Union of the enum types a function-id value may carry as a sub-variant."""

    _fields_ = [
        ("memory_type", amdsmi_memory_type_t),
        ("temp_metric", amdsmi_temperature_metric_t),
        ("evnt_type", amdsmi_event_type_t),
        ("evnt_group", amdsmi_event_group_t),
        ("clk_type", amdsmi_clk_type_t),
        ("fw_block", amdsmi_fw_block_t),
        ("gpu_block_type", amdsmi_gpu_block_t),
    ]
class amdsmi_func_id_value_t(Union):
    """Function-id value, viewable as an integer id, a name string, or a
    submodule_union."""

    _fields_ = [
        ("id", c_uint64),
        ("name", c_char_p),
        ("submodule", submodule_union),
    ]


# Short alias for the union type
amd_id = amdsmi_func_id_value_t
-2
View File
@@ -259,8 +259,6 @@ int main() {
// Get device type. Since the amdsmi is initialized with
// AMD_SMI_INIT_AMD_GPUS, the device_type must be AMD_GPU.
device_type_t device_type = {};
std::cout << "Device Handle: " << device_handles[j] << std::endl;
ret = amdsmi_get_device_type(device_handles[j], &device_type);
CHK_AMDSMI_RET(ret)
if (device_type != AMD_GPU) {