Revert "Added AMD-SMI Linux Baremetal"
This reverts commit 013400bee7.
Reason for revert: Branch is still WIP
Change-Id: I75eec813b3d81049f033fe0a534251bd69eeca0e
This commit is contained in:
committed by
Dmitrii Galantsev
parent
ed8f865341
commit
6aa91da74c
@@ -1,96 +0,0 @@
|
||||
import logging
|
||||
import platform
|
||||
import re
|
||||
|
||||
|
||||
class BDF(object):
    """PCI address (segment:bus:device.function) with value-style comparison.

    Validates a BDF string, converts it to its four integer components and
    lets BDF objects be compared, concatenated and iterated in a pythonic way.

    Accepted constructor inputs:
        * another BDF object (copied),
        * a string such as "0000:23:00.0" or "23:00.0" (segment defaults to 0),
        * the repr() of a BDF, e.g. "BDF(0000:23:00.0)".
    Every component is parsed as hexadecimal.

    Attributes:
        segment: PCI segment number, 0-65535.
        bus: PCI bus number, 0-255.
        device: PCI device number, 0-31.
        function: PCI function number, 0-7.
        operating_system: Value of platform.system() at construction time.
    """

    # (label, maximum) per component, in segment/bus/device/function order
    _LIMITS = (("Segment", 65535), ("Bus", 255), ("Device", 31), ("Function", 7))

    # __eq__ accepts plain strings, so no hash could agree with str's hash;
    # keep instances unhashable (this is also what defining __eq__ alone does).
    __hash__ = None

    def __init__(self, bdf):
        """Init a BDF object from a BDF string or another BDF object.

        Raises:
            ValueError: The string is not a 3- or 4-component hexadecimal BDF
                or one of its components is out of range.
        """
        # Tell if this is baremetal vs Virtualization
        self.operating_system = platform.system()

        if isinstance(bdf, BDF):
            self.segment, self.bus, self.device, self.function = tuple(bdf)
            return

        try:
            components = self._parse(bdf)
        except ValueError:
            logging.error(f"Invalid string passed: {bdf}")
            raise

        self.segment, self.bus, self.device, self.function = components

    @staticmethod
    def _parse(bdf):
        """Return (segment, bus, device, function) parsed from a BDF string.

        Raises ValueError without logging, so comparisons can probe arbitrary
        strings quietly.
        """
        if bdf.startswith("BDF("):
            bdf = bdf.replace('BDF(', '').replace(')', '')

        components = [int(x, 16) for x in re.split('[:.]', bdf)]
        if len(components) == 3:
            # Segment omitted: default to segment 0
            components.insert(0, 0)
        elif len(components) != 4:
            raise ValueError("BDF must have the form [segment:]bus:device.function")

        for (label, maximum), value in zip(BDF._LIMITS, components):
            if value < 0:
                raise ValueError(f"BDF {label} can't be negative")
            if value > maximum:
                raise ValueError(f"BDF {label} can't be greater than {maximum}")

        return tuple(components)

    def __eq__(self, passed_bdf):
        """Overrides the == operator and allows for BDF objects to be compared to BDF strings"""
        # Only accept strings and BDF objects
        if isinstance(passed_bdf, str):
            try:
                other = self._parse(passed_bdf)
            except ValueError:
                # Empty or malformed strings are simply "not equal"
                return False
        elif isinstance(passed_bdf, BDF):
            other = tuple(passed_bdf)
        else:
            return False

        return tuple(self) == other

    def __ne__(self, passed_bdf):
        """Overrides the != operator and allows for BDF objects to be compared to BDF strings"""
        # Since we override the == operator we can use that to make this simple
        return not self == passed_bdf

    def __add__(self, passed_bdf):
        """Overrides the + operator and allows for concatenation (BDF + str)"""
        return str(self) + passed_bdf

    def __radd__(self, passed_bdf):
        """Overrides the + operator and allows for concatenation (str + BDF)"""
        return passed_bdf + str(self)

    def __str__(self):
        """Cast BDF object to its canonical 'SSSS:BB:DD.F' string"""
        # The function is separated by '.', which is the form __init__ is fed
        # elsewhere in this project and the only form __contains__ ever matched.
        return "{:04X}:{:02X}:{:02X}.{:X}".format(self.segment, self.bus, self.device, self.function)

    def __repr__(self):
        """How the BDF object is represented; the result is accepted by BDF()"""
        return f"BDF({self})"

    def __iter__(self):
        """Make the BDF object iterable over its 4 values"""
        yield from (self.segment, self.bus, self.device, self.function)

    def __contains__(self, passed_bdf):
        """Overrides the 'in' comparator: True when passed_bdf denotes this BDF.

        Raises:
            ValueError: passed_bdf is not a valid BDF.
        """
        return self == BDF(passed_bdf)
|
||||
@@ -1 +0,0 @@
|
||||
# Tool version string; the version command and the argument parser description print it.
__version__ = "0.0.1"
|
||||
@@ -1,31 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# from amd_smi_init import *
|
||||
|
||||
from amd_smi_commands import AMD_SMI_Commands
|
||||
from amd_smi_parser import AMD_SMI_Parser
|
||||
|
||||
|
||||
# sudo /src/out/ubuntu-20.04/20.04/bin/rocm-smi -bc --json | python -m json.tool
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # One bound method per amd-smi subcommand
    amd_smi_commands = AMD_SMI_Commands()

    # The order follows the positional arguments passed to AMD_SMI_Parser
    command_handlers = (
        amd_smi_commands.version,
        amd_smi_commands.discovery,
        amd_smi_commands.static,
        amd_smi_commands.firmware,
        amd_smi_commands.bad_pages,
        amd_smi_commands.metric,
        amd_smi_commands.process,
        amd_smi_commands.profile,
        amd_smi_commands.event,
        amd_smi_commands.topology,
        amd_smi_commands.set_value,
        amd_smi_commands.reset,
        amd_smi_commands.misc,
        amd_smi_commands.gpu_v,
    )
    amd_smi_parser = AMD_SMI_Parser(*command_handlers)

    args = amd_smi_parser.parse_args()
    # Dispatch to the selected handler; this needs to be there to handle
    # subparsers with no subcommands
    args.func(args)
|
||||
# AMDSMI logger print out json, csv, or string
|
||||
@@ -1,91 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import platform
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# from amd_smi_init import *
|
||||
from BDF import BDF
|
||||
from _version import __version__
|
||||
|
||||
from amd_smi_logger import AMD_SMI_Logger
|
||||
|
||||
|
||||
|
||||
class AMD_SMI_Commands(object):
    """Handlers for the amd-smi subcommands.

    Every method takes the parsed argparse namespace of its subcommand. The
    handlers are placeholders for now: each prints a marker line so the
    command-line wiring can be exercised.

    Planned design (not implemented yet): create a logger together with this
    object, have every handler store its output per GPU through it
    (logger.store_output(gpu_id, string)) and print the collected output at
    the end in the requested format (json / csv / text). Also check whether
    __init__ can accept the parsed args so it can drive watch-style looping.
    """

    def version(self, args):
        """Print the tool version and the kernel version."""
        kernel_version = 123  # placeholder value, not read from the system
        print(f'AMD-SMI version: {__version__} | Kernel version: {kernel_version}')

    def discovery(self, args):
        """Placeholder for the discovery subcommand."""
        print('discovery test')

    def static(self, args):
        """Placeholder for the static subcommand; echoes a few parsed flags.

        Reads args.asic, args.bus and args.driver.
        """
        # This is where the arg handling comes through
        print(args.asic)
        print(args.bus)
        print(args.driver)
        print('static test')

    def firmware(self, args):
        """Placeholder for the firmware subcommand."""
        print('firmware test')

    def bad_pages(self, args):
        """Placeholder for the bad_pages (retired pages) subcommand."""
        print('Bad Pages test')

    def metric(self, args):
        """Placeholder for the metric subcommand."""
        print('Metric test')

    def process(self, args):
        """Placeholder for the process subcommand."""
        print('Process Test')

    def profile(self, args):
        """Placeholder for the profile subcommand."""
        print('Profile test')

    def event(self, args):
        """Placeholder for the event subcommand."""
        print('event test')

    def topology(self, args):
        """Placeholder for the topology subcommand."""
        print('topology test')

    def set_value(self, args):
        """Placeholder for the set_value subcommand."""
        print('set_value test')

    def reset(self, args):
        """Placeholder for the reset subcommand."""
        print('reset test')

    def misc(self, args):
        """Placeholder for the misc subcommand."""
        print('misc test')

    def gpu_v(self, args):
        """Placeholder for the gpu_v subcommand."""
        # Prints its own marker so it can be told apart from misc()
        print('gpu_v test')
|
||||
|
||||
@@ -1,151 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import argparse
|
||||
import platform
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import logging
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from BDF import BDF
|
||||
from amd_smi_init import *
|
||||
|
||||
|
||||
class AMD_SMI_Helpers(object):
    """Platform detection and device-listing helpers for amd-smi.

    On construction the host is classified from platform.system() and, on
    Linux, from the DMI product name in /sys/class/dmi/id/product_name. The
    results are exposed through the is_*() accessors.
    """

    def __init__(self) -> None:
        self.operating_system = platform.system()

        # Every classification flag starts out False
        self._is_hypervisor = False
        self._is_virtual_os = False
        self._is_baremetal = False
        self._is_linux = False
        self._is_windows = False

        # DMI product-name prefixes treated as a virtual machine
        self.virtual_operating_systems_product_names = ['KVM', 'VirtualBox', 'VMware']  # TODO: get KVM example

        os_name = self.operating_system

        if os_name.startswith('Linux'):
            self._is_linux = True
            # TODO: KVM hypervisor check

            dmi_product_file = Path('/sys/class/dmi/id/product_name')
            product_name = dmi_product_file.read_text().strip() if dmi_product_file.exists() else ''

            if product_name:
                self._is_virtual_os = any(
                    product_name.startswith(vm_prefix)
                    for vm_prefix in self.virtual_operating_systems_product_names)
                # Baremetal is deduced from the other two flags
                self._is_baremetal = not (self._is_hypervisor or self._is_virtual_os)
            else:
                # Unable to determine the product name: default to baremetal
                self._is_baremetal = True

        if os_name.startswith('VMkernel'):
            self._is_hypervisor = True

        if os_name.startswith('Window'):
            # TODO: check PowerShell for Hyper-V enablement, e.g.
            #   Get-CimInstance -ClassName Win32_ComputerSystem Manufacturer
            self._is_windows = True

    def os_info(self):
        """Placeholder for returning OS information; currently always True."""
        return True

    def is_virtual_os(self):
        """Return True when the DMI product name matched a known VM prefix."""
        return self._is_virtual_os

    def is_hypervisor(self):
        """Return True if hypervisor is enabled on the system."""
        return self._is_hypervisor

    def is_baremetal(self):
        """Return True if the system is baremetal; a hypervisor should report False."""
        return self._is_baremetal

    def is_linux(self):
        """Return True when platform.system() starts with 'Linux'."""
        return self._is_linux

    def is_windows(self):
        """Return True when platform.system() starts with 'Window'."""
        return self._is_windows

    def get_gpu_choices(self):
        """Return the selectable GPUs as {gpu_index: (BDF, UUID)}.

        The single entry is hard-coded sample data.
        """
        return {'1': (BDF('0000:23:00.0'), '1234')}

    def get_devices(self):
        """Not implemented yet; returns None."""
        return None

    def get_device_from_socket(self):
        """Not implemented yet; returns None."""
        return None

    def get_amd_gpu_bdfs(self):
        """Not implemented yet; returns None."""
        return None

    def get_amd_cpu_bdfs(self):
        """Not implemented yet; returns None."""
        return None
|
||||
|
||||
|
||||
|
||||
# def getBus(device):
|
||||
# """ Return the bus identifier of a given device
|
||||
|
||||
# @param device: DRM device identifier
|
||||
# """
|
||||
# bdfid = c_uint64(0)
|
||||
# ret = rocmsmi.rsmi_dev_pci_id_get(device, byref(bdfid))
|
||||
|
||||
# # BDFID = ((DOMAIN & 0xffffffff) << 32) | ((BUS & 0xff) << 8) |((DEVICE & 0x1f) <<3 ) | (FUNCTION & 0x7)
|
||||
# domain = (bdfid.value >> 32) & 0xffffffff
|
||||
# bus = (bdfid.value >> 8) & 0xff
|
||||
# device = (bdfid.value >> 3) & 0x1f
|
||||
# function = bdfid.value & 0x7
|
||||
|
||||
# pic_id = '{:04X}:{:02X}:{:02X}.{:0X}'.format(domain, bus, device, function)
|
||||
# if rsmi_ret_ok(ret, device):
|
||||
# return pic_id
|
||||
@@ -1,71 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
### Handle init singularly
|
||||
# Python imports module does not re-execute code on import
|
||||
|
||||
import atexit
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Handle bindings for Windows, Hyper-V and KVM separately
|
||||
from amdsmiBindings import *
|
||||
|
||||
# Using basic python logging for user errors and development
|
||||
# logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) # Logging for Development
|
||||
# Root logger setup: the level is ERROR and each record is rendered as "LEVEL: message".
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.ERROR) # User level logging
|
||||
|
||||
# On initial import set initialized variable
|
||||
# Module-level flag; the guard at the bottom of this module sets it to True after calling init_amd_smi().
amd_smi_initialized = False
|
||||
|
||||
def check_return(return_code, error_statment):  # TODO: would raising an exception be better?
    """Exit the process when an AMD-SMI call did not report success.

    Args:
        return_code: Status returned by the AMD-SMI call.
        error_statment: Message logged at ERROR level on failure.

    On failure the process exits via sys.exit(return_code); on success the
    function returns None.
    """
    # NOTE(review): other functions in this file reference amdsmi_status_t for
    # the same constant - confirm which name the bindings export.
    if return_code == amdsmi_status.AMDSMI_STATUS_SUCCESS:
        return

    logging.error(error_statment)
    sys.exit(return_code)
|
||||
|
||||
|
||||
def check_amdgpu_driver():  # TODO: Handle KVM logic
    """Return True when /sys/module/amdgpu/initstate exists and reads 'live'."""
    initstate_file = Path("/sys/module/amdgpu/initstate")
    return initstate_file.exists() and initstate_file.read_text().strip() == 'live'
|
||||
|
||||
|
||||
def init_amd_smi(flag=amdsmi_init_flags.AMD_SMI_INIT_AMD_GPUS):
    """Initialize AMD-SMI, exiting the process when that is not possible.

    Args:
        flag: Init flag handed to amdsmi.amdsmi_init(). Only AMD GPUs are
            initialized by default, waiting for future support for AMD CPUs.

    Exits with status -1 when the amdgpu driver is not live, and through
    check_return() when amdsmi_init() does not report success.
    """
    # Check if amdgpu driver is up
    if not check_amdgpu_driver():
        logging.error('Driver not initialized (amdgpu not found in modules)')
        # sys.exit instead of the builtin exit(): the latter is injected by the
        # site module and is not guaranteed to exist (e.g. under `python -S`).
        sys.exit(-1)

    init_status = amdsmi.amdsmi_init(flag)
    check_return(return_code=init_status, error_statment=f'AMD SMI initialization returned {init_status} (the expected value is {amdsmi_status_t.AMDSMI_STATUS_SUCCESS})')
    logging.info('amd-smi initialized successfully')
|
||||
|
||||
|
||||
def amdsmi_shut_down():
    """Shut down AMD-SMI.

    Calls amdsmi.amdsmi_shut_down() and passes its status to check_return(),
    which exits the process if the status is not the success value.
    """
    status = amdsmi.amdsmi_shut_down()
    failure_message = f'AMD SMI Shutdown code returned {status} (the expected value is {amdsmi_status_t.AMDSMI_STATUS_SUCCESS})'
    check_return(return_code=status, error_statment=failure_message)
    logging.debug('amd-smi shutdown successfully')
|
||||
|
||||
|
||||
def signal_handler(sig, frame):
    """Signal callback: log the signal number at DEBUG level and exit with status 0.

    Args:
        sig: Number of the received signal.
        frame: Current stack frame supplied by the signal module (unused).
    """
    logging.debug(f'Handling signal: {sig}')
    # Same effect as sys.exit(0)
    raise SystemExit(0)
|
||||
|
||||
|
||||
if not amd_smi_initialized:
    # Runs when this module's top-level code executes: bring AMD-SMI up and
    # record that it has been done.
    init_amd_smi()
    amd_smi_initialized = True

    # Exit through signal_handler on Ctrl-C / termination requests
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    # Shut AMD-SMI down again at interpreter exit
    atexit.register(amdsmi_shut_down)
|
||||
@@ -1,46 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
# import orderedDict
|
||||
import json
|
||||
|
||||
class AMD_SMI_Logger(object):
    """Placeholder for the amd-smi output logger.

    Every method is a stub that does nothing and returns None. The notes in
    the methods record the intended design only.
    """

    def __init__(self) -> None:
        # Planned: a per-card store (self.card = {}), kept backwards compatible
        return None

    def store_output(self, target_device, log):
        """Planned: remember `log` for `target_device`. Currently a no-op."""
        return None

    def print_output(self, format=''):
        """Planned: print the stored output as JSON, CSV or text. Currently a no-op."""
        # Planned: split into the 3 formats and pretty-print each stored card
        return None

    def print_json(self):
        """Planned: pretty-print the stored output as JSON. Currently a no-op."""
        # Sketch: json_object = json.loads(json_data)
        #         print(json.dumps(json_object, indent=2))
        return None

    def print_csv(self):
        """Planned: write the stored output as CSV. Currently a no-op."""
        # Sketch: load the JSON data, then emit its rows through csv.writer
        return None
|
||||
@@ -1,97 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import argparse
|
||||
import platform
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import BDF
|
||||
from amd_smi_init import *
|
||||
|
||||
class AMD_SMI_Modules(object):
    """Thin Python wrappers around the amdsmi ctypes bindings.

    Each wrapper calls one amdsmi function and passes the returned status to
    check_return(), which exits the process when the status is not success.
    """

    def __init__(self) -> None:
        pass

    def get_socket_handles(self):
        """Return (socket_count, socket_handles).

        socket_handles is a ctypes array of c_void_p with socket_count entries.
        """
        socket_count = c_uint32(0)
        # First call with no buffer; socket_count is then used to size the array
        # (presumably the library writes the socket count - confirm in bindings)
        return_code = amdsmi.amdsmi_get_socket_handles(byref(socket_count), None)
        check_return(return_code=return_code, error_statment="Invalid get_socket_handles request")

        # `c_void_p * n` builds an array type; calling it yields a zero-filled array
        socket_handles = (c_void_p * socket_count.value)()
        return_code = amdsmi.amdsmi_get_socket_handles(byref(socket_count), socket_handles)
        check_return(return_code=return_code, error_statment=f"Invalid get_socket_handles with {socket_count.value} sockets")
        return (socket_count.value, socket_handles)

    def get_device_handles(self, socket_handle):
        """Gets the Device Handles that are in the current socket

        Returns (device_count, device_handles), where device_handles is a
        ctypes array of c_void_p with device_count entries.
        """
        device_count = c_uint32(0)
        # First call with no buffer; device_count is then used to size the array
        return_code = amdsmi.amdsmi_get_device_handles(socket_handle, byref(device_count), None)
        check_return(return_code=return_code, error_statment="Invalid get_device_handles request")

        device_handles = (c_void_p * device_count.value)()
        return_code = amdsmi.amdsmi_get_device_handles(socket_handle, byref(device_count), byref(device_handles))
        check_return(return_code=return_code, error_statment=f"Invalid get_device_handles with {device_count.value} devices")
        return (device_count.value, device_handles)

    def get_socket_info(self, socket_handle):
        """ Given a socket_handle, return the socket_info, which is just a BDF object"""
        socket_info = create_string_buffer(128)
        return_code = amdsmi.amdsmi_get_socket_info(socket_handle, byref(socket_info), c_size_t(128))
        check_return(return_code=return_code, error_statment="Invalid get_socket_info request")
        # BDF is imported as a module in this file, hence BDF.BDF
        return BDF.BDF(socket_info.value.decode())

    def get_device_type(self, device_handle, format=True):
        """Return the device type of device_handle.

        Args:
            device_handle: Handle of the device to query.
            format: True for the type name looked up in
                device_type__enumvalues; False for the raw int.
        """
        dev_type = c_int(0)
        return_code = amdsmi.amdsmi_get_device_type(device_handle, byref(dev_type))
        check_return(return_code=return_code, error_statment="Invalid get_device_type request")

        # Explicit comparison kept: only values equal to True select the string form
        if format == True:
            return device_type__enumvalues[dev_type.value]

        return dev_type.value

    def get_device_bdf(self, device_handle):
        """Return the amdsmi_bdf_t filled in for device_handle.

        Per the bindings sketch kept with this method, amdsmi_bdf_t is a Union
        of 'bdf_submodule' and 'as_uint' (c_uint64) - confirm against the
        generated bindings.
        """
        bdf = amdsmi_bdf_t()
        # Pass the out-parameter by reference like every other wrapper here;
        # ctypes accepts byref() whether or not argtypes declares a pointer.
        return_code = amdsmi.amdsmi_get_device_bdf(device_handle, byref(bdf))
        check_return(return_code=return_code, error_statment="Invalid amdsmi_get_device_bdf request")
        return bdf

    def get_device_handle_from_bdf(self, bdf):
        """Not implemented yet."""
        pass

    def get_fan_speed(self, bdf):
        """Not implemented yet."""
        pass

    def show_retired_pages(self):
        """Not implemented yet."""
        # Sketch: num_pages = c_uint32(); records = rsmi_retired_page_record_t()
        pass
|
||||
@@ -1,619 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import argparse
|
||||
import platform
|
||||
|
||||
from _version import __version__
|
||||
from amd_smi_helpers import AMD_SMI_Helpers
|
||||
|
||||
# sudo /src/out/ubuntu-20.04/20.04/bin/rocm-smi -bc --json | python -m json.tool
|
||||
|
||||
class AMD_SMI_Parser(argparse.ArgumentParser):
|
||||
|
||||
def __init__(self, version, discovery, static, firmware, bad_pages, metric,
             process, profile, event, topology, set_value, reset, misc, gpu_v):
    """Build the amd-smi argument parser and register one subparser per command.

    Every argument is the callable registered as the handler of the
    subcommand with the same name. set_value is accepted but not registered
    yet: its add_set_value_parser call below is still commented out.
    """
    # Helper variables
    self.amd_smi_helpers = AMD_SMI_Helpers()
    self.gpu_choices = self.amd_smi_helpers.get_gpu_choices()
    self.vf_choices = ['3', '2', '1']

    # Adjust argument parser options
    super().__init__(
        formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90),
        description=f'AMD System Management Interface | Version: {__version__}',  # TODO: add the environment
        add_help=True,
        prog='amd-smi')

    # Setup subparsers
    subparsers = self.add_subparsers(
        title="AMD-SMI Commands",
        parser_class=argparse.ArgumentParser,
        required=True,
        help='Descriptions:',
        # dest='cmd',
        metavar="")

    # Add all subparsers
    # TODO: add --json, --csv, --file, --loglevel, watch, watch_time and
    # iterations, plus backwards compatibility flags --gpuvsmi / --rocmsmi
    self.add_version_parser(subparsers, version)
    self.add_discovery_parser(subparsers, discovery)
    self.add_static_parser(subparsers, static)
    self.add_firmware_parser(subparsers, firmware)
    self.add_bad_pages_parser(subparsers, bad_pages)
    self.add_metric_parser(subparsers, metric)
    self.add_process_parser(subparsers, process)
    self.add_profile_parser(subparsers, profile)
    self.add_event_parser(subparsers, event)
    self.add_topology_parser(subparsers, topology)
    # self.add_set_value_parser(subparsers, set_value)
    self.add_reset_parser(subparsers, reset)
    self.add_misc_parser(subparsers, misc)
    # gpu_v gets its own handler; passing misc here would dispatch it to misc
    self.add_gpu_v_parser(subparsers, gpu_v)
|
||||
|
||||
|
||||
def add_version_parser(self, subparsers, func):
    """Register the 'version' subcommand and bind func as its handler."""

    def wide_help_formatter(prog):
        # Same help layout as the other amd-smi subcommands
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    version_parser = subparsers.add_parser(
        'version',
        help="Display version information",
        description=None,
        formatter_class=wide_help_formatter)
    # Clear the heading of the optional-arguments group
    version_parser._optionals.title = None
    version_parser.set_defaults(func=func)
|
||||
|
||||
|
||||
def add_discovery_parser(self, subparsers, func):
    """Register the 'discovery' subcommand and bind func as its handler."""

    def wide_help_formatter(prog):
        # Same help layout as the other amd-smi subcommands
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    # Long description shown by `amd-smi discovery --help`
    discovery_subcommand_help = """Lists all the devices on the system and the links between devices.
                            Lists all the sockets and for each socket, GPUs and/or CPUs associated to
                            that socket alongside some basic information for each device.
                            In virtualization environment, it can also list VFs associated to each
                            GPU with some basic information for each VF."""

    discovery_parser = subparsers.add_parser(
        'discovery',
        help="Display discovery information",
        description=discovery_subcommand_help,
        formatter_class=wide_help_formatter)
    discovery_parser.set_defaults(func=func)
|
||||
|
||||
|
||||
def add_static_parser(self, subparsers, func):
    """Register the 'static' subcommand and bind func as its handler.

    --gpu and the asic/bus/vbios/limit/driver/caps flags are always added.
    --ras is added on hypervisors and baremetal (plus --board on Linux).
    --vf, --dfc-ucode, --fb-info and --num-vf are added on hypervisors only.
    """

    def wide_help_formatter(prog):
        # Same help layout as the other amd-smi subcommands
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    helpers = self.amd_smi_helpers
    on_hypervisor = helpers.is_hypervisor

    # Subparser help text
    static_subcommand_help = """If no argument is provided, return static information for all GPUs on the system.
                            If no static argument is specified all static information will be displayed."""

    # Device selection help text
    gpu_help = "Select a GPU from the possible choices"
    vf_help = """Gets general information about the specified VF (timeslice, fb info, …).
                            Available only on virtualization OSs"""

    # Create static subparser
    static_parser = subparsers.add_parser(
        'static',
        help="Gets static information about the specified GPU",
        description=static_subcommand_help,
        formatter_class=wide_help_formatter)
    static_parser._optionals.title = "Static Arguments"
    static_parser.set_defaults(func=func)

    # Mutually exclusive device selectors within the subparser
    device_args = static_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)

    # Flags available on every platform: (short, long, help)
    common_flags = (
        ('-a', '--asic', "All asic information"),
        ('-b', '--bus', "All bus information"),
        ('-v', '--vbios', "All video bios information (if available)"),
        ('-l', '--limit', "All limit metric values (i.e. power and thermal limits)"),
        ('-d', '--driver', "Displays driver version"),
        ('-c', '--caps', "All caps information"),
    )
    for short_flag, long_flag, help_text in common_flags:
        static_parser.add_argument(short_flag, long_flag, action='store_true', required=False, help=help_text)

    # Options to display on Hypervisors and Baremetal
    if on_hypervisor() or helpers.is_baremetal():
        static_parser.add_argument('-r', '--ras', action='store_true', required=False,
                                   help="Displays RAS features information")
        if helpers.is_linux():  # TODO: check if applicable to Azure
            # Linux Baremetal only
            static_parser.add_argument('-B', '--board', action='store_true', required=False,
                                       help="All board information")

    # Options to only display on a Hypervisor
    if on_hypervisor():
        device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices)
        hypervisor_flags = (
            ('-du', '--dfc-ucode', "All DFC FW table information"),
            ('-f', '--fb-info', "Displays Frame Buffer information"),
            ('-n', '--num-vf', "Displays number of supported and enabled VFs"),
        )
        for short_flag, long_flag, help_text in hypervisor_flags:
            static_parser.add_argument(short_flag, long_flag, action='store_true', required=False, help=help_text)
|
||||
|
||||
|
||||
def add_firmware_parser(self, subparsers, func):
    """Register the 'firmware' subcommand and bind func as its handler.

    --gpu and --fw-list are always added; --vf and --error-records are added
    on hypervisors only.
    """

    def wide_help_formatter(prog):
        # Same help layout as the other amd-smi subcommands
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    # Optional arguments help text
    gpu_help = "Select a GPU from the possible choices"
    vf_help = """Gets general information about the specified VF (timeslice, fb info, …).
                            Available only on virtualization OSs"""

    # Create firmware subparser
    firmware_parser = subparsers.add_parser(
        'firmware',
        help="Gets firmware information about the specified GPU",
        description="If no argument is provided, return firmware information for all GPUs on the system.",
        formatter_class=wide_help_formatter)
    firmware_parser._optionals.title = "Firmware Arguments"
    firmware_parser.set_defaults(func=func)

    # Mutually exclusive device selectors within the subparser
    device_args = firmware_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)

    # Optional args
    firmware_parser.add_argument('-f', '--fw-list', action='store_true', required=False,
                                 help="All FW list information")  # Redundant?

    # Everything below is only offered on a hypervisor
    if not self.amd_smi_helpers.is_hypervisor():
        return

    device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices)
    firmware_parser.add_argument('-e', '--error-records', action='store_true', required=False,
                                 help="All error records information")
|
||||
|
||||
|
||||
def add_bad_pages_parser(self, subparsers, func):  # TODO: Retired pages?
    """Register the 'bad_pages' subcommand and bind func as its handler.

    The subcommand is only applicable to Linux Baremetal systems; on any
    other platform nothing is registered.
    """
    helpers = self.amd_smi_helpers
    linux_baremetal = helpers.is_baremetal() and helpers.is_linux()
    if not linux_baremetal:
        return

    def wide_help_formatter(prog):
        # Same help layout as the other amd-smi subcommands
        return argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)

    # Create bad_pages subparser
    bad_pages_parser = subparsers.add_parser(
        'bad_pages',
        help="Gets bad page information about the specified GPU",
        description="If no argument is provided, return bad page information for all GPUs on the system.",
        formatter_class=wide_help_formatter)
    bad_pages_parser._optionals.title = "Bad pages Arguments"
    bad_pages_parser.set_defaults(func=func)

    # Mutually exclusive device selectors within the subparser
    device_args = bad_pages_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', choices=self.gpu_choices,
                             help="Select a GPU from the possible choices")

    # Optional flags: (short, long, help)
    page_flags = (
        ('-p', '--pending', "Displays all pending retired pages"),
        ('-r', '--retired', "Displays retired pages"),  # TODO: wording
        ('-u', '--un-res', "Displays unreservable pages"),
    )
    for short_flag, long_flag, help_text in page_flags:
        bad_pages_parser.add_argument(short_flag, long_flag, action='store_true', required=False, help=help_text)
|
||||
|
||||
|
||||
def add_metric_parser(self, subparsers, func):
    """Register the ``metric`` subcommand and its platform-dependent switches.

    The subcommand is always added; which switches it accepts depends on
    what ``self.amd_smi_helpers`` reports about the host platform.

    Args:
        self: Object providing ``amd_smi_helpers``, ``gpu_choices`` and, when
            the hypervisor branch runs, ``vf_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    platform = self.amd_smi_helpers

    metric_parser = subparsers.add_parser(
        'metric',
        help="Gets metric/performance information about the specified GPU",
        description=("If no argument is provided, return metric information for all GPUs on the system.\n"
                     "If no metric argument is specified all metric information will be displayed."),
    )
    metric_parser._optionals.title = "Metric arguments"
    metric_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    metric_parser.set_defaults(func=func)

    def add_switches(*rows):
        # Every metric selector is a plain boolean switch.
        for short_flag, long_flag, text in rows:
            metric_parser.add_argument(short_flag, long_flag, action='store_true',
                                       required=False, help=text)

    # Device selection lives in its own mutually exclusive group
    device_args = metric_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', choices=self.gpu_choices,
                             help="Select a GPU from the possible choices")

    # Available on every platform
    add_switches(('-u', '--usage', "All metrics usage information"))

    # Virtual OS and Baremetal systems
    if platform.is_virtual_os() or platform.is_baremetal():
        add_switches(('-b', '--fb-usage', "Total and used framebuffer"))

    # Hypervisors and Baremetal systems
    if platform.is_hypervisor() or platform.is_baremetal():
        add_switches(
            ('-p', '--power', "Current power usage"),
            ('-c', '--clock', "Average, max, and current clock frequencies"),
            ('-t', '--temperature', "Current temperatures"),
            ('-e', '--ecc', "Number of ECC errors"),
            ('-P', '--pcie', "Current PCIe speed and width"),
            ('-v', '--voltage', "Current GPU voltages"),
        )

    # Linux Baremetal systems #@TODO Discuss logic if Linux Hypervisors would be allowed to have this
    if platform.is_baremetal() and platform.is_linux():
        add_switches(
            ('-f', '--fan', "Current fan speed"),
            ('-s', '--pcie-usage', "Estimated PCIe link usage"),
            ('-V', '--voltage-curve', "Display voltage curve"),
            ('-o', '--overdrive', "Current GPU clock overdrive level"),
            ('-m', '--mem-overdrive', "Current memory clock overdrive level"),
            ('-l', '--perf-level', "Current DPM performance level"),
            ('-r', '--replay-count', "PCIe replay count"),
            ('-x', '--xgmi-err', "XGMI error information since last read"),
            ('-E', '--energy', "Amount of energy consumed"),  #@TODO ? Available only on host Linux Baremetal platforms
        )

    # Options to only display to Hypervisors
    # NOTE(review): '-s' is also registered by the Linux Baremetal branch above;
    # this presumably relies on baremetal and hypervisor being mutually
    # exclusive -- confirm, otherwise argparse raises a conflict error.
    if platform.is_hypervisor():
        device_args.add_argument(
            '--vf', action='store', choices=self.vf_choices,
            help=("Gets general information about the specified VF (timeslice, fb info, …).\n"
                  "Available only on virtualization OSs"))
        add_switches(
            ('-s', '--schedule', "All scheduling information"),
            ('-g', '--guard', "All guard information"),
            ('-G', '--guest', "All guest data information"),
        )
|
||||
|
||||
|
||||
def add_process_parser(self, subparsers, func):
    """Register the ``process`` subcommand (skipped on hypervisors).

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    if self.amd_smi_helpers.is_hypervisor():
        # Don't add this subparser on Hypervisors
        return

    parser = subparsers.add_parser(
        'process',
        help="Lists general process information running on the specified GPU",
        description=("If no argument is provided, returns information for all GPUs on the system.\n"
                     "If no argument is provided all process information will be displayed."),
    )
    parser._optionals.title = "Process arguments"
    parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    parser.set_defaults(func=func)

    # Device selection lives in its own mutually exclusive group
    targets = parser.add_mutually_exclusive_group()
    targets.add_argument('--gpu', action='store', choices=self.gpu_choices,
                         help="Select a GPU from the possible choices")

    # Optional args: (short flag, long flag, argparse action, help text)
    options = (
        ('-g', '--general', 'store_true', "pid, process name, memory usage"),
        ('-e', '--engine', 'store_true', "All engine usages"),
        ('-p', '--pid', 'store',
         "Gets all process information about the specified process based on Process ID"),
        ('-n', '--name', 'store',
         "Gets all process information about the specified process based on Process Name.\n"
         "If multiple processes have the same name information is returned for all of them."),  #@TODO wording
    )
    for short_flag, long_flag, action, text in options:
        parser.add_argument(short_flag, long_flag, action=action, required=False, help=text)
|
||||
|
||||
|
||||
def add_profile_parser(self, subparsers, func):
    """Register the ``profile`` subcommand.

    Nothing is registered unless ``self.amd_smi_helpers`` reports both
    Windows and a hypervisor.

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    platform = self.amd_smi_helpers
    if not platform.is_windows() or not platform.is_hypervisor():
        # This subparser only applies to Azure Hyper-V systems
        return

    parser = subparsers.add_parser(
        'profile',
        help="Displays information about all profiles and current profile",
        description="If no argument is provided, returns information for all GPUs on the system.",
    )
    parser._optionals.title = "Profile Arguments"
    parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    parser.set_defaults(func=func)

    # Device selection lives in its own mutually exclusive group
    targets = parser.add_mutually_exclusive_group()
    targets.add_argument('--gpu', action='store', choices=self.gpu_choices,
                         help="Select a GPU from the possible choices")
|
||||
|
||||
|
||||
def add_event_parser(self, subparsers, func):
    """Register the ``event`` subcommand on Linux BareMetal and Linux Hypervisors.

    The previous guard returned early on exactly the platforms named in its
    own comment (Linux and not a virtual OS), so the subcommand was never
    registered where it was meant to exist. The guard is now negated, in line
    with the ``if not (...): return`` pattern of the sibling parsers.

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    if not (self.amd_smi_helpers.is_linux() and not self.amd_smi_helpers.is_virtual_os()):
        # This subparser only applies to Linux BareMetal & Linux Hypervisors
        return

    # Subparser help text
    event_help = "Displays event information for the given GPU"
    event_subcommand_help = "If no argument is provided, returns event information for all GPUs on the system."
    event_optionals_title = "Event Arguments"

    # Required arguments help text
    gpu_help = "Select a GPU from the possible choices"

    # Create event subparser
    event_parser = subparsers.add_parser('event', help=event_help, description=event_subcommand_help)
    event_parser._optionals.title = event_optionals_title
    event_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    event_parser.set_defaults(func=func)

    # Mutually Exclusive Args within the subparser
    device_args = event_parser.add_mutually_exclusive_group()
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)
|
||||
|
||||
|
||||
def add_topology_parser(self, subparsers, func):
    """Register the ``topology`` subcommand.

    Nothing is registered unless ``self.amd_smi_helpers`` reports both a
    baremetal and a Linux system.

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    platform = self.amd_smi_helpers
    if not platform.is_baremetal() or not platform.is_linux():
        # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
        return

    parser = subparsers.add_parser(
        'topology',
        help="Displays topology information of the devices.",
        description="If no argument is provided, returns information for all GPUs on the system.",
    )
    parser._optionals.title = "Topology arguments"
    parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    parser.set_defaults(func=func)

    # Device selection lives in its own mutually exclusive group
    targets = parser.add_mutually_exclusive_group()
    targets.add_argument('--gpu', action='store', choices=self.gpu_choices,
                         help="Select a GPU from the possible choices")

    # Boolean switches: (short flag, long flag, help text)
    switches = (
        ('-a', '--topo-access', "Displays link accessibility between GPUs"),
        ('-w', '--topo-weight', "Displays relative weight between GPUs"),
        ('-o', '--topo-hops', "Displays the number of hops between GPUs"),
        ('-t', '--topo-type', "Displays the link type between GPUs."),
        ('-n', '--topo-numa', "Displays the numa nodes."),
    )
    for short_flag, long_flag, text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true',
                            required=False, help=text)
|
||||
|
||||
|
||||
def add_set_value_parser(self, subparsers, func):
    """Register the ``set`` subcommand and its configuration options.

    Nothing is registered unless ``self.amd_smi_helpers`` reports both a
    baremetal and a Linux system.

    The previous body could not run: it used ``topo_*_help`` names that are
    not defined in this function, registered ``--topo-numa`` repeatedly
    (argparse rejects duplicate option strings) and was missing a comma
    between ``'-o'`` and its long option. Each short flag is now paired with
    the long option and help text this function already documents for it.

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    if not (self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()):
        # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
        return

    # Subparser help text
    set_value_help = "Set options for devices."
    set_value_subcommand_help = "The user must specify one of the options for the set configuration."
    set_value_optionals_title = "Set Arguments"

    # Required arguments help text
    gpu_help = "Select a GPU from the possible choices"

    # (short flag, long flag, help text); the trailing comment on each row is
    # the operand list planned for that option.
    # NOTE(review): every option still stores a single value, as the original
    # --setclk line did; confirm whether multi-operand options need nargs.
    set_options = (
        ('-c', '--setclk', "Sets clock frequency levels for specified clocks"),  # <type> <level>
        ('-s', '--setsclk', "Sets GPU clock frequency levels"),  # <level>
        ('-m', '--setmclk', "Sets memory clock frequency levels"),  # <type> <level>
        ('-p', '--setpcie', "Sets PCIe clock frequency levels"),  # <level>
        ('-S', '--setslevel', "Change GPU clock frequency and voltage for a specific level"),  # <sclk_level> <sclk> <svolt>
        ('-M', '--setmlevel', "Change GPU memory frequency and voltage for a specific level"),  # <mclk_level> <mclk> <mvolt>
        ('-v', '--setvc', "Change SCLK voltage curve for a specified point"),  # <point> <sclk> <svolt>
        ('-r', '--setsrange', "Sets min and max SCLK speed"),  # <sclk_min> <sclk_max>
        ('-R', '--setmrange', "Sets min and max MCLK speed"),  # <mclk_min> <mclk_max>
        ('-f', '--setfan', "Sets GPU fan speed (level or %)"),  # <level>
        ('-pl', '--setperflevel', "Sets performance level"),  # <level>
        ('-o', '--setoverdrive', "Set GPU overdrive level"),  # %
        ('-O', '--setmemoverdrive', "Set memory overclock overdrive level"),  # %
        ('-po', '--setpoweroverdrive', "Set the maximum GPU power using power overdrive in Watts"),  # <power>
        ('-P', '--setprofile', "Set power profile level (#) or a quoted string of custom profile attributes"),  # <profile>
        ('-pd', '--setperfdet', "Set GPU clock frequency limit to get minimal performance variation"),  # <sclk>
        ('-re', '--rasenable', "Enable RAS for specified block and error type"),  # <block> <err_type>
        ('-rd', '--rasdisable', "Disable RAS for specified block and error type."),  # <block> <err_type>
        ('-ri', '--rasinject', "Inject RAS poison for specified block"),  # <block>
    )

    # Create set_value subparser
    set_value_parser = subparsers.add_parser('set', help=set_value_help, description=set_value_subcommand_help)
    set_value_parser._optionals.title = set_value_optionals_title
    set_value_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    set_value_parser.set_defaults(func=func)

    # Mutually Exclusive Args within the subparser
    device_args = set_value_parser.add_mutually_exclusive_group(required=True)
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)

    # Optional Args
    for short_flag, long_flag, option_help in set_options:
        set_value_parser.add_argument(short_flag, long_flag, action='store',
                                      required=False, help=option_help)
|
||||
|
||||
|
||||
def add_reset_parser(self, subparsers, func):
    """Register the ``reset`` subcommand.

    Nothing is registered unless ``self.amd_smi_helpers`` reports both a
    baremetal and a Linux system. ``--gpu`` sits in a required group.

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    platform = self.amd_smi_helpers
    if not platform.is_baremetal() or not platform.is_linux():
        # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
        return

    parser = subparsers.add_parser(
        'reset',
        help="Reset options for devices.",
        description="The user must specify one of the options to reset devices.",
    )
    parser._optionals.title = "Reset Arguments"
    parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    parser.set_defaults(func=func)

    # Device selection: a required mutually exclusive group
    targets = parser.add_mutually_exclusive_group(required=True)
    targets.add_argument('--gpu', action='store', choices=self.gpu_choices,
                         help="Select a GPU from the possible choices")

    # Boolean switches: (short flag, long flag, help text)
    switches = (
        ('-g', '--gpureset', "Reset the specified GPU"),
        ('-c', '--resetclk', "Reset clocks and overdrive to default"),
        ('-f', '--resetfans', "Reset fans to automatic (driver) control"),
        ('-p', '--resetprofile', "Reset power profile back to default"),
        ('-o', '--resetpoweroverdrive', "Set the maximum GPU power back to the device default state"),
        ('-x', '--resetxgmierr', "Reset XGMI error counts"),
        ('-d', '--resetperfdet', "Disable performance determinism"),
    )
    for short_flag, long_flag, text in switches:
        parser.add_argument(short_flag, long_flag, action='store_true',
                            required=False, help=text)
|
||||
|
||||
|
||||
def add_misc_parser(self, subparsers, func):
    """Register the ``misc`` subcommand (load/save of device settings).

    Nothing is registered unless ``self.amd_smi_helpers`` reports both a
    baremetal and a Linux system.

    ``--load`` yields a text file object opened for reading and ``--save``
    one opened for writing. Previously both used ``type=open``, which opens
    read-only: ``--save`` could not target a new file, and an ``OSError``
    from ``open`` escaped argparse as a traceback. Open failures are now
    reported as ordinary argument errors.

    Args:
        self: Object providing ``amd_smi_helpers`` and ``gpu_choices``.
        subparsers: Object returned by ``ArgumentParser.add_subparsers()``.
        func: Callback stored as the ``func`` default of the subcommand.
    """
    if not (self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()):
        # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
        return

    def _opener(mode):
        # Build an argparse ``type`` callable that opens the path in ``mode``.
        def _open_path(path):
            try:
                return open(path, mode)
            except OSError as err:
                # argparse turns ArgumentTypeError into a usage error message
                raise argparse.ArgumentTypeError(f"can't open '{path}': {err}") from err
        return _open_path

    # Subparser help text
    misc_help = "The miscellaneous options"
    # Previously a copy of the reset subcommand's description
    misc_subcommand_help = "The user must specify one of the miscellaneous options."
    misc_optionals_title = "Misc Arguments"

    # Optional arguments help text
    gpu_help = "Select a GPU from the possible choices"
    load_help = "Load clock, fan, performance, and profile settings from a given file."
    save_help = "Save clock, fan, performance, and profile settings to a given file."

    # Create misc subparser
    misc_parser = subparsers.add_parser('misc', help=misc_help, description=misc_subcommand_help)
    misc_parser._optionals.title = misc_optionals_title
    misc_parser.formatter_class = lambda prog: argparse.ArgumentDefaultsHelpFormatter(
        prog, max_help_position=80, width=90)
    misc_parser.set_defaults(func=func)

    # Mutually Exclusive Args within the subparser
    device_args = misc_parser.add_mutually_exclusive_group(required=True)
    device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)

    # Optional Args
    misc_parser.add_argument('-l', '--load', action='store', type=_opener('r'), required=False, help=load_help)
    # NOTE: opening in 'w' mode creates/truncates the target while arguments are parsed
    misc_parser.add_argument('-s', '--save', action='store', type=_opener('w'), required=False, help=save_help)
|
||||
|
||||
|
||||
# def add_gpu_v_parser(self, subparsers, func):
|
||||
# if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()):
|
||||
# # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work
|
||||
# return
|
||||
|
||||
# # Subparser help text
|
||||
# gpu_v_help = "The gpu_v options"
|
||||
# gpu_v_subcommand_help = "The user must specify one of the options to reset devices."
|
||||
# gpu_v_optionals_title = "gpu_v Arguments"
|
||||
|
||||
# # Optional arguments help text
|
||||
# gpu_help = "Select a GPU from the possible choices"
|
||||
# load_help = "Load clock, fan, performance, and profile settings from a given file."
|
||||
# save_help = "Save clock, fan, performance, and profile settings to a given file."
|
||||
|
||||
# # Create gpu_v subparser
|
||||
# gpu_v_parser = subparsers.add_parser('gpu_v', help=gpu_v_help, description=gpu_v_subcommand_help)
|
||||
# gpu_v_parser._optionals.title = gpu_v_optionals_title
|
||||
# gpu_v_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90)
|
||||
# gpu_v_parser.set_defaults(func=func)
|
||||
|
||||
# # Mutually Exclusive Args within the subparser
|
||||
# device_args = gpu_v_parser.add_mutually_exclusive_group(required=True)
|
||||
# device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices)
|
||||
|
||||
# # Optional Args
|
||||
# gpu_v_parser.add_argument('-l', '--load', action='store', type=open, required=False, help=load_help)
|
||||
# gpu_v_parser.add_argument('-s', '--save', action='store', type=open, required=False, help=save_help)
|
||||
@@ -1,818 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""! @brief AMD_SMI FFI"""
|
||||
|
||||
from ctypes import *
|
||||
from enum import Enum
|
||||
import os
|
||||
|
||||
|
||||
# Location of the AMD SMI shared library.
path_amdsmi = '/opt/rocm/lib/libamd_smi64.so' #@TODO make this dynamic

try:
    # cdll.LoadLibrary() returns the CDLL handle for the path, so a single
    # call both loads the library and binds it (it was loaded twice before).
    amdsmi = cdll.LoadLibrary(path_amdsmi)
except OSError:
    print("Unable to load libamd_smi64.so library\n")
    # The bare `exit` name is injected by the `site` module and is meant for
    # interactive use; raising SystemExit works regardless.
    raise SystemExit(1)
|
||||
|
||||
class amdsmi_init_flags(c_int):
    """Initialization flags.

    May be OR'd together and passed to smi.amdsmi_init().
    """

    AMD_SMI_INIT_ALL_DEVICES = 0x0  # Default option
    # One bit per device family
    AMD_SMI_INIT_AMD_CPUS = 0x1      # 1 << 0
    AMD_SMI_INIT_AMD_GPUS = 0x2      # 1 << 1
    AMD_SMI_INIT_NON_AMD_CPUS = 0x4  # 1 << 2
    AMD_SMI_INIT_NON_AMD_GPUS = 0x8  # 1 << 3
|
||||
|
||||
# Maximum size definitions GPUVSMI

# Element counts
AMDSMI_MAX_MM_IP_COUNT = 8
AMDSMI_MAX_DEVICES = 32
AMDSMI_MAX_CONTAINER_TYPE = 2

# String / buffer lengths
AMDSMI_MAX_DATE_LENGTH = 32  # YYYY-MM-DD:HH:MM:SS.MSC #
AMDSMI_MAX_STRING_LENGTH = 64
AMDSMI_NORMAL_STRING_LENGTH = 32
AMDSMI_MAX_NAME = 32
AMDSMI_MAX_DRIVER_VERSION_LENGTH = 80
AMDSMI_PRODUCT_NAME_LENGTH = 128

AMDSMI_GPU_UUID_SIZE = 38
|
||||
|
||||
|
||||
class amdsmi_mm_ip(c_int):
    """Integer constants identifying the MM IP kinds (UVD, VCE, VCN)."""

    MM_UVD = 0
    MM_VCE = 1
    MM_VCN = 2
    MM__MAX = 3  # one past the last listed kind
|
||||
|
||||
|
||||
class amdsmi_container_types(c_int):
    """Integer constants identifying container types (LXC, Docker)."""

    CONTAINER_LXC = 0
    CONTAINER_DOCKER = 1
|
||||
|
||||
# Opaque handles pointing to the underlying implementation; both are
# declared as pointers to unsigned int.
amdsmi_device_handle = POINTER(c_uint)  # device handle
amdsmi_socket_handle = POINTER(c_uint)  # socket handle
|
||||
|
||||
class device_type(c_int):
    """Integer constants identifying the kind of device."""

    UNKNOWN = 0
    AMD_GPU = 1
    AMD_CPU = 2
    NON_AMD_GPU = 3
    NON_AMD_CPU = 4


# Reverse lookup: numeric device_type value -> constant name
device_type__enumvalues = dict(enumerate((
    'UNKNOWN',
    'AMD_GPU',
    'AMD_CPU',
    'NON_AMD_GPU',
    'NON_AMD_CPU',
)))
|
||||
|
||||
class amdsmi_status(c_int):
    """Error codes returned by amd_smi_lib functions."""

    # --- generic status codes -------------------------------------------
    AMDSMI_STATUS_SUCCESS = 0               # Call succeeded
    AMDSMI_STATUS_INVAL = 1                 # Invalid parameters
    AMDSMI_STATUS_NOT_SUPPORTED = 2         # Command not supported
    AMDSMI_STATUS_FILE_ERROR = 3            # Problem accessing a file.
    AMDSMI_STATUS_NO_PERM = 4               # Permission Denied
    AMDSMI_STATUS_OUT_OF_RESOURCES = 5      # Not enough memory
    AMDSMI_STATUS_INTERNAL_EXCEPTION = 6    # An internal exception was caught
    AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS = 7   # The provided input is out of allowable or safe range
    AMDSMI_STATUS_INIT_ERROR = 8            # An error occurred when initializing internal data structures
    AMDSMI_STATUS_NOT_YET_IMPLEMENTED = 9   # Not implemented yet
    AMDSMI_STATUS_NOT_FOUND = 10            # Device Not found
    AMDSMI_STATUS_INSUFFICIENT_SIZE = 11    # Not enough resources were available for the operation
    AMDSMI_STATUS_INTERRUPT = 12            # An interrupt occurred during execution of function
    AMDSMI_STATUS_UNEXPECTED_SIZE = 13      # An unexpected amount of data was read
    AMDSMI_STATUS_NO_DATA = 14              # No data was found for a given input
    AMDSMI_STATUS_UNEXPECTED_DATA = 15      # The data read or provided to function is not what was expected
    AMDSMI_STATUS_BUSY = 16                 # Device busy
    AMDSMI_STATUS_REFCOUNT_OVERFLOW = 17    # An internal reference counter exceeded INT32_MAX

    # --- library codes, numbered from AMDSMI_LIB_START ------------------
    AMDSMI_LIB_START = 1000
    AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START  # Fail to load lib
    AMDSMI_STATUS_FAIL_LOAD_SYMBOL = 1001
    AMDSMI_STATUS_DRM_ERROR = 1002          # Error when call libdrm
    AMDSMI_STATUS_IO = 1003                 # Error
    AMDSMI_STATUS_FAULT = 1004              # Bad address
    AMDSMI_STATUS_API_FAILED = 1005         # API call failed
    AMDSMI_STATUS_TIMEOUT = 1006            # Timeout in API call
    AMDSMI_STATUS_NO_SLOT = 1007            # No more free slot
    AMDSMI_STATUS_RETRY = 1008              # Retry operation
    AMDSMI_STATUS_NOT_INIT = 1009           # Device not initialized

    AMDSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF  # An unknown error occurred


# Alias carrying the `_t` suffix
amdsmi_status_t = amdsmi_status
|
||||
class amdsmi_clk_type(c_int):
    """Clock types.

    All constants are plain integers. ``CLK_TYPE_SYS`` previously ended with
    a trailing comma, which made it (and its aliases ``CLK_TYPE_FIRST`` and
    ``CLK_TYPE_GFX``) the tuple ``(0,)`` instead of ``0``.
    """

    CLK_TYPE_SYS = 0x0    # System clock
    CLK_TYPE_FIRST = CLK_TYPE_SYS
    CLK_TYPE_GFX = CLK_TYPE_SYS
    CLK_TYPE_DF = 0x1     # Data Fabric clock (for ASICs
                          # running on a separate clock)
    CLK_TYPE_DCEF = 0x2   # Display Controller Engine clock
    CLK_TYPE_SOC = 0x3
    CLK_TYPE_MEM = 0x4
    CLK_TYPE_PCIE = 0x5
    CLK_TYPE_VCLK0 = 0x6
    CLK_TYPE_VCLK1 = 0x7
    CLK_TYPE_DCLK0 = 0x8
    CLK_TYPE_DCLK1 = 0x9
    CLK_TYPE__MAX = CLK_TYPE_DCLK1


# Alias carrying the `_t` suffix
amdsmi_clk_type_t = amdsmi_clk_type
|
||||
class amdsmi_temperature_type(c_int):
    """Indicates from which part of the device a temperature reading
    should be obtained."""

    TEMPERATURE_TYPE_EDGE = 0
    TEMPERATURE_TYPE_FIRST = TEMPERATURE_TYPE_EDGE  # alias of the first entry
    TEMPERATURE_TYPE_JUNCTION = 1
    TEMPERATURE_TYPE_VRAM = 2
    TEMPERATURE_TYPE_HBM_0 = 3
    TEMPERATURE_TYPE_HBM_1 = 4
    TEMPERATURE_TYPE_HBM_2 = 5
    TEMPERATURE_TYPE_HBM_3 = 6
    TEMPERATURE_TYPE_PLX = 7
    TEMPERATURE_TYPE__MAX = TEMPERATURE_TYPE_PLX    # alias of the last entry
|
||||
|
||||
class amdsmi_fw_block_t(c_int):
    """Identifiers for the various firmware blocks."""

    FW_ID_SMU = 1
    FW_ID_FIRST = FW_ID_SMU  # alias of the first entry

    # CP
    FW_ID_CP_CE = 2
    FW_ID_CP_PFP = 3
    FW_ID_CP_ME = 4
    FW_ID_CP_MEC_JT1 = 5
    FW_ID_CP_MEC_JT2 = 6
    FW_ID_CP_MEC1 = 7
    FW_ID_CP_MEC2 = 8

    FW_ID_RLC = 9

    # SDMA
    FW_ID_SDMA0 = 10
    FW_ID_SDMA1 = 11
    FW_ID_SDMA2 = 12
    FW_ID_SDMA3 = 13
    FW_ID_SDMA4 = 14
    FW_ID_SDMA5 = 15
    FW_ID_SDMA6 = 16
    FW_ID_SDMA7 = 17

    FW_ID_VCN = 18
    FW_ID_UVD = 19
    FW_ID_VCE = 20
    FW_ID_ISP = 21
    FW_ID_DMCU_ERAM = 22  # eRAM
    FW_ID_DMCU_ISR = 23   # ISR

    # RLC restore lists
    FW_ID_RLC_RESTORE_LIST_GPM_MEM = 24
    FW_ID_RLC_RESTORE_LIST_SRM_MEM = 25
    FW_ID_RLC_RESTORE_LIST_CNTL = 26
    FW_ID_RLC_V = 27

    FW_ID_MMSCH = 28

    # PSP
    FW_ID_PSP_SYSDRV = 29
    FW_ID_PSP_SOSDRV = 30
    FW_ID_PSP_TOC = 31
    FW_ID_PSP_KEYDB = 32
    FW_ID_DFC = 33
    FW_ID_PSP_SPL = 34

    FW_ID_DRV_CAP = 35
    FW_ID_MC = 36
    FW_ID_PSP_BL = 37
    FW_ID_CP_PM4 = 38
    FW_ID_ASD = 39
    FW_ID_TA_RAS = 40
    FW_ID_XGMI = 41
    FW_ID_RLC_SRLG = 42
    FW_ID_RLC_SRLS = 43
    FW_ID_SMC = 44
    FW_ID_DMCU = 45

    FW_ID__MAX = 46
|
||||
|
||||
class amdsmi_range_t(Structure):
    """A range (e.g., frequencies or voltages) as two unsigned 64-bit bounds."""

    _fields_ = [
        ('lower_bound', c_uint64),
        ('upper_bound', c_uint64),
    ]
|
||||
|
||||
class amdsmi_xgmi_info_t(Structure):
    """ctypes layout holding XGMI lane count, hive id, node id and an index."""

    _fields_ = [
        ('xgmi_lanes', c_uint8),
        ('xgmi_hive_id', c_uint64),
        ('xgmi_node_id', c_uint64),
        ('index', c_uint32),
    ]
|
||||
|
||||
#GPU Capability info
|
||||
|
||||
class gfx(Structure):
    """GFX part of the GPU capability info: IP major/minor and CU count."""

    _fields_ = [
        ('gfxip_major', c_uint32),
        ('gfxip_minor', c_uint32),
        ('gfxip_cu_count', c_uint16),
    ]
|
||||
|
||||
class mm(Structure):
    """MM part of the GPU capability info: an IP count and a fixed-size IP list."""

    _fields_ = [
        ('mm_ip_count', c_uint8),
        # Array of AMDSMI_MAX_MM_IP_COUNT bytes
        ('mm_ip_list', c_uint8 * AMDSMI_MAX_MM_IP_COUNT),
    ]
|
||||
class amdsmi_gpu_caps_t (Structure):
|
||||
_fields_ = [
|
||||
('gfx', gfx),
|
||||
('mm', mm),
|
||||
('ras_supported', c_bool),
|
||||
('max_vf_num', c_uint8),
|
||||
('gfx_ip_count', c_uint32),
|
||||
('dma_ip_count', c_uint32)
|
||||
]
|
||||
|
||||
class amdsmi_vram_info(Structure):
    """VRAM totals: total and used amounts as unsigned 32-bit values."""

    _fields_ = [
        ("vram_total", c_uint32),
        ("vram_used", c_uint32),
    ]
|
||||
|
||||
class amdsmi_frequency_range_t(Structure):
    """Supported and current frequency ranges, each an ``amdsmi_range_t``."""

    _fields_ = [
        ("supported_freq_range", amdsmi_range_t),
        ("current_freq_range", amdsmi_range_t),
    ]
|
||||
|
||||
class bdf_submodule(Structure):
    """Bit-field view of a BDF packed into one 64-bit word.

    Widths: function 3 bits, device 5 bits, bus 8 bits, domain 48 bits.
    """

    _fields_ = [
        ("function_number", c_uint64, 3),
        ("device_number", c_uint64, 5),
        ("bus_number", c_uint64, 8),
        ("domain_number", c_uint64, 48),
    ]


class amdsmi_bdf_t(Union):
    """BDF value readable either field-by-field or as one raw ``c_uint64``."""

    _fields_ = [
        ("bdf_submodule", bdf_submodule),
        ("as_uint", c_uint64),
    ]
|
||||
|
||||
class amdsmi_power_cap_info_t(Structure):
    """Power-cap record: current, default, DPM, minimum and maximum caps."""

    _fields_ = [
        ("power_cap", c_uint64),
        ("default_power_cap", c_uint64),
        ("dpm_cap", c_uint64),
        ("min_power_cap", c_uint64),
        ("max_power_cap", c_uint64),
    ]
|
||||
|
||||
class amdsmi_vbios_info_t(Structure):
    """VBIOS description: name, numeric version, build date, part number
    and version string, the text fields being fixed-size char arrays."""

    _fields_ = [
        ("name", c_char * AMDSMI_MAX_STRING_LENGTH),
        ("vbios_version", c_uint32),
        ("build_date", c_char * AMDSMI_MAX_DATE_LENGTH),
        ("part_number", c_char * AMDSMI_MAX_STRING_LENGTH),
        ("vbios_version_string", c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]
|
||||
|
||||
class fw_info_list(Structure):
    """One firmware entry: the firmware block id and its version."""

    _fields_ = [
        ("fw_id", amdsmi_fw_block_t),
        ("fw_version", c_uint64),
    ]


class amdsmi_fw_info_t(Structure):
    """Firmware table: an entry count plus a fixed array of ``fw_info_list``
    sized by ``amdsmi_fw_block_t.FW_ID__MAX``."""

    _fields_ = [
        ("num_fw_info", c_uint8),
        ("fw_info_list", fw_info_list * amdsmi_fw_block_t.FW_ID__MAX),
    ]
|
||||
|
||||
class amdsmi_asic_info_t(Structure):
    """ASIC identification: market name, family, vendor/subvendor/device
    ids, revision id and serial string."""

    _fields_ = [
        ("market_name", c_char * AMDSMI_MAX_STRING_LENGTH),
        ("family", c_uint32),
        ("vendor_id", c_uint32),
        ("subvendor_id", c_uint32),
        ("device_id", c_uint64),
        ("rev_id", c_uint32),
        ("asic_serial", c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]
|
||||
|
||||
class amdsmi_board_info(Structure):
    """Board identification: serial number, master flag and fixed-size
    text fields for model, product serial, FRU id, product name and
    manufacturer."""

    _fields_ = [
        ("serial_number", c_uint64),
        ("is_master", c_bool),
        ("model_number", c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ("product_serial", c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ("fru_id", c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ("product_name", c_char * AMDSMI_PRODUCT_NAME_LENGTH),
        ("manufacturer_name", c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]
|
||||
|
||||
class amdsmi_temperature_t(Structure):
    """Single-field record holding the current temperature reading."""

    _fields_ = [
        ("cur_temp", c_uint32),
    ]
|
||||
|
||||
class amdsmi_temperature_limit_t(Structure):
    """Single-field record holding a temperature limit."""

    _fields_ = [
        ("limit", c_uint32),
    ]
|
||||
|
||||
class amdsmi_power_limit_t(Structure):
    """Single-field record holding a power limit."""

    _fields_ = [
        ("limit", c_uint32),
    ]
|
||||
|
||||
class amdsmi_power_measure(Structure):
    """Power measurement: average socket power, energy accumulator and the
    gfx / soc / mem voltages."""

    _fields_ = [
        ("average_socket_power", c_uint32),
        ("energy_accumulator", c_uint64),
        ("voltage_gfx", c_uint32),
        ("voltage_soc", c_uint32),
        ("voltage_mem", c_uint32),
    ]
|
||||
|
||||
class amdsmi_clk_measure_t(Structure):
    """Clock measurement: current, average, minimum and maximum clock."""

    _fields_ = [
        ("cur_clk", c_uint32),
        ("avg_clk", c_uint32),
        ("min_clk", c_uint32),
        ("max_clk", c_uint32),
    ]
|
||||
|
||||
class amdsmi_engine_usage_t(Structure):
    """Engine activity: gfx and umc activity plus one multimedia activity
    slot per possible MM IP (AMDSMI_MAX_MM_IP_COUNT entries)."""

    _fields_ = [
        ("gfx_activity", c_uint32),
        ("umc_activity", c_uint32),
        ("mm_activity", c_uint32 * AMDSMI_MAX_MM_IP_COUNT),
    ]
|
||||
|
||||
# Process handle type: an unsigned 32-bit integer, used as the type of the
# 'pid' field of amdsmi_proc_info_t.
amdsmi_process_handle = c_uint32
|
||||
|
||||
class memory_usage(Structure):
    """Per-process memory usage split into GTT, CPU and VRAM amounts."""

    _fields_ = [
        ("gtt_mem", c_uint64),
        ("cpu_mem", c_uint64),
        ("vram_mem", c_uint64),
    ]
|
||||
|
||||
|
||||
class engine_usage(Structure):
    """Per-process engine usage: one 16-bit array of
    AMDSMI_MAX_MM_IP_COUNT entries for each of gfx, compute, sdma, enc
    and dec."""

    _fields_ = [
        ("gfx", c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ("compute", c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ("sdma", c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ("enc", c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
        ("dec", c_uint16 * AMDSMI_MAX_MM_IP_COUNT),
    ]
|
||||
class amdsmi_proc_info_t(Structure):
    """Process record: name, pid, memory figure, engine and memory usage
    sub-structures, and container name."""

    _fields_ = [
        ("name", c_char * AMDSMI_NORMAL_STRING_LENGTH),
        ("pid", amdsmi_process_handle),
        ("mem", c_uint64),
        ("engine_usage", engine_usage),
        ("memory_usage", memory_usage),
        ("container_name", c_char * AMDSMI_NORMAL_STRING_LENGTH),
    ]


# Alternative name for the same structure.
amdsmi_process_info = amdsmi_proc_info_t
|
||||
|
||||
# Guaranteed maximum possible number of supported frequencies
|
||||
AMDSMI_MAX_NUM_FREQUENCIES = 32
|
||||
|
||||
# The number of points that make up a voltage-frequency curve definition
|
||||
AMDSMI_NUM_VOLTAGE_CURVE_POINTS = 3
|
||||
|
||||
class amdsmi_dev_perf_level_t(c_int):
    """Device performance levels, exposed as integer class attributes."""

    # Performance level is "auto"
    AMDSMI_DEV_PERF_LEVEL_AUTO = 0
    AMDSMI_DEV_PERF_LEVEL_FIRST = AMDSMI_DEV_PERF_LEVEL_AUTO
    # Keep PowerPlay levels "high", regardless of workload
    AMDSMI_DEV_PERF_LEVEL_HIGH = 1
    # Only use values defined by manually setting the AMDSMI_CLK_TYPE_SYS speed
    AMDSMI_DEV_PERF_LEVEL_MANUAL = 2
    # Stable power state with profiling clocks
    AMDSMI_DEV_PERF_LEVEL_STABLE_STD = 3
    # Stable power state with peak clocks
    AMDSMI_DEV_PERF_LEVEL_STABLE_PEAK = 4
    # Stable power state with minimum memory clock
    AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK = 5
    # Stable power state with minimum system clock
    AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK = 6
    # Performance determinism state
    AMDSMI_DEV_PERF_LEVEL_DETERMINISM = 7
    AMDSMI_DEV_PERF_LEVEL_LAST = AMDSMI_DEV_PERF_LEVEL_DETERMINISM
    # Unknown performance level
    AMDSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100


# Alternative name for the same type.
amdsmi_dev_perf_level = amdsmi_dev_perf_level_t
|
||||
|
||||
class amdsmi_sw_component_t(c_int):
    """Software components; only the driver component is defined."""

    AMDSMI_SW_COMP_FIRST = 0x0
    # Driver
    AMDSMI_SW_COMP_DRIVER = AMDSMI_SW_COMP_FIRST
    AMDSMI_SW_COMP_LAST = AMDSMI_SW_COMP_DRIVER
|
||||
|
||||
# Event handle type: an unsigned 64-bit integer.
amdsmi_event_handle_t = c_uint64
|
||||
|
||||
|
||||
#Event Groups
|
||||
# Enum denoting an event group. The value of the enum is the
|
||||
# base value for all the event enums in the group.
|
||||
class amdsmi_event_group_t(c_int):
    """Event groups; each value is the base for the event ids of its group."""

    # Data Fabric (XGMI) related events
    AMDSMI_EVNT_GRP_XGMI = 0
    # XGMI outbound data
    AMDSMI_EVNT_GRP_XGMI_DATA_OUT = 10
    AMDSMI_EVNT_GRP_INVALID = 0xFFFFFFFF
|
||||
|
||||
# Event types
|
||||
# Event type enum. Events belonging to a particular event group
|
||||
# ::amdsmi_event_group_t should begin enumerating at the ::amdsmi_event_group_t
|
||||
# value for that group.
|
||||
|
||||
class amdsmi_event_type_t(c_int):
    """Event types; ids of each group start at that group's
    ``amdsmi_event_group_t`` value."""

    AMDSMI_EVNT_FIRST = amdsmi_event_group_t.AMDSMI_EVNT_GRP_XGMI

    # XGMI group (ids start at AMDSMI_EVNT_GRP_XGMI)
    AMDSMI_EVNT_XGMI_FIRST = amdsmi_event_group_t.AMDSMI_EVNT_GRP_XGMI
    # NOPs sent to neighbor 0
    AMDSMI_EVNT_XGMI_0_NOP_TX = AMDSMI_EVNT_XGMI_FIRST
    AMDSMI_EVNT_XGMI_0_REQUEST_TX = 1
    AMDSMI_EVNT_XGMI_0_RESPONSE_TX = 2
    AMDSMI_EVNT_XGMI_0_BEATS_TX = 3
    AMDSMI_EVNT_XGMI_1_NOP_TX = 4
    AMDSMI_EVNT_XGMI_1_REQUEST_TX = 5
    AMDSMI_EVNT_XGMI_1_RESPONSE_TX = 6
    AMDSMI_EVNT_XGMI_1_BEATS_TX = 7
    AMDSMI_EVNT_XGMI_LAST = 7

    # XGMI data-out group (ids start at 10)
    AMDSMI_EVNT_XGMI_DATA_OUT_FIRST = 10
    AMDSMI_EVNT_XGMI_DATA_OUT_0 = 10
    AMDSMI_EVNT_XGMI_DATA_OUT_1 = 11
    AMDSMI_EVNT_XGMI_DATA_OUT_2 = 12
    AMDSMI_EVNT_XGMI_DATA_OUT_3 = 13
    AMDSMI_EVNT_XGMI_DATA_OUT_4 = 14
    AMDSMI_EVNT_XGMI_DATA_OUT_5 = 15
    AMDSMI_EVNT_XGMI_DATA_OUT_LAST = AMDSMI_EVNT_XGMI_DATA_OUT_5

    AMDSMI_EVNT_LAST = AMDSMI_EVNT_XGMI_DATA_OUT_LAST
|
||||
|
||||
class amdsmi_counter_command_t(c_int):
    """Counter commands: start (0) and stop (1)."""

    AMDSMI_CNTR_CMD_START = 0
    AMDSMI_CNTR_CMD_STOP = 1
|
||||
|
||||
class amdsmi_counter_value_t(Structure):
    """Counter reading: value plus time-enabled and time-running figures."""

    _fields_ = [
        ("value", c_uint64),
        ("time_enabled", c_uint64),
        ("time_running", c_uint64),
    ]
|
||||
|
||||
class amdsmi_evt_notification_type_t(c_int):
    """Event notification types, exposed as integer class attributes.

    Every member is a plain ``int``.  No value may end with a trailing
    comma (a habit carried over from C enum syntax): in Python that turns
    the value into a 1-tuple, which breaks comparisons and any arithmetic
    on the id, such as building a bitmask from it.
    """

    # VM page fault
    AMDSMI_EVT_NOTIF_VMFAULT = 1
    AMDSMI_EVT_NOTIF_FIRST = AMDSMI_EVT_NOTIF_VMFAULT
    AMDSMI_EVT_NOTIF_THERMAL_THROTTLE = 2
    AMDSMI_EVT_NOTIF_GPU_PRE_RESET = 3
    AMDSMI_EVT_NOTIF_GPU_POST_RESET = 4
    AMDSMI_EVT_NOTIF_LAST = AMDSMI_EVT_NOTIF_GPU_POST_RESET
|
||||
|
||||
# function to generate event bitmask from event id
|
||||
def AMDSMI_EVENT_MASK_FROM_INDEX(i):
    """Return the event bitmask for event id ``i`` as a ``c_ulonglong``.

    Event id ``i`` maps to bit ``i - 1``, so id 1 is the lowest bit.
    """
    bit_position = i - 1
    mask = 1 << bit_position
    return c_ulonglong(mask)
|
||||
|
||||
MAX_EVENT_NOTIFICATION_MSG_SIZE = 64
|
||||
|
||||
# Event notification data returned from event notification API
|
||||
class amdsmi_evt_notification_data_t(Structure):
    """Event notification data returned from the event notification API."""

    _fields_ = [
        # Handle of the device that corresponds to the event
        ("device_handle", c_void_p),
        # Event type
        ("event", amdsmi_evt_notification_type_t),
        # Event message (fixed-size buffer of MAX_EVENT_NOTIFICATION_MSG_SIZE chars)
        ("message", c_char * MAX_EVENT_NOTIFICATION_MSG_SIZE),
    ]
|
||||
|
||||
# Temperature Metrics. This enum is used to identify various
|
||||
# temperature metrics. Corresponding values will be in millidegrees Celsius.
|
||||
|
||||
class amdsmi_temperature_metric_t(c_int):
    """Identifiers for the available temperature metrics."""

    # Temperature current value
    AMDSMI_TEMP_CURRENT = 0
    AMDSMI_TEMP_FIRST = AMDSMI_TEMP_CURRENT
    # Temperature max value
    AMDSMI_TEMP_MAX = 1
    # Temperature min value
    AMDSMI_TEMP_MIN = 2
    # Temperature hysteresis value for max limit
    # (this is an absolute temperature, not a delta)
    AMDSMI_TEMP_MAX_HYST = 3
    # Temperature hysteresis value for min limit (not a delta)
    AMDSMI_TEMP_MIN_HYST = 4
    # Temperature critical max value, typically greater than
    # corresponding temp_max values
    AMDSMI_TEMP_CRITICAL = 5
    # Temperature hysteresis value for critical limit (not a delta)
    AMDSMI_TEMP_CRITICAL_HYST = 6
    # Temperature emergency max value, for chips supporting more than two
    # upper temperature limits. Must be equal or greater than
    # corresponding temp_crit values.
    AMDSMI_TEMP_EMERGENCY = 7
    # Temperature hysteresis value for emergency limit (not a delta)
    AMDSMI_TEMP_EMERGENCY_HYST = 8
    # Temperature critical min value, typically lower than corresponding
    # temperature min values
    AMDSMI_TEMP_CRIT_MIN = 9
    # Temperature hysteresis value for critical minimum limit (not a delta)
    AMDSMI_TEMP_CRIT_MIN_HYST = 10
    # Temperature offset which is added to the temperature reading by the chip
    AMDSMI_TEMP_OFFSET = 11
    # Historical minimum temperature
    AMDSMI_TEMP_LOWEST = 12
    # Historical maximum temperature
    AMDSMI_TEMP_HIGHEST = 13
    AMDSMI_TEMP_LAST = AMDSMI_TEMP_HIGHEST
|
||||
|
||||
class amdsmi_voltage_metric_t(c_int):
    """Identifiers for the available voltage metrics."""

    # Voltage current value
    AMDSMI_VOLT_CURRENT = 0
    AMDSMI_VOLT_FIRST = AMDSMI_VOLT_CURRENT
    # Voltage max value
    AMDSMI_VOLT_MAX = 1
    # Voltage critical min value
    AMDSMI_VOLT_MIN_CRIT = 2
    # Voltage min value
    AMDSMI_VOLT_MIN = 3
    # Voltage critical max value
    AMDSMI_VOLT_MAX_CRIT = 4
    # Average voltage
    AMDSMI_VOLT_AVERAGE = 5
    # Historical minimum voltage
    AMDSMI_VOLT_LOWEST = 6
    # Historical maximum voltage
    AMDSMI_VOLT_HIGHEST = 7
    AMDSMI_VOLT_LAST = AMDSMI_VOLT_HIGHEST
|
||||
|
||||
# This enumeration is used to indicate which type of
|
||||
# voltage reading should be obtained.
|
||||
|
||||
class amdsmi_voltage_type_t(c_int):
    """Selects which voltage reading to obtain; only Vddgfx is defined."""

    AMDSMI_VOLT_TYPE_FIRST = 0
    # Vddgfx GPU voltage
    AMDSMI_VOLT_TYPE_VDDGFX = AMDSMI_VOLT_TYPE_FIRST
    AMDSMI_VOLT_TYPE_LAST = AMDSMI_VOLT_TYPE_VDDGFX
    # Invalid type
    AMDSMI_VOLT_TYPE_INVALID = 0xFFFFFFFF
|
||||
|
||||
# Pre-set Profile Selections. These bitmasks can be AND'd with the
|
||||
# ::amdsmi_power_profile_status_t.available_profiles returned from
|
||||
# ::amdsmi_dev_power_profile_presets_get to determine which power profiles
|
||||
# are supported by the system.
|
||||
|
||||
class amdsmi_power_profile_preset_masks_t(c_int):
    """Bitmasks for the pre-set power profiles (one bit per profile).

    NOTE(review): AMDSMI_PWR_PROF_PRST_INVALID does not fit in a C int.
    The class attributes are plain Python ints and are unaffected, but
    confirm that c_int storage matches the width of the native enum
    wherever this type is used as a structure field.
    """

    # Custom Power Profile
    AMDSMI_PWR_PROF_PRST_CUSTOM_MASK = 1 << 0
    # Video Power Profile
    AMDSMI_PWR_PROF_PRST_VIDEO_MASK = 1 << 1
    # Power Saving Profile
    AMDSMI_PWR_PROF_PRST_POWER_SAVING_MASK = 1 << 2
    # Compute Saving Profile
    AMDSMI_PWR_PROF_PRST_COMPUTE_MASK = 1 << 3
    # VR Power Profile
    AMDSMI_PWR_PROF_PRST_VR_MASK = 1 << 4
    # 3D Full Screen Power Profile
    AMDSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 1 << 5
    # Default Boot Up Profile
    AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT = 1 << 6
    AMDSMI_PWR_PROF_PRST_LAST = AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT
    # Invalid power profile
    AMDSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF
|
||||
|
||||
class amdsmi_gpu_block_t(c_int):
    """GPU block identifiers, one bit per block.

    NOTE(review): AMDSMI_GPU_BLOCK_RESERVED (bit 63) does not fit in a C
    int. The class attributes are plain Python ints and are unaffected,
    but confirm that c_int storage matches the width of the native enum.
    """

    # Used to indicate an invalid block
    AMDSMI_GPU_BLOCK_INVALID = 0
    AMDSMI_GPU_BLOCK_FIRST = 1 << 0

    # UMC block
    AMDSMI_GPU_BLOCK_UMC = AMDSMI_GPU_BLOCK_FIRST
    # SDMA block
    AMDSMI_GPU_BLOCK_SDMA = 1 << 1
    # GFX block
    AMDSMI_GPU_BLOCK_GFX = 1 << 2
    # MMHUB block
    AMDSMI_GPU_BLOCK_MMHUB = 1 << 3
    # ATHUB block
    AMDSMI_GPU_BLOCK_ATHUB = 1 << 4
    # PCIE_BIF block
    AMDSMI_GPU_BLOCK_PCIE_BIF = 1 << 5
    # HDP block
    AMDSMI_GPU_BLOCK_HDP = 1 << 6
    # XGMI block
    AMDSMI_GPU_BLOCK_XGMI_WAFL = 1 << 7
    # DF block
    AMDSMI_GPU_BLOCK_DF = 1 << 8
    # SMN block
    AMDSMI_GPU_BLOCK_SMN = 1 << 9
    # SEM block
    AMDSMI_GPU_BLOCK_SEM = 1 << 10
    # MP0 block
    AMDSMI_GPU_BLOCK_MP0 = 1 << 11
    # MP1 block
    AMDSMI_GPU_BLOCK_MP1 = 1 << 12
    # Fuse block
    AMDSMI_GPU_BLOCK_FUSE = 1 << 13

    # The highest bit position for supported blocks
    AMDSMI_GPU_BLOCK_LAST = AMDSMI_GPU_BLOCK_FUSE
    AMDSMI_GPU_BLOCK_RESERVED = 1 << 63
|
||||
|
||||
class amdsmi_ras_err_state_t(c_int):
    """RAS / ECC error states."""

    # No current errors
    AMDSMI_RAS_ERR_STATE_NONE = 0
    # ECC is disabled
    AMDSMI_RAS_ERR_STATE_DISABLED = 1
    # ECC errors present, but type unknown
    AMDSMI_RAS_ERR_STATE_PARITY = 2
    # Single correctable error
    AMDSMI_RAS_ERR_STATE_SING_C = 3
    # Multiple uncorrectable errors
    AMDSMI_RAS_ERR_STATE_MULT_UC = 4
    # Firmware detected error and isolated page. Treat as uncorrectable.
    AMDSMI_RAS_ERR_STATE_POISON = 5
    # ECC is enabled
    AMDSMI_RAS_ERR_STATE_ENABLED = 6

    AMDSMI_RAS_ERR_STATE_LAST = AMDSMI_RAS_ERR_STATE_ENABLED
    AMDSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF
|
||||
|
||||
class amdsmi_memory_type_t(c_int):
    """Memory types: VRAM, visible VRAM and GTT."""

    AMDSMI_MEM_TYPE_FIRST = 0

    # VRAM memory
    AMDSMI_MEM_TYPE_VRAM = AMDSMI_MEM_TYPE_FIRST
    # VRAM memory that is visible
    AMDSMI_MEM_TYPE_VIS_VRAM = 1
    # GTT memory
    AMDSMI_MEM_TYPE_GTT = 2

    AMDSMI_MEM_TYPE_LAST = AMDSMI_MEM_TYPE_GTT
|
||||
|
||||
class amdsmi_freq_ind_t(c_int):
    """Indices selecting the minimum or maximum frequency value."""

    # Index used for the minimum frequency value
    AMDSMI_FREQ_IND_MIN = 0
    # Index used for the maximum frequency value
    AMDSMI_FREQ_IND_MAX = 1
    # An invalid frequency index
    AMDSMI_FREQ_IND_INVALID = 0xFFFFFFFF
|
||||
|
||||
class amdsmi_xgmi_status_t(c_int):
    """XGMI status: no errors, a single error, or multiple errors."""

    AMDSMI_XGMI_STATUS_NO_ERRORS = 0
    AMDSMI_XGMI_STATUS_ERROR = 1
    AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS = 2
|
||||
|
||||
# 64-bit unsigned bit-field type.
# This must be the ctypes *type*, not an instance: `c_uint64()` would bind a
# single shared zero-valued object that cannot be used as a structure field
# type, argument type or array element type. `sizeof()` accepts the type as
# well, so size computations based on this name keep working.
amdsmi_bit_field_t = c_uint64
amdsmi_bit_field = amdsmi_bit_field_t
|
||||
|
||||
# Reserved Memory Page States
|
||||
class amdsmi_memory_page_status_t(c_int):
    """States of a reserved memory page."""

    # Reserved. This gpu page is reserved and not available for use
    AMDSMI_MEM_PAGE_STATUS_RESERVED = 0
    # Pending. This gpu page is marked as bad and will be marked
    # reserved at the next window.
    AMDSMI_MEM_PAGE_STATUS_PENDING = 1
    # Unable to reserve this page
    AMDSMI_MEM_PAGE_STATUS_UNRESERVABLE = 2
|
||||
|
||||
# Types for IO Link
|
||||
class AMDSMI_IO_LINK_TYPE(c_int):
    """IO link types."""

    # Unknown type
    AMDSMI_IOLINK_TYPE_UNDEFINED = 0
    # PCI Express
    AMDSMI_IOLINK_TYPE_PCIEXPRESS = 1
    # XGMI
    AMDSMI_IOLINK_TYPE_XGMI = 2
    # Number of IO Link types
    AMDSMI_IOLINK_TYPE_NUMIOLINKTYPES = 3
    # Max of IO Link types
    AMDSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF
|
||||
|
||||
# The utilization counter type
|
||||
class AMDSMI_UTILIZATION_COUNTER_TYPE(c_int):
    """Utilization counter types: coarse-grain GFX and memory activity."""

    AMDSMI_UTILIZATION_COUNTER_FIRST = 0
    # GFX Activity
    AMDSMI_COARSE_GRAIN_GFX_ACTIVITY = AMDSMI_UTILIZATION_COUNTER_FIRST
    # Memory Activity
    AMDSMI_COARSE_GRAIN_MEM_ACTIVITY = 1
    AMDSMI_UTILIZATION_COUNTER_LAST = AMDSMI_COARSE_GRAIN_MEM_ACTIVITY
|
||||
|
||||
# Reserved Memory Page Record
|
||||
class amdsmi_utilization_counter_t(Structure):
    """Record with a page address, page size and page status.

    NOTE(review): the fields describe a memory page, not a utilization
    counter, so the class name and the layout look mismatched. Confirm
    both against the native header before relying on this type.
    """

    _fields_ = [
        ("page_address", c_uint64),
        ("page_size", c_uint64),
        ("status", amdsmi_memory_page_status_t),
    ]
|
||||
|
||||
# Number of possible power profiles that a system could support
|
||||
AMDSMI_MAX_NUM_POWER_PROFILES = (sizeof(amdsmi_bit_field_t) * 8)
|
||||
|
||||
# This structure contains information about which power profiles are
|
||||
# supported by the system for a given device, and which power profile is currently active.
|
||||
|
||||
class amdsmi_power_profile_status_t(Structure):
    """Supported power profiles for a device and the one currently active."""

    _fields_ = [
        # Which profiles are supported by this system
        ("available_profiles", c_uint64),
        # Which power profile is currently active
        ("current", amdsmi_power_profile_preset_masks_t),
        # How many power profiles are available
        ("num_profiles", c_uint32),
    ]
|
||||
|
||||
# This structure holds information about clock frequencies.
|
||||
class amdsmi_frequencies_t(Structure):
    """Clock frequency table with the index of the current entry."""

    _fields_ = [
        # The number of supported frequencies
        ("num_supported", c_uint32),
        # The current frequency index
        ("current", c_uint32),
        # List of frequencies.
        # Only the first num_supported frequencies are valid.
        ("frequency", c_uint64 * AMDSMI_MAX_NUM_FREQUENCIES),
    ]
|
||||
|
||||
#This structure holds information about the possible PCIe
|
||||
#bandwidths. Specifically, the possible transfer rates and their
|
||||
#associated numbers of lanes are stored here.
|
||||
class amdsmi_pcie_bandwidth_t(Structure):
    """Possible PCIe transfer rates and the lane count for each rate."""

    _fields_ = [
        # Transfer rates (T/s) that are possible
        ("transfer_rate", amdsmi_frequencies_t),
        # List of lanes for corresponding transfer rate.
        # Only the first num_supported bandwidths are valid.
        ("lanes", c_uint32 * AMDSMI_MAX_NUM_FREQUENCIES),
    ]
|
||||
|
||||
# This structure holds version information.
|
||||
|
||||
class amdsmi_version_t(Structure):
    """Version information: major / minor / patch numbers and a build string."""

    _fields_ = [
        # Major version
        ("major", c_uint32),
        # Minor version
        ("minor", c_uint32),
        # Patch, build or stepping version
        ("patch", c_uint32),
        # Build string
        ("build", c_char_p),
    ]
|
||||
|
||||
# This structure represents a point on the frequency-voltage plane.
|
||||
class amdsmi_od_vddc_point_t(Structure):
    """A point on the frequency-voltage plane."""

    _fields_ = [
        # Frequency coordinate (in Hz)
        ("frequency", c_uint64),
        # Voltage coordinate (in mV)
        ("voltage", c_uint64),
    ]
|
||||
|
||||
# This structure holds 2 ::amdsmi_range_t's, one for frequency and one for
|
||||
# voltage. These 2 ranges indicate the range of possible values for the
|
||||
# corresponding ::amdsmi_od_vddc_point_t.
|
||||
|
||||
class amdsmi_freq_volt_region_t(Structure):
    """Allowed frequency and voltage ranges for one VDDC curve point."""

    _fields_ = [
        # The frequency range for this VDDC Curve point
        ("freq_range", amdsmi_range_t),
        # The voltage range for this VDDC Curve point
        ("volt_range", amdsmi_range_t),
    ]
|
||||
|
||||
# Array of ::AMDSMI_NUM_VOLTAGE_CURVE_POINTS ::amdsmi_od_vddc_point_t's that
|
||||
# make up the voltage frequency curve points.
|
||||
|
||||
class amdsmi_od_volt_curve_t(Structure):
    """Voltage-frequency curve made of AMDSMI_NUM_VOLTAGE_CURVE_POINTS points."""

    _fields_ = [
        # Array of AMDSMI_NUM_VOLTAGE_CURVE_POINTS amdsmi_od_vddc_point_t
        # entries that make up the voltage frequency curve points.
        ("vc_points", amdsmi_od_vddc_point_t * AMDSMI_NUM_VOLTAGE_CURVE_POINTS),
    ]
|
||||
|
||||
# This structure holds the frequency-voltage values for a device.
|
||||
class amdsmi_od_volt_freq_data_t(Structure):
    """Frequency-voltage values for a device."""

    _fields_ = [
        # The current SCLK frequency range
        ("curr_sclk_range", amdsmi_range_t),
        # The current MCLK frequency range (upper bound only)
        ("curr_mclk_range", amdsmi_range_t),
        # The range of possible SCLK values
        ("sclk_freq_limits", amdsmi_range_t),
        # The range of possible MCLK values
        ("mclk_freq_limits", amdsmi_range_t),
        # The current voltage curve
        ("curve", amdsmi_od_volt_curve_t),
        # The number of voltage curve regions
        ("num_regions", c_uint32),
    ]
|
||||
|
||||
# The following structures hold the gpu metrics values for a device.
|
||||
# Size and version information of metrics data
|
||||
|
||||
class amd_metrics_table_header_t(Structure):
    """Size and version header of the GPU metrics data."""

    _fields_ = [
        ("structure_size", c_uint16),
        ("format_revision", c_ubyte),
        ("content_revision", c_ubyte),
    ]
|
||||
|
||||
AMDSMI_GPU_METRICS_API_FORMAT_VER = 1
|
||||
AMDSMI_GPU_METRICS_API_CONTENT_VER_1 = 1
|
||||
AMDSMI_GPU_METRICS_API_CONTENT_VER_2 = 2
|
||||
AMDSMI_GPU_METRICS_API_CONTENT_VER_3 = 3
|
||||
|
||||
AMDSMI_NUM_HBM_INSTANCES = 4 # This should match NUM_HBM_INSTANCES
|
||||
CENTRIGRADE_TO_MILLI_CENTIGRADE = 1000 # Unit conversion factor for HBM temperatures
|
||||
|
||||
class amdsmi_gpu_metrics_t(Structure):
    """GPU metrics table, starting with its size/version header.

    Field order and widths define the binary layout; do not reorder.
    """

    _fields_ = [
        ("common_header", amd_metrics_table_header_t),

        # Temperature
        ("temperature_edge", c_uint16),
        ("temperature_hotspot", c_uint16),
        ("temperature_mem", c_uint16),
        ("temperature_vrgfx", c_uint16),
        ("temperature_vrsoc", c_uint16),
        ("temperature_vrmem", c_uint16),

        # Utilization
        ("average_gfx_activity", c_uint16),
        ("average_umc_activity", c_uint16),
        ("average_mm_activity", c_uint16),

        # Power/Energy
        ("average_socket_power", c_uint16),
        ("energy_accumulator", c_uint64),

        # Driver attached timestamp (in ns)
        ("system_clock_counter", c_uint64),

        # Average clocks
        ("average_gfxclk_frequency", c_uint16),
        ("average_socclk_frequency", c_uint16),
        ("average_uclk_frequency", c_uint16),
        ("average_vclk0_frequency", c_uint16),
        ("average_dclk0_frequency", c_uint16),
        ("average_vclk1_frequency", c_uint16),
        ("average_dclk1_frequency", c_uint16),

        # Current clocks
        ("current_gfxclk", c_uint16),
        ("current_socclk", c_uint16),
        ("current_uclk", c_uint16),
        ("current_vclk0", c_uint16),
        ("current_dclk0", c_uint16),
        ("current_vclk1", c_uint16),
        ("current_dclk1", c_uint16),

        # Throttle status
        ("throttle_status", c_uint32),

        # Fans
        ("current_fan_speed", c_uint16),

        # Link width/speed
        ("pcie_link_width", c_uint16),  # v1 mod. (8->16)
        ("pcie_link_speed", c_uint16),  # in 0.1 GT/s; v1 mod. (8->16)

        ("padding", c_uint16),  # new in v1
        ("gfx_activity_acc", c_uint32),  # new in v1
        ("mem_actvity_acc", c_uint32),  # new in v1
        ("temperature_hbm", c_uint16 * AMDSMI_NUM_HBM_INSTANCES),  # new in v1
    ]
|
||||
|
||||
# This structure holds error counts.
|
||||
class amdsmi_error_count_t(Structure):
    """Accumulated correctable and uncorrectable error counts."""

    _fields_ = [
        # Accumulated correctable errors
        ("correctable_count", c_uint64),
        # Accumulated uncorrectable errors
        ("uncorrectable_count", c_uint64),
    ]
|
||||
|
||||
# This structure holds pcie info.
|
||||
class amdsmi_pcie_info_t(Structure):
    """PCIe information: lane count and speed."""

    _fields_ = [
        ("pcie_lanes", c_uint16),
        ("pcie_speed", c_uint16),
    ]
|
||||
|
||||
class amdsmi_process_info_t(Structure):
    """Per-process information: ids plus VRAM, SDMA and compute-unit usage."""

    _fields_ = [
        # Process ID
        ("process_id", c_uint32),
        # PASID
        ("pasid", c_uint32),
        # VRAM usage
        ("vram_usage", c_uint64),
        # SDMA usage in microseconds
        ("sdma_usage", c_uint64),
        # Compute Unit usage in percent
        ("cu_occupancy", c_uint32),
    ]
|
||||
|
||||
# Opaque handle to function-support object
|
||||
class amdsmi_func_id_iter_handle(Structure):
    """Opaque function-support object; no fields are declared here."""


# Handle type: pointer to the opaque structure above.
amdsmi_func_id_iter_handle_t = POINTER(amdsmi_func_id_iter_handle)
|
||||
|
||||
# Place-holder "variant" for functions that don't have any variants,
|
||||
# but do have monitors or sensors.
|
||||
|
||||
AMDSMI_DEFAULT_VARIANT = 0xFFFFFFFFFFFFFFFF
|
||||
|
||||
class submodule_union(Union):
    """Union of the enum types a function-id value can carry; all members
    share the same storage."""

    _fields_ = [
        ("memory_type", amdsmi_memory_type_t),
        ("temp_metric", amdsmi_temperature_metric_t),
        ("evnt_type", amdsmi_event_type_t),
        ("evnt_group", amdsmi_event_group_t),
        ("clk_type", amdsmi_clk_type_t),
        ("fw_block", amdsmi_fw_block_t),
        ("gpu_block_type", amdsmi_gpu_block_t),
    ]
|
||||
class amdsmi_func_id_value_t(Union):
    """Function-id value readable as a 64-bit id, a C string pointer or a
    ``submodule_union``; all members share the same storage."""

    _fields_ = [
        ("id", c_uint64),
        ("name", c_char_p),
        ("submodule", submodule_union),
    ]


# Short alias for the same union type.
amd_id = amdsmi_func_id_value_t
|
||||
@@ -259,8 +259,6 @@ int main() {
|
||||
// Get device type. Since the amdsmi is initialized with
|
||||
// AMD_SMI_INIT_AMD_GPUS, the device_type must be AMD_GPU.
|
||||
device_type_t device_type = {};
|
||||
std::cout << "Device Handle: " << device_handles[j] << std::endl;
|
||||
|
||||
ret = amdsmi_get_device_type(device_handles[j], &device_type);
|
||||
CHK_AMDSMI_RET(ret)
|
||||
if (device_type != AMD_GPU) {
|
||||
|
||||
Reference in New Issue
Block a user