diff --git a/amd_smi_cli/BDF.py b/amd_smi_cli/BDF.py deleted file mode 100644 index a4cf29f3da..0000000000 --- a/amd_smi_cli/BDF.py +++ /dev/null @@ -1,96 +0,0 @@ -import logging -import platform -import re - - -class BDF(object): - """ BDF Class to cast and compare BDF objects using built-in python comparators - - Useful for validating a BDF string and converting it to a BDF object - This allows us to handle BDF objects in a pythonic way - - Attributes: - __eq__: The equals comparator - __: An integer count of the eggs we have laid. - """ - - def __init__(self, bdf): - """Init a BDF object""" - if isinstance(bdf, BDF): - self.segment, self.bus, self.device, self.function = tuple(bdf) - else: - if bdf.startswith("BDF("): - bdf = bdf.replace('BDF(', '').replace(')', '') - # Tell if this is baremetal vs Virtualization - self.operating_system = platform.system() - - try: - bdf_components = [int(x, 16) for x in re.split('[:.]', bdf)] - except ValueError as e: - logging.error(f"Invalid string passed: {bdf}") - raise e - - self.segment = bdf_components[0] if len(bdf_components) == 4 else 0 - self.bus, self.device, self.function = bdf_components[-3:] - if self.segment > 65535: - raise ValueError("BDF Segment can't be greater than 65535") - if self.bus > 255: - raise ValueError("BDF Bus can't be greater than 255") - if self.device > 31: - raise ValueError("BDF Device can't be greater than 31") - if self.function > 7: - raise ValueError("BDF Function can't be greater than 7") - - def __eq__(self, passed_bdf): - """Overrides the == operator and allows for BDF objects to be compared to BDF strings""" - - # Only accept strings and BDF objects - if isinstance(passed_bdf, str): - if passed_bdf == '': - return False - passed_bdf = BDF(passed_bdf) - elif not isinstance(passed_bdf, BDF): - return False - - if self.segment == passed_bdf.segment and \ - self.bus == passed_bdf.bus and \ - self.device == passed_bdf.device and \ - self.function == passed_bdf.function: - return True - 
else: - return False - - def __ne__(self, passed_bdf): - """Overrides the != operator and allows for BDF objects to be compared to BDF strings""" - # Since we overrided the == operator we can use that to make this simple - return not self == passed_bdf - - def __add__(self, passed_bdf): - """Overrides the + operator and allows for concatenation""" - return str(self) + passed_bdf - - def __radd__(self, passed_bdf): - """Overrides the + operator and allows for concatenation""" - return passed_bdf + str(self) - - def __str__(self): - """Cast BDF object to a string""" - return "{:04X}:{:02X}:{:02X}:{}".format(self.segment, self.bus, self.device, self.function) - - def __repr__(self): - """How the BDF object is represented""" - return f"BDF({self})" - - def __iter__(self): - """Make the BDF object iterable over its 4 values""" - yield from (self.segment, self.bus, self.device, self.function) - - def __contains__(self, passed_bdf): - """Overrided the 'in' comparator in python""" - passed_bdf = str(BDF(passed_bdf)) - - bdf_regex = "(?:[0-6]?[0-9a-fA-F]{1,4}:)?[0-2]?[0-9a-fA-F]{1,2}:[0-9a-fA-F]{1,2}\.[0-7]" - for match in re.findall(bdf_regex, passed_bdf): - if self == match: - return True - return False diff --git a/amd_smi_cli/_version.py b/amd_smi_cli/_version.py deleted file mode 100644 index b3c06d4883..0000000000 --- a/amd_smi_cli/_version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "0.0.1" \ No newline at end of file diff --git a/amd_smi_cli/amd_smi.py b/amd_smi_cli/amd_smi.py deleted file mode 100644 index 88c9d9c3c3..0000000000 --- a/amd_smi_cli/amd_smi.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/python3 - -# from amd_smi_init import * - -from amd_smi_commands import AMD_SMI_Commands -from amd_smi_parser import AMD_SMI_Parser - - -# sudo /src/out/ubuntu-20.04/20.04/bin/rocm-smi -bc --json | python -m json.tool - - -if __name__ == "__main__": - amd_smi_commands = AMD_SMI_Commands() - amd_smi_parser = AMD_SMI_Parser(amd_smi_commands.version, - 
amd_smi_commands.discovery, - amd_smi_commands.static, - amd_smi_commands.firmware, - amd_smi_commands.bad_pages, - amd_smi_commands.metric, - amd_smi_commands.process, - amd_smi_commands.profile, - amd_smi_commands.event, - amd_smi_commands.topology, - amd_smi_commands.set_value, - amd_smi_commands.reset, - amd_smi_commands.misc, - amd_smi_commands.gpu_v) - - args = amd_smi_parser.parse_args() - args.func(args) # This needs to be there to handle subparsers with no subcommands - # AMDSMI logger print out json, csv, or string diff --git a/amd_smi_cli/amd_smi_commands.py b/amd_smi_cli/amd_smi_commands.py deleted file mode 100644 index e5ea6b8478..0000000000 --- a/amd_smi_cli/amd_smi_commands.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/python3 - -import platform -import signal -import subprocess -import sys -import time -import traceback - -from pathlib import Path - -# from amd_smi_init import * -from BDF import BDF -from _version import __version__ - -from amd_smi_logger import AMD_SMI_Logger - - - -class AMD_SMI_Commands(object): - # def __init__(self, amd_smi_logger) -> None: - # logger = amd_smi_logger - # # Make an AMD-SMI-Object-Logger only with the commands object on init - # # Call the logger when we want to store a print: - # # self.logger.store_output(gpu_id, string) # store in ordered dict - # Every function prints the logger at the end - # logger.printoutput(args.json, args.csv) # Which in Logger handles and checks for json or csv - # Check if init can accept args given, if so then init can be used to call watch functions for looping - - - def version(self, args): - kernel_version = 123 - print(f'AMD-SMI version: {__version__} | Kernel version: {kernel_version}') - - - def discovery(self, args): - print('discovery test') - - - def static(self, args): - #This is where the arg handling comes through - print(args.asic) - print(args.bus) - print(args.driver) - print('static test') - - - def firmware(self, args): - print('firmware test') - - - def 
bad_pages(self, args): - # Retired Pages - print('Bad Pages test') - - - def metric(self, args): - print('Metric test') - - - def process(self, args): - print('Process Test') - - - def profile(self, args): - print('Profile test') - - - def event(self, args): - print('event test') - - - def topology(self, args): - print('topology test') - - - def set_value(self, args): - print('set_value test') - - - def reset(self, args): - print('reset test') - - - def misc(self, args): - print('misc test') - - - def gpu_v(self, args): - print('misc test') - diff --git a/amd_smi_cli/amd_smi_helpers.py b/amd_smi_cli/amd_smi_helpers.py deleted file mode 100644 index e23c792a19..0000000000 --- a/amd_smi_cli/amd_smi_helpers.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import platform -import signal -import subprocess -import sys -import time -import traceback -import logging - -from pathlib import Path - -from BDF import BDF -from amd_smi_init import * - - -class AMD_SMI_Helpers(object): - def __init__(self) -> None: - # implement basic config for debug logs - self.operating_system = platform.system() - self._is_hypervisor = False - self._is_virtual_os = False - self._is_baremetal = False - self._is_linux = False - self._is_windows = False - - self.virtual_operating_systems_product_names = ['KVM', 'VirtualBox', 'VMware'] #@TODO get KVM example - - if self.operating_system.startswith('Linux'): - self._is_linux = True - # logging.debug(f'whatever:{self._is_linux}') - # KVM hypervisor check @TODO - - product_name = '' - product_name_path = Path('/sys/class/dmi/id/product_name') - if product_name_path.exists(): - product_name = product_name_path.read_text().strip() - - if product_name == '': - # Unable to determine product_name default to baremetal - self._is_baremetal = True - else: - for vm_os in self.virtual_operating_systems_product_names: - if product_name.startswith(vm_os): - # Log that this is a virtual OS - self._is_virtual_os = True - break - - # The 
current way I determine if a system is baremetal by deduction of the other two arguments - self._is_baremetal = not self._is_hypervisor and not self._is_virtual_os - - - if self.operating_system.startswith('VMkernel'): - self._is_hypervisor = True - - - if self.operating_system.startswith('Window'): - # Check Powershell for Hyper-V enablement - self._is_windows = True - - # Get-CimInstance -ClassName Win32_ComputerSystem Manufacturer - - - # if self.product_name == '' and not self._is_hypervisor: - # self._is_virtual_os = any(self.product_name.startswith(virtual_os) for virtual_os in self.virtual_operating_systems) - - - # self.operating_system = '' - - - def os_info(self): - # Return OS info - # operating_system = - - - # if sys.platform.startswith('win'): - - # elif sys.platform.startswith('linux'): - - return True - - - def is_virtual_os(self): - return self._is_virtual_os - - - def is_hypervisor(self): - # Returns True if hypervisor is enabled on the system - return self._is_hypervisor - - - def is_baremetal(self): - # Returns True if system is baremetal, if system is hypervisor this should return False - return self._is_baremetal - - - - def is_linux(self): - return self._is_linux - - - def is_windows(self): - return self._is_windows - - - def get_gpu_choices(self): - # Return in format {gpu_index : (BDF, UUID)} - - gpu_choices = {} - gpu_index = '1' - gpu_bdf = BDF('0000:23:00.0') - gpu_uuid = '1234' - gpu_choices[gpu_index] = (gpu_bdf, gpu_uuid) - return gpu_choices - - - def get_devices(self): - pass - - - def get_device_from_socket(self): - pass - - - def get_amd_gpu_bdfs(self): - pass - - - def get_amd_cpu_bdfs(self): - pass - - - - # def getBus(device): - # """ Return the bus identifier of a given device - - # @param device: DRM device identifier - # """ - # bdfid = c_uint64(0) - # ret = rocmsmi.rsmi_dev_pci_id_get(device, byref(bdfid)) - - # # BDFID = ((DOMAIN & 0xffffffff) << 32) | ((BUS & 0xff) << 8) |((DEVICE & 0x1f) <<3 ) | (FUNCTION & 0x7) - # 
domain = (bdfid.value >> 32) & 0xffffffff - # bus = (bdfid.value >> 8) & 0xff - # device = (bdfid.value >> 3) & 0x1f - # function = bdfid.value & 0x7 - - # pic_id = '{:04X}:{:02X}:{:02X}.{:0X}'.format(domain, bus, device, function) - # if rsmi_ret_ok(ret, device): - # return pic_id diff --git a/amd_smi_cli/amd_smi_init.py b/amd_smi_cli/amd_smi_init.py deleted file mode 100644 index b6daa1abc0..0000000000 --- a/amd_smi_cli/amd_smi_init.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/python3 - -### Handle init singularly -# Python imports module does not re-execute code on import - -import atexit -import logging -import signal -import sys - -from pathlib import Path - -# Handle bindings for windows, Hyper-v and KVM seperately -from amdsmiBindings import * - -# Using basic python logging for user errors and development -# logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) # Logging for Development -logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.ERROR) # User level logging - -# On initial import set initialized variable -amd_smi_initialized = False - -def check_return(return_code, error_statment): #@TODO would raising an exception be better? 
- if return_code != amdsmi_status.AMDSMI_STATUS_SUCCESS: - logging.error(error_statment) - sys.exit(return_code) - - -def check_amdgpu_driver(): #@TODO Handle KVM logic - """ Returns true if amdgpu is found in the list of initialized modules """ - amd_gpu_status_file = Path("/sys/module/amdgpu/initstate") - - if amd_gpu_status_file.exists(): - if amd_gpu_status_file.read_text().strip() == 'live': - return True - - return False - - -def init_amd_smi(flag=amdsmi_init_flags.AMD_SMI_INIT_AMD_GPUS): - """ Initializes AMD-SMI """ - # Check if amdgpu driver is up - if check_amdgpu_driver(): - # Only init AMD GPUs for now, waiting for future support for AMD CPUs - init_status = amdsmi.amdsmi_init(flag) - check_return(return_code=init_status, error_statment=f'AMD SMI initialization returned {init_status} (the expected value is {amdsmi_status_t.AMDSMI_STATUS_SUCCESS})') - logging.info('amd-smi initialized successfully') - else: - logging.error('Driver not initialized (amdgpu not found in modules)') - exit(-1) - - -def amdsmi_shut_down(): - """ Shutdown AMD-SMI """ - # Only init AMD GPUs for now, waiting for future support for AMD CPUs - shut_down_status = amdsmi.amdsmi_shut_down() - check_return(return_code=shut_down_status, error_statment=f'AMD SMI Shutdown code returned {shut_down_status} (the expected value is {amdsmi_status_t.AMDSMI_STATUS_SUCCESS})') - logging.debug('amd-smi shutdown successfully') - - -def signal_handler(sig, frame): - logging.debug(f'Handling signal: {sig}') - sys.exit(0) - - -if not amd_smi_initialized: - init_amd_smi() - amd_smi_initialized = True - signal.signal(signal.SIGINT, signal_handler) - signal.signal(signal.SIGTERM, signal_handler) - atexit.register(amdsmi_shut_down) diff --git a/amd_smi_cli/amd_smi_logger.py b/amd_smi_cli/amd_smi_logger.py deleted file mode 100644 index 5826be47f6..0000000000 --- a/amd_smi_cli/amd_smi_logger.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/python3 - -# import orderedDict -import json - -class 
AMD_SMI_Logger(object): - def __init__(self) -> None: - # self.card = {} - # backwards compatability - pass - - def store_output(self, target_device, log): - pass - - def print_output(self, format=''): - # JSON, CSV, text - # split into 3 different formats - # for elem in self.cards: - # print pretty - pass - - def print_json(self): - # json_data = '[{"ID":10,"Name":"Pankaj","Role":"CEO"},' \ - # '{"ID":20,"Name":"David Lee","Role":"Editor"}]' - - # json_object = json.loads(json_data) - - # json_formatted_str = json.dumps(json_object, indent=2) - - # print(json_formatted_str) - pass - - def print_csv(self): - # # Opening JSON file and loading the data - # # into the variable data - # with open('data.json') as json_file: - # data = json.load(json_file) - - # employee_data = data['emp_details'] - - # # now we will open a file for writing - # data_file = open('data_file.csv', 'w') - - # # create the csv writer object - # csv_writer = csv.writer(data_file) - pass \ No newline at end of file diff --git a/amd_smi_cli/amd_smi_modules.py b/amd_smi_cli/amd_smi_modules.py deleted file mode 100644 index 97556b9d14..0000000000 --- a/amd_smi_cli/amd_smi_modules.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import platform -import signal -import subprocess -import sys -import time -import traceback - -from pathlib import Path - -import BDF -from amd_smi_init import * - -class AMD_SMI_Modules(object): - def __init__(self) -> None: - pass - - - def get_socket_handles(self): - ### Returns tuple of (int, list of ctypes: socket_handles) - socket_count = c_uint32(0) - return_code = amdsmi.amdsmi_get_socket_handles(byref(socket_count), None) - check_return(return_code=return_code, error_statment="Invalid get_socket_handles request") - - sockets = [0] * socket_count.value # 1 - socket_handles = (c_void_p * socket_count.value)(*sockets) # That is a pointer, not a multiplication - return_code = amdsmi.amdsmi_get_socket_handles(byref(socket_count), 
socket_handles) - check_return(return_code=return_code, error_statment=f"Invalid get_socket_handles with {socket_count.value} sockets") - return (socket_count.value, socket_handles) - - - def get_device_handles(self, socket_handle): - """Gets the Device Handles that are in the current socket""" - ### Returns tuple of (int, list of ctypes: device_handles) - device_count = c_uint32(0) - return_code = amdsmi.amdsmi_get_device_handles(socket_handle, byref(device_count), None) - check_return(return_code=return_code, error_statment="Invalid get_device_handles request") - - devices = [0] * device_count.value - device_handles = (c_void_p * len(devices))(*devices) - return_code = amdsmi.amdsmi_get_device_handles(socket_handle, byref(device_count), byref(device_handles)) - check_return(return_code=return_code, error_statment=f"Invalid get_device_handles with {device_count.value} devices") - return (device_count.value, device_handles) - - - def get_socket_info(self, socket_handle): - """ Given a socket_handle, return the socket_info, which is just a BDF object""" - socket_info = create_string_buffer(128) # createstringbuffer or something??? 
c_char_p - return_code = amdsmi.amdsmi_get_socket_info(socket_handle, byref(socket_info), c_size_t(128)) - check_return(return_code=return_code, error_statment="Invalid get_socket_info request") - socket_bdf = BDF.BDF(socket_info.value.decode()) - return(socket_bdf) - - - def get_device_type(self, device_handle, format=True): - # format: True for string; False for int - # Returns device_type string for the given device_handle - dev_type = c_int(0) - return_code = amdsmi.amdsmi_get_device_type(device_handle, byref(dev_type)) - check_return(return_code=return_code, error_statment="Invalid get_device_type request") - - if format == True: # Return string - return device_type__enumvalues[dev_type.value] - - return dev_type.value # Return int - - - def get_device_bdf(self, device_handle): - - # class amdsmi_bdf_t (Union): - # _fields_ = [ - # ('bdf_submodule', bdf_submodule), - # ('as_uint', c_uint64) - # ] - bdf = amdsmi_bdf_t() - # bdf.bdf_submodule - - - - return_code = amdsmi.amdsmi_get_device_bdf(device_handle, bdf) - check_return(return_code=return_code, error_statment="Invalid amdsmi_get_device_bdf request") - return (bdf) - - - def get_device_handle_from_bdf(self, bdf): - pass - - - def get_fan_speed(self, bdf): - pass - - def show_retired_pages(self): - # num_pages = c_uint32() - # records = rsmi_retired_page_record_t() - pass \ No newline at end of file diff --git a/amd_smi_cli/amd_smi_parser.py b/amd_smi_cli/amd_smi_parser.py deleted file mode 100644 index 0edf07417f..0000000000 --- a/amd_smi_cli/amd_smi_parser.py +++ /dev/null @@ -1,619 +0,0 @@ -#!/usr/bin/python3 - -import argparse -import platform - -from _version import __version__ -from amd_smi_helpers import AMD_SMI_Helpers - -# sudo /src/out/ubuntu-20.04/20.04/bin/rocm-smi -bc --json | python -m json.tool - -class AMD_SMI_Parser(argparse.ArgumentParser): - - def __init__(self, version, discovery, static, firmware, bad_pages, metric, - process, profile, event,topology, set_value, reset, misc, gpu_v): - - 
# Helper variables - self.amd_smi_helpers = AMD_SMI_Helpers() - self.gpu_choices = self.amd_smi_helpers.get_gpu_choices() - self.vf_choices = ['3','2','1'] - - # Adjust argument parser options - super().__init__( - formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90), - # formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description=f'AMD System Management Interface | Version: {__version__}', #@TODO add the enviornment - add_help=True, - prog='amd-smi') - - # Setup subparsers - subparsers = self.add_subparsers( - title="AMD-SMI Commands", - parser_class=argparse.ArgumentParser, - required=True, - help='Descriptions:', - # dest='cmd', - metavar="") - - # Add all subparsers - # Add --json, --csv,--file,--loglevel, watch, watch_time, & iterations && backwards compatability --gpuvsmi --rocmsmi - self.add_version_parser(subparsers, version) - self.add_discovery_parser(subparsers, discovery) - self.add_static_parser(subparsers, static) - self.add_firmware_parser(subparsers, firmware) - self.add_bad_pages_parser(subparsers, bad_pages) - self.add_metric_parser(subparsers, metric) - self.add_process_parser(subparsers, process) - self.add_profile_parser(subparsers, profile) - self.add_event_parser(subparsers, event) - self.add_topology_parser(subparsers, topology) - # self.add_set_value_parser(subparsers, set_value) - self.add_reset_parser(subparsers, reset) - self.add_misc_parser(subparsers, misc) - self.add_gpu_v_parser(subparsers, misc) - - - def add_version_parser(self, subparsers, func): - # Subparser help text - version_help = "Display version information" - - # Create version subparser - version_parser = subparsers.add_parser('version', help=version_help, description=None) - version_parser._optionals.title = None - version_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - version_parser.set_defaults(func=func) - - - def 
add_discovery_parser(self, subparsers, func): - # Subparser help text - discovery_help = "Display discovery information" - discovery_subcommand_help = """Lists all the devices on the system and the links between devices. - Lists all the sockets and for each socket, GPUs and/or CPUs associated to - that socket alongside some basic information for each device. - In virtualization environment, it can also list VFs associated to each - GPU with some basic information for each VF.""" - - # Create discovery subparser - discovery_parser = subparsers.add_parser('discovery', help=discovery_help, description=discovery_subcommand_help) - discovery_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - discovery_parser.set_defaults(func=func) - - - def add_static_parser(self, subparsers, func): - # Subparser help text - static_help = "Gets static information about the specified GPU" - static_subcommand_help = """If no argument is provided, return static information for all GPUs on the system. - If no static argument is specified all static information will be displayed.""" - static_optionals_title = "Static Arguments" - - # Optional arguments help text - gpu_help = "Select a GPU from the possible choices" - vf_help = """Gets general information about the specified VF (timeslice, fb info, …). - Available only on virtualization OSs""" - asic_help = "All asic information" - bus_help = "All bus information" - vbios_help = "All video bios information (if available)" - limit_help = "All limit metric values (i.e. 
power and thermal limits)" - driver_help = "Displays driver version" - caps_help = "All caps information" - - # Options arguments help text for Hypervisors and Baremetal - ras_help = "Displays RAS features information" - board_help = "All board information" # Linux Baremetal only @TODO is applicable to Azure - - # Options arguments help text for Hypervisors - dfc_help = "All DFC FW table information" - fb_help = "Displays Frame Buffer information" - num_vf_help = "Displays number of supported and enabled VFs" - - # Create static subparser - static_parser = subparsers.add_parser('static', help=static_help, description=static_subcommand_help) - static_parser._optionals.title = static_optionals_title - static_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - static_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = static_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - static_parser.add_argument('-a', '--asic', action='store_true', required=False, help=asic_help) - static_parser.add_argument('-b', '--bus', action='store_true', required=False, help=bus_help) - static_parser.add_argument('-v', '--vbios', action='store_true', required=False, help=vbios_help) - static_parser.add_argument('-l', '--limit', action='store_true', required=False, help=limit_help) - static_parser.add_argument('-d', '--driver', action='store_true', required=False, help=driver_help) - static_parser.add_argument('-c', '--caps', action='store_true', required=False, help=caps_help) - - # Options to display on Hypervisors and Baremetal - if self.amd_smi_helpers.is_hypervisor() or self.amd_smi_helpers.is_baremetal(): - static_parser.add_argument('-r', '--ras', action='store_true', required=False, help=ras_help) - if self.amd_smi_helpers.is_linux(): #@TODO Check if applicable to Azure - 
static_parser.add_argument('-B', '--board', action='store_true', required=False, help=board_help) - - # Options to only display on a Hypervisor - if self.amd_smi_helpers.is_hypervisor(): - device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices) - static_parser.add_argument('-du', '--dfc-ucode', action='store_true', required=False, help=dfc_help) - static_parser.add_argument('-f', '--fb-info', action='store_true', required=False, help=fb_help) - static_parser.add_argument('-n', '--num-vf', action='store_true', required=False, help=num_vf_help) - - - def add_firmware_parser(self, subparsers, func): - # Subparser help text - firmware_help = "Gets firmware information about the specified GPU" - firmware_subcommand_help = "If no argument is provided, return firmware information for all GPUs on the system." - firmware_optionals_title = "Firmware Arguments" - - # Optional arguments help text - gpu_help = "Select a GPU from the possible choices" - vf_help = """Gets general information about the specified VF (timeslice, fb info, …). - Available only on virtualization OSs""" - fw_list_help = "All FW list information" - err_records_help = "All error records information" - - # Create firmware subparser - firmware_parser = subparsers.add_parser('firmware', help=firmware_help, description=firmware_subcommand_help) - firmware_parser._optionals.title = firmware_optionals_title - firmware_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - firmware_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = firmware_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - firmware_parser.add_argument('-f', '--fw-list', action='store_true', required=False, help=fw_list_help) # Redundant? 
- - # Options to only display on a Hypervisor - if self.amd_smi_helpers.is_hypervisor(): - device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices) - firmware_parser.add_argument('-e', '--error-records', action='store_true', required=False, help=err_records_help) - - - def add_bad_pages_parser(self, subparsers, func): #@TODO Retired pages? - if not (self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()): - # The bad_pages subcommand is only applicable to Linux Baremetal systems - return - - # Subparser help text - bad_pages_help = "Gets bad page information about the specified GPU" - bad_pages_subcommand_help = "If no argument is provided, return bad page information for all GPUs on the system." - bad_pages_optionals_title = "Bad pages Arguments" - - # Optional arguments help text - gpu_help = "Select a GPU from the possible choices" - pending_help = "Displays all pending retired pages" - retired_help = "Displays retired pages" #@TODO Wording - un_res_help = "Displays unreservable pages" - - # Create bad_pages subparser - bad_pages_parser = subparsers.add_parser('bad_pages', help=bad_pages_help, description=bad_pages_subcommand_help) - bad_pages_parser._optionals.title = bad_pages_optionals_title - bad_pages_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - bad_pages_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = bad_pages_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - bad_pages_parser.add_argument('-p', '--pending', action='store_true', required=False, help=pending_help) - bad_pages_parser.add_argument('-r', '--retired', action='store_true', required=False, help=retired_help) - bad_pages_parser.add_argument('-u', '--un-res', action='store_true', required=False, help=un_res_help) - - - def 
add_metric_parser(self, subparsers, func): - # Subparser help text - metric_help = "Gets metric/performance information about the specified GPU" - metric_subcommand_help = """If no argument is provided, return metric information for all GPUs on the system. - If no metric argument is specified all metric information will be displayed.""" - metric_optionals_title = "Metric arguments" - - # Optional arguments help text - gpu_help = "Select a GPU from the possible choices" - vf_help = """Gets general information about the specified VF (timeslice, fb info, …). - Available only on virtualization OSs""" - usage_help = "All metrics usage information" - - # Help text for Arguments only Available on Virtual OS and Baremetal platforms - fb_usage_help = "Total and used framebuffer" - - # Help text for Arguments only on Hypervisor and Baremetal platforms - power_help = "Current power usage" - clock_help = "Average, max, and current clock frequencies" - temperature_help = "Current temperatures" - ecc_help = "Number of ECC errors" - pcie_help = "Current PCIe speed and width" - voltage_help = "Current GPU voltages" - - # Help text for Arguments only on Linux Baremetal platforms - fan_help = "Current fan speed" - pcie_usage_help = "Estimated PCIe link usage" - vc_help = "Display voltage curve" - overdrive_help = "Current GPU clock overdrive level" - mo_help = "Current memory clock overdrive level" - perf_level_help = "Current DPM performance level" - replay_count_help = "PCIe replay count" - xgmi_err_help = "XGMI error information since last read" - energy_help = "Amount of energy consumed" #@TODO ? 
Available only on host Linux Baremetal platforms - - # Help text for Arguments only on Hypervisors - schedule_help = "All scheduling information" - guard_help = "All guard information" - guest_help = "All guest data information" - - # Create metric subparser - metric_parser = subparsers.add_parser('metric', help=metric_help, description=metric_subcommand_help) - metric_parser._optionals.title = metric_optionals_title - metric_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - metric_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = metric_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - metric_parser.add_argument('-u', '--usage', action='store_true', required=False, help=usage_help) - - # Optional Args for Virtual OS and Baremetal systems - if self.amd_smi_helpers.is_virtual_os() or self.amd_smi_helpers.is_baremetal(): - metric_parser.add_argument('-b', '--fb-usage', action='store_true', required=False, help=fb_usage_help) - - # Optional Args for Hypervisors and Baremetal systems - if self.amd_smi_helpers.is_hypervisor() or self.amd_smi_helpers.is_baremetal(): - metric_parser.add_argument('-p', '--power', action='store_true', required=False, help=power_help) - metric_parser.add_argument('-c', '--clock', action='store_true', required=False, help=clock_help) - metric_parser.add_argument('-t', '--temperature', action='store_true', required=False, help=temperature_help) - metric_parser.add_argument('-e', '--ecc', action='store_true', required=False, help=ecc_help) - metric_parser.add_argument('-P', '--pcie', action='store_true', required=False, help=pcie_help) - metric_parser.add_argument('-v', '--voltage', action='store_true', required=False, help=voltage_help) - - # Optional Args for Linux Baremetal Systems #@TODO Discuss logic if Linux Hypervisors would 
be allowed to have this - if self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux(): - metric_parser.add_argument('-f', '--fan', action='store_true', required=False, help=fan_help) - metric_parser.add_argument('-s', '--pcie-usage', action='store_true', required=False, help=pcie_usage_help) - metric_parser.add_argument('-V', '--voltage-curve', action='store_true', required=False, help=vc_help) - metric_parser.add_argument('-o', '--overdrive', action='store_true', required=False, help=overdrive_help) - metric_parser.add_argument('-m', '--mem-overdrive', action='store_true', required=False, help=mo_help) - metric_parser.add_argument('-l', '--perf-level', action='store_true', required=False, help=perf_level_help) - metric_parser.add_argument('-r', '--replay-count', action='store_true', required=False, help=replay_count_help) - metric_parser.add_argument('-x', '--xgmi-err', action='store_true', required=False, help=xgmi_err_help) - metric_parser.add_argument('-E', '--energy', action='store_true', required=False, help=energy_help) - - # Options to only display to Hypervisors - if self.amd_smi_helpers.is_hypervisor(): - device_args.add_argument('--vf', action='store', help=vf_help, choices=self.vf_choices) - metric_parser.add_argument('-s', '--schedule', action='store_true', required=False, help=schedule_help) - metric_parser.add_argument('-g', '--guard', action='store_true', required=False, help=guard_help) - metric_parser.add_argument('-G', '--guest', action='store_true', required=False, help=guest_help) - - - def add_process_parser(self, subparsers, func): - if self.amd_smi_helpers.is_hypervisor(): - # Don't add this subparser on Hypervisors - return - - # Subparser help text - process_help = "Lists general process information running on the specified GPU" - process_subcommand_help = """If no argument is provided, returns information for all GPUs on the system. 
- If no argument is provided all process information will be displayed.""" - process_optionals_title = "Process arguments" - - # Required arguments help text - gpu_help = "Select a GPU from the possible choices" - - # Help text for Arguments only on Guest and BM platforms - general_help = "pid, process name, memory usage" - engine_help = "All engine usages" - pid_help = "Gets all process information about the specified process based on Process ID" - name_help = """Gets all process information about the specified process based on Process Name. - If multiple processes have the same name information is returned for all of them.""" #@TODO wording - - # Create process subparser - process_parser = subparsers.add_parser('process', help=process_help, description=process_subcommand_help) - process_parser._optionals.title = process_optionals_title - process_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - process_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = process_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - process_parser.add_argument('-g', '--general', action='store_true', required=False, help=general_help) - process_parser.add_argument('-e', '--engine', action='store_true', required=False, help=engine_help) - process_parser.add_argument('-p', '--pid', action='store', required=False, help=pid_help) - process_parser.add_argument('-n', '--name', action='store', required=False, help=name_help) - - - def add_profile_parser(self, subparsers, func): - if not (self.amd_smi_helpers.is_windows() and self.amd_smi_helpers.is_hypervisor()): - # This subparser only applies to Azure Hyper-V systems - return - - # Subparser help text - profile_help = "Displays information about all profiles and current profile" - profile_subcommand_help = "If no argument is 
provided, returns information for all GPUs on the system." - profile_optionals_title = "Profile Arguments" - - # Required arguments help text - gpu_help = "Select a GPU from the possible choices" - - # Create profile subparser - profile_parser = subparsers.add_parser('profile', help=profile_help, description=profile_subcommand_help) - profile_parser._optionals.title = profile_optionals_title - profile_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - profile_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = profile_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - - def add_event_parser(self, subparsers, func): - if self.amd_smi_helpers.is_linux() and not self.amd_smi_helpers.is_virtual_os(): - # This subparser only applies to Linux BareMetal & Linux Hypervisors - return - - # Subparser help text - event_help = "Displays event information for the given GPU" - event_subcommand_help = "If no argument is provided, returns event information for all GPUs on the system." 
- event_optionals_title = "Event Arguments" - - # Required arguments help text - gpu_help = "Select a GPU from the possible choices" - - # Create event subparser - event_parser = subparsers.add_parser('event', help=event_help, description=event_subcommand_help) - event_parser._optionals.title = event_optionals_title - event_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - event_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = event_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - - def add_topology_parser(self, subparsers, func): - if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()): - # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work - return - - # Subparser help text - topology_help = "Displays topology information of the devices." - topology_subcommand_help = "If no argument is provided, returns information for all GPUs on the system." - topology_optionals_title = "Topology arguments" - - # Required arguments help text - gpu_help = "Select a GPU from the possible choices" - - # Help text for Arguments only on Guest and BM platforms - topo_access_help = "Displays link accessibility between GPUs" - topo_weight_help = "Displays relative weight between GPUs" - topo_hops_help = "Displays the number of hops between GPUs" - topo_type_help = "Displays the link type between GPUs." - topo_numa_help = "Displays the numa nodes." 
- - # Create topology subparser - topology_parser = subparsers.add_parser('topology', help=topology_help, description=topology_subcommand_help) - topology_parser._optionals.title = topology_optionals_title - topology_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - topology_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = topology_parser.add_mutually_exclusive_group() - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - topology_parser.add_argument('-a', '--topo-access', action='store_true', required=False, help=topo_access_help) - topology_parser.add_argument('-w', '--topo-weight', action='store_true', required=False, help=topo_weight_help) - topology_parser.add_argument('-o', '--topo-hops', action='store_true', required=False, help=topo_hops_help) - topology_parser.add_argument('-t', '--topo-type', action='store_true', required=False, help=topo_type_help) - topology_parser.add_argument('-n', '--topo-numa', action='store_true', required=False, help=topo_numa_help) - - - def add_set_value_parser(self, subparsers, func): - if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()): - # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work - return - - # Subparser help text - set_value_help = "Set options for devices." - set_value_subcommand_help = "The user must specify one of the options for the set configuration." 
- set_value_optionals_title = "Set Arguments" - - # Required arguments help text - gpu_help = "Select a GPU from the possible choices" - - # Help text for Arguments only on Guest and BM platforms - set_clk_help = "Sets clock frequency levels for specified clocks" - set_sclk_help = "Sets GPU clock frequency levels" - set_mclk_help = "Sets memory clock frequency levels" - set_pcie_help = "Sets PCIe clock frequency levels" - set_slevel_help = "Change GPU clock frequency and voltage for a specific level" - set_mlevel_help = "Change GPU memory frequency and voltage for a specific level" - set_vc_help = "Change SCLK voltage curve for a specified point" - set_srange_help = "Sets min and max SCLK speed" - set_mrange_help = "Sets min and max MCLK speed" - set_fan_help = "Sets GPU fan speed (level or %)" - set_perf_level_help = "Sets performance level" - set_overdrive_help = "Set GPU overdrive level" - set_mem_overdrive_help = "Set memory overclock overdrive level" - set_power_overdrive_help = "Set the maximum GPU power using power overdrive in Watts" - set_profile_help = "Set power profile level (#) or a quoted string of custom profile attributes" - set_perf_det_help = "Set GPU clock frequency limit to get minimal performance variation" - ras_enable_help = "Enable RAS for specified block and error type" - ras_disable_help = "Disable RAS for specified block and error type." - ras_inject_help = "Inject RAS poison for specified block" - -# -c, --setclk -# . -# -s, --setsclk -# . -# -m, --setmclk -# . -# -p, --setpcie -# . -# -S, --setslevel -# . -# -M, --setmlevel -# . -# -v, --setvc -# . -# -r, --setsrange -# -# -R, --setmrange -# . -# -f, --setfan -# -# -pl, --setperflevel -# -# -o, --setoverdrive % -# Set GPU overdrive level. -# -O, --setmemoverdrive % -# Set memory overclock overdrive level. -# -po, --setpoweroverdrive -# Set the maximum GPU power using power overdrive in Watts. 
-# -P, --setprofile -# Set power profile level (#) or a quoted string of custom profile attributes (“ # # # # “) -# -pd, --setperfdet -# Set GPU clock frequency limit to get minimal performance variation. -# -re, --rasenable -# Enable RAS for specified block and error type. -# -rd, --rasdisable -# Disable RAS for specified block and error type. -# -ri, --rasinject -# Inject RAS poison for specified block - - # Create set_value subparser - set_value_parser = subparsers.add_parser('set', help=set_value_help, description=set_value_subcommand_help) - set_value_parser._optionals.title = set_value_optionals_title - set_value_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - set_value_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = set_value_parser.add_mutually_exclusive_group(required=True) - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - set_value_parser.add_argument('-c', '--setclk', action='store', required=False, help=topo_access_help) - set_value_parser.add_argument('-s', '--topo-weight', action='store', required=False, help=topo_weight_help) - set_value_parser.add_argument('-m', '--topo-hops', action='store', required=False, help=topo_hops_help) - set_value_parser.add_argument('-p', '--topo-type', action='store', required=False, help=topo_type_help) - set_value_parser.add_argument('-S', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-M', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-v', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-r', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-R', '--topo-numa', action='store', required=False, help=topo_numa_help) - 
set_value_parser.add_argument('-f', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-pl', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-o' '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-O', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-po', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-P', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-pd', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-re', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-rd', '--topo-numa', action='store', required=False, help=topo_numa_help) - set_value_parser.add_argument('-ri', '--topo-numa', action='store', required=False, help=topo_numa_help) - - - def add_reset_parser(self, subparsers, func): - if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()): - # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work - return - - # Subparser help text - reset_help = "Reset options for devices." - reset_subcommand_help = "The user must specify one of the options to reset devices." 
- reset_optionals_title = "Reset Arguments" - - # Required arguments help text - gpu_help = "Select a GPU from the possible choices" - - # Help text for Arguments only on Guest and BM platforms - gpureset_help = "Reset the specified GPU" - resetclk_help = "Reset clocks and overdrive to default" - resetfans_help = "Reset fans to automatic (driver) control" - resetprofile_help = "Reset power profile back to default" - resetpoweroverdrive_help = "Set the maximum GPU power back to the device default state" - resetxgmierr_help = "Reset XGMI error counts" - resetperfdet_help = "Disable performance determinism" - - # Create reset subparser - reset_parser = subparsers.add_parser('reset', help=reset_help, description=reset_subcommand_help) - reset_parser._optionals.title = reset_optionals_title - reset_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - reset_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = reset_parser.add_mutually_exclusive_group(required=True) - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - reset_parser.add_argument('-g', '--gpureset', action='store_true', required=False, help=gpureset_help) - reset_parser.add_argument('-c', '--resetclk', action='store_true', required=False, help=resetclk_help) - reset_parser.add_argument('-f', '--resetfans', action='store_true', required=False, help=resetfans_help) - reset_parser.add_argument('-p', '--resetprofile', action='store_true', required=False, help=resetprofile_help) - reset_parser.add_argument('-o', '--resetpoweroverdrive', action='store_true', required=False, help=resetpoweroverdrive_help) - reset_parser.add_argument('-x', '--resetxgmierr', action='store_true', required=False, help=resetxgmierr_help) - reset_parser.add_argument('-d', '--resetperfdet', action='store_true', required=False, help=resetperfdet_help) - - - def 
add_misc_parser(self, subparsers, func): - if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()): - # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work - return - - # Subparser help text - misc_help = "The miscellaneous options" - misc_subcommand_help = "The user must specify one of the options to reset devices." - misc_optionals_title = "Misc Arguments" - - # Optional arguments help text - gpu_help = "Select a GPU from the possible choices" - load_help = "Load clock, fan, performance, and profile settings from a given file." - save_help = "Save clock, fan, performance, and profile settings to a given file." - - # Create misc subparser - misc_parser = subparsers.add_parser('misc', help=misc_help, description=misc_subcommand_help) - misc_parser._optionals.title = misc_optionals_title - misc_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - misc_parser.set_defaults(func=func) - - # Mutually Exclusive Args within the subparser - device_args = misc_parser.add_mutually_exclusive_group(required=True) - device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # Optional Args - misc_parser.add_argument('-l', '--load', action='store', type=open, required=False, help=load_help) - misc_parser.add_argument('-s', '--save', action='store', type=open, required=False, help=save_help) - - - # def add_gpu_v_parser(self, subparsers, func): - # if not(self.amd_smi_helpers.is_baremetal() and self.amd_smi_helpers.is_linux()): - # # This subparser is only applicable to Baremetal Linux @TODO confirm how KVM should work - # return - - # # Subparser help text - # gpu_v_help = "The gpu_v options" - # gpu_v_subcommand_help = "The user must specify one of the options to reset devices." 
- # gpu_v_optionals_title = "gpu_v Arguments" - - # # Optional arguments help text - # gpu_help = "Select a GPU from the possible choices" - # load_help = "Load clock, fan, performance, and profile settings from a given file." - # save_help = "Save clock, fan, performance, and profile settings to a given file." - - # # Create gpu_v subparser - # gpu_v_parser = subparsers.add_parser('gpu_v', help=gpu_v_help, description=gpu_v_subcommand_help) - # gpu_v_parser._optionals.title = gpu_v_optionals_title - # gpu_v_parser.formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=80, width=90) - # gpu_v_parser.set_defaults(func=func) - - # # Mutually Exclusive Args within the subparser - # device_args = gpu_v_parser.add_mutually_exclusive_group(required=True) - # device_args.add_argument('--gpu', action='store', help=gpu_help, choices=self.gpu_choices) - - # # Optional Args - # gpu_v_parser.add_argument('-l', '--load', action='store', type=open, required=False, help=load_help) - # gpu_v_parser.add_argument('-s', '--save', action='store', type=open, required=False, help=save_help) diff --git a/amd_smi_cli/amdsmiBindings.py b/amd_smi_cli/amdsmiBindings.py deleted file mode 100644 index df9222d537..0000000000 --- a/amd_smi_cli/amdsmiBindings.py +++ /dev/null @@ -1,818 +0,0 @@ -#!/usr/bin/env python3 -"""! 
@brief AMD_SMI FFI""" - -from ctypes import * -from enum import Enum -import os - - -path_amdsmi = '/opt/rocm/lib/libamd_smi64.so' #@TODO make this dynamic - -try: - cdll.LoadLibrary(path_amdsmi) - amdsmi = CDLL(path_amdsmi) -except OSError: - print("Unable to load libamd_smi64.so library\n") - exit(1) - -## -# @brief Initialization flags -# -# may be OR'd together and passed to smi.amdsmi_init() -## - -class amdsmi_init_flags(c_int): - AMD_SMI_INIT_ALL_DEVICES = 0x0 # Default option - AMD_SMI_INIT_AMD_CPUS = (1 << 0) - AMD_SMI_INIT_AMD_GPUS = (1 << 1) - AMD_SMI_INIT_NON_AMD_CPUS = (1 << 2) - AMD_SMI_INIT_NON_AMD_GPUS = (1 << 3) - -# Maximum size definitions GPUVSMI -AMDSMI_MAX_MM_IP_COUNT = 8 -AMDSMI_MAX_DATE_LENGTH = 32 # YYYY-MM-DD:HH:MM:SS.MSC # -AMDSMI_MAX_STRING_LENGTH = 64 -AMDSMI_NORMAL_STRING_LENGTH = 32 -AMDSMI_MAX_DEVICES = 32 -AMDSMI_MAX_NAME = 32 -AMDSMI_MAX_DRIVER_VERSION_LENGTH = 80 -AMDSMI_PRODUCT_NAME_LENGTH = 128 -AMDSMI_MAX_CONTAINER_TYPE = 2 - -AMDSMI_GPU_UUID_SIZE = 38 - - -class amdsmi_mm_ip(c_int): - MM_UVD = 0 - MM_VCE = 1 - MM_VCN = 2 - MM__MAX = 3 - - -class amdsmi_container_types(c_int): - CONTAINER_LXC = 0 - CONTAINER_DOCKER = 1 - -# ! opaque handler point to underlying implementation -amdsmi_device_handle = POINTER(c_uint) -amdsmi_socket_handle = POINTER(c_uint) - -class device_type(c_int): - UNKNOWN = 0 - AMD_GPU = 1 - AMD_CPU = 2 - NON_AMD_GPU = 3 - NON_AMD_CPU = 4 - -device_type__enumvalues = { - 0: 'UNKNOWN', - 1: 'AMD_GPU', - 2: 'AMD_CPU', - 3: 'NON_AMD_GPU', - 4: 'NON_AMD_CPU', -} - -#Error codes retured by amd_smi_lib functions -class amdsmi_status(c_int): - AMDSMI_STATUS_SUCCESS = 0 # Call succeeded - AMDSMI_STATUS_INVAL = 1 # Invalid parameters - AMDSMI_STATUS_NOT_SUPPORTED = 2 # Command not supported - AMDSMI_STATUS_FILE_ERROR = 3 # Problem accessing a file. 
- AMDSMI_STATUS_NO_PERM = 4 # Permission Denied - AMDSMI_STATUS_OUT_OF_RESOURCES = 5 # Not enough memory - AMDSMI_STATUS_INTERNAL_EXCEPTION = 6 # An internal exception was caught - AMDSMI_STATUS_INPUT_OUT_OF_BOUNDS = 7 # The provided input is out of allowable or safe range - AMDSMI_STATUS_INIT_ERROR = 8 # An error occurred when initializing internal data structures - AMDSMI_STATUS_NOT_YET_IMPLEMENTED = 9 # Not implemented yet - AMDSMI_STATUS_NOT_FOUND = 10 # Device Not found - AMDSMI_STATUS_INSUFFICIENT_SIZE = 11 # Not enough resources were available for the operation - AMDSMI_STATUS_INTERRUPT = 12 # An interrupt occurred during execution of function - AMDSMI_STATUS_UNEXPECTED_SIZE = 13 # An unexpected amount of data was read - AMDSMI_STATUS_NO_DATA = 14 # No data was found for a given input - AMDSMI_STATUS_UNEXPECTED_DATA = 15 # The data read or provided to function is not what was expected - AMDSMI_STATUS_BUSY = 16 # Device busy - AMDSMI_STATUS_REFCOUNT_OVERFLOW = 17 # An internal reference counter exceeded INT32_MAX - AMDSMI_LIB_START = 1000 - AMDSMI_STATUS_FAIL_LOAD_MODULE = AMDSMI_LIB_START # Fail to load lib - AMDSMI_STATUS_FAIL_LOAD_SYMBOL = 1001 - AMDSMI_STATUS_DRM_ERROR = 1002 # Error when call libdrm - AMDSMI_STATUS_IO = 1003 # Error - AMDSMI_STATUS_FAULT = 1004 # Bad address - AMDSMI_STATUS_API_FAILED = 1005 # API call failed - AMDSMI_STATUS_TIMEOUT = 1006 # Timeout in API call - AMDSMI_STATUS_NO_SLOT = 1007 # No more free slot - AMDSMI_STATUS_RETRY = 1008 # Retry operation - AMDSMI_STATUS_NOT_INIT = 1009 # Device not initialized - AMDSMI_STATUS_UNKNOWN_ERROR = 0xFFFFFFFF # An unknown error occurred - -amdsmi_status_t = amdsmi_status -#Clock types -class amdsmi_clk_type (c_int): - CLK_TYPE_SYS = 0x0, # System clock - CLK_TYPE_FIRST = CLK_TYPE_SYS - CLK_TYPE_GFX = CLK_TYPE_SYS - CLK_TYPE_DF = 0x1 # Data Fabric clock (for ASICs - # running on a separate clock) - CLK_TYPE_DCEF = 0x2 # Display Controller Engine clock - CLK_TYPE_SOC = 0x3 - CLK_TYPE_MEM = 0x4 
- CLK_TYPE_PCIE = 0x5 - CLK_TYPE_VCLK0 = 0x6 - CLK_TYPE_VCLK1 = 0x7 - CLK_TYPE_DCLK0 = 0x8 - CLK_TYPE_DCLK1 = 0x9 - CLK_TYPE__MAX = CLK_TYPE_DCLK1 - -amdsmi_clk_type_t = amdsmi_clk_type -#This enumeration is used to indicate from which part of the device a -# temperature reading should be obtained -class amdsmi_temperature_type (c_int): - TEMPERATURE_TYPE_EDGE = 0 - TEMPERATURE_TYPE_FIRST = TEMPERATURE_TYPE_EDGE - TEMPERATURE_TYPE_JUNCTION = 1 - TEMPERATURE_TYPE_VRAM = 2 - TEMPERATURE_TYPE_HBM_0 = 3 - TEMPERATURE_TYPE_HBM_1 = 4 - TEMPERATURE_TYPE_HBM_2 = 5 - TEMPERATURE_TYPE_HBM_3 = 6 - TEMPERATURE_TYPE_PLX = 7 - TEMPERATURE_TYPE__MAX = TEMPERATURE_TYPE_PLX - -#The values of this enum are used to identify the various firmware -#blocks. -class amdsmi_fw_block_t (c_int): - FW_ID_SMU = 1 - FW_ID_FIRST = FW_ID_SMU - FW_ID_CP_CE = 2 - FW_ID_CP_PFP = 3 - FW_ID_CP_ME = 4 - FW_ID_CP_MEC_JT1 = 5 - FW_ID_CP_MEC_JT2 = 6 - FW_ID_CP_MEC1 = 7 - FW_ID_CP_MEC2 = 8 - FW_ID_RLC = 9 - FW_ID_SDMA0 = 10 - FW_ID_SDMA1 = 11 - FW_ID_SDMA2 = 12 - FW_ID_SDMA3 = 13 - FW_ID_SDMA4 = 14 - FW_ID_SDMA5 = 15 - FW_ID_SDMA6 = 16 - FW_ID_SDMA7 = 17 - FW_ID_VCN = 18 - FW_ID_UVD = 19 - FW_ID_VCE = 20 - FW_ID_ISP = 21 - FW_ID_DMCU_ERAM = 22 # eRAM - FW_ID_DMCU_ISR = 23 # ISR - FW_ID_RLC_RESTORE_LIST_GPM_MEM = 24 - FW_ID_RLC_RESTORE_LIST_SRM_MEM = 25 - FW_ID_RLC_RESTORE_LIST_CNTL = 26 - FW_ID_RLC_V = 27 - FW_ID_MMSCH = 28 - FW_ID_PSP_SYSDRV = 29 - FW_ID_PSP_SOSDRV = 30 - FW_ID_PSP_TOC = 31 - FW_ID_PSP_KEYDB = 32 - FW_ID_DFC = 33 - FW_ID_PSP_SPL = 34 - FW_ID_DRV_CAP = 35 - FW_ID_MC = 36 - FW_ID_PSP_BL = 37 - FW_ID_CP_PM4 = 38 - FW_ID_ASD = 39 - FW_ID_TA_RAS = 40 - FW_ID_XGMI = 41 - FW_ID_RLC_SRLG = 42 - FW_ID_RLC_SRLS = 43 - FW_ID_SMC = 44 - FW_ID_DMCU = 45 - FW_ID__MAX = 46 - -#This structure represents a range (e.g., frequencies or voltages) - -class amdsmi_range_t (Structure): - _fields_ = [ - ('lower_bound', c_uint64), - ('upper_bound', c_uint64), - ] - -class amdsmi_xgmi_info_t (Structure): - 
_fields_ = [ - ('xgmi_lanes', c_uint8), - ('xgmi_hive_id', c_uint64), - ('xgmi_node_id', c_uint64), - ('index', c_uint32), - ] - -#GPU Capability info - -class gfx (Structure): - _fields_ = [ - ('gfxip_major', c_uint32), - ('gfxip_minor', c_uint32), - ('gfxip_cu_count', c_uint16)] - -class mm (Structure): - _fields_ = [ - ('mm_ip_count', c_uint8), - ('mm_ip_list', c_uint8 * AMDSMI_MAX_MM_IP_COUNT) - ] -class amdsmi_gpu_caps_t (Structure): - _fields_ = [ - ('gfx', gfx), - ('mm', mm), - ('ras_supported', c_bool), - ('max_vf_num', c_uint8), - ('gfx_ip_count', c_uint32), - ('dma_ip_count', c_uint32) - ] - -class amdsmi_vram_info (Structure): - _fields_ = [ - ('vram_total', c_uint32), - ('vram_used', c_uint32), - ] - -class amdsmi_frequency_range_t(Structure): - _fields_ = [ - ('supported_freq_range', amdsmi_range_t), - ('current_freq_range', amdsmi_range_t), - ] - -class bdf_submodule (Structure): - _fields_ = [ - ('function_number', c_uint64, 3), - ('device_number', c_uint64, 5), - ('bus_number', c_uint64, 8), - ('domain_number', c_uint64, 48), - ] -class amdsmi_bdf_t (Union): - _fields_ = [ - ('bdf_submodule', bdf_submodule), - ('as_uint', c_uint64) - ] - -class amdsmi_power_cap_info_t (Structure): - _fields_ = [ - ('power_cap', c_uint64), - ('default_power_cap', c_uint64), - ('dpm_cap', c_uint64), - ('min_power_cap', c_uint64), - ('max_power_cap', c_uint64) - ] - -class amdsmi_vbios_info_t (Structure): - _fields_ =[ - ('name', c_char * AMDSMI_MAX_STRING_LENGTH), - ('vbios_version', c_uint32), - ('build_date', c_char * AMDSMI_MAX_DATE_LENGTH), - ('part_number', c_char * AMDSMI_MAX_STRING_LENGTH), - ('vbios_version_string', c_char * AMDSMI_NORMAL_STRING_LENGTH) - ] - -class fw_info_list (Structure): - _fields_ = [ - ('fw_id', amdsmi_fw_block_t), - ('fw_version', c_uint64) - ] -class amdsmi_fw_info_t (Structure): - _fields_ =[ - ('num_fw_info', c_uint8), - ('fw_info_list', fw_info_list * amdsmi_fw_block_t.FW_ID__MAX) - ] - -class amdsmi_asic_info_t (Structure): - 
_fields_ = [ - ('market_name', c_char * AMDSMI_MAX_STRING_LENGTH), - ('family', c_uint32), - ('vendor_id', c_uint32), - ('subvendor_id', c_uint32), - ('device_id', c_uint64), - ('rev_id', c_uint32), - ('asic_serial', c_char * AMDSMI_NORMAL_STRING_LENGTH) - ] - -class amdsmi_board_info (Structure): - _fields_ = [ - ('serial_number', c_uint64), - ('is_master', c_bool), - ('model_number', c_char * AMDSMI_NORMAL_STRING_LENGTH), - ('product_serial', c_char * AMDSMI_NORMAL_STRING_LENGTH), - ('fru_id', c_char * AMDSMI_NORMAL_STRING_LENGTH), - ('product_name', c_char * AMDSMI_PRODUCT_NAME_LENGTH), - ('manufacturer_name', c_char * AMDSMI_NORMAL_STRING_LENGTH), - ] - -class amdsmi_temperature_t (Structure): - _fields_ = [ - ('cur_temp', c_uint32) - ] - -class amdsmi_temperature_limit_t (Structure): - _fields_ = [ - ('limit', c_uint32) - ] - -class amdsmi_power_limit_t (Structure): - _fields_ = [ - ('limit', c_uint32) - ] - -class amdsmi_power_measure (Structure): - _fields_ = [ - ('average_socket_power', c_uint32), - ('energy_accumulator', c_uint64), - ('voltage_gfx', c_uint32), - ('voltage_soc', c_uint32), - ('voltage_mem', c_uint32), - ] - -class amdsmi_clk_measure_t (Structure): - _fields_ = [ - ('cur_clk', c_uint32), - ('avg_clk', c_uint32), - ('min_clk', c_uint32), - ('max_clk', c_uint32) - ] - -class amdsmi_engine_usage_t (Structure): - _fields_ = [ - ('gfx_activity', c_uint32), - ('umc_activity', c_uint32), - ('mm_activity', c_uint32 * AMDSMI_MAX_MM_IP_COUNT) - ] - -amdsmi_process_handle = c_uint32 - -class memory_usage (Structure): - _fields_ = [ - ('gtt_mem', c_uint64), - ('cpu_mem', c_uint64), - ('vram_mem', c_uint64) - ] - - -class engine_usage (Structure): - _fields_ = [ - ('gfx', c_uint16 * AMDSMI_MAX_MM_IP_COUNT), - ('compute', c_uint16 * AMDSMI_MAX_MM_IP_COUNT), - ('sdma', c_uint16 * AMDSMI_MAX_MM_IP_COUNT), - ('enc', c_uint16 * AMDSMI_MAX_MM_IP_COUNT), - ('dec',c_uint16 * AMDSMI_MAX_MM_IP_COUNT) - ] -class amdsmi_proc_info_t(Structure): - _fields_ = [ - 
('name', c_char * AMDSMI_NORMAL_STRING_LENGTH), - ('pid', amdsmi_process_handle), - ('mem', c_uint64), - ('engine_usage', engine_usage), - ('memory_usage', memory_usage), - ('container_name', c_char * AMDSMI_NORMAL_STRING_LENGTH) - - ] -amdsmi_process_info = amdsmi_proc_info_t - -# Guaranteed maximum possible number of supported frequencies -AMDSMI_MAX_NUM_FREQUENCIES = 32 - -# The number of points that make up a voltage-frequency curve definition -AMDSMI_NUM_VOLTAGE_CURVE_POINTS = 3 - -class amdsmi_dev_perf_level_t (c_int): - AMDSMI_DEV_PERF_LEVEL_AUTO = 0 # Performance level is "auto" - AMDSMI_DEV_PERF_LEVEL_FIRST = AMDSMI_DEV_PERF_LEVEL_AUTO - AMDSMI_DEV_PERF_LEVEL_HIGH = 1 # Keep PowerPlay levels "high", regardless of workload - AMDSMI_DEV_PERF_LEVEL_MANUAL = 2 # Only use values defined by manually setting the AMDSMI_CLK_TYPE_SYS speed - AMDSMI_DEV_PERF_LEVEL_STABLE_STD = 3 # Stable power state with profiling clocks - AMDSMI_DEV_PERF_LEVEL_STABLE_PEAK = 4 # Stable power state with peak clocks - AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK = 5 # Stable power state with minimum memory clock - AMDSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK = 6 # Stable power state with minimum system clock - AMDSMI_DEV_PERF_LEVEL_DETERMINISM = 7 # Performance determinism state - AMDSMI_DEV_PERF_LEVEL_LAST = AMDSMI_DEV_PERF_LEVEL_DETERMINISM - AMDSMI_DEV_PERF_LEVEL_UNKNOWN = 0x100 # Unknown performance level - -amdsmi_dev_perf_level = amdsmi_dev_perf_level_t - -class amdsmi_sw_component_t (c_int): - AMDSMI_SW_COMP_FIRST = 0x0 - AMDSMI_SW_COMP_DRIVER = AMDSMI_SW_COMP_FIRST # Driver - AMDSMI_SW_COMP_LAST = AMDSMI_SW_COMP_DRIVER - -amdsmi_event_handle_t = c_uint64 - - -#Event Groups -# Enum denoting an event group. The value of the enum is the -# base value for all the event enums in the group. 
# Event groups. Each value is the base value for the event-type constants
# that belong to the group.
class amdsmi_event_group_t (c_int):
    AMDSMI_EVNT_GRP_XGMI = 0            # Data Fabric(XGMI) related events
    AMDSMI_EVNT_GRP_XGMI_DATA_OUT = 10  # XGMI Outbound data
    AMDSMI_EVNT_GRP_INVALID = 0xFFFFFFFF


# Event types.
# Events belonging to a particular event group (amdsmi_event_group_t) begin
# enumerating at the amdsmi_event_group_t value for that group.
class amdsmi_event_type_t (c_int):
    AMDSMI_EVNT_FIRST = amdsmi_event_group_t.AMDSMI_EVNT_GRP_XGMI

    # XGMI group: starts at AMDSMI_EVNT_GRP_XGMI
    AMDSMI_EVNT_XGMI_FIRST = amdsmi_event_group_t.AMDSMI_EVNT_GRP_XGMI
    AMDSMI_EVNT_XGMI_0_NOP_TX = AMDSMI_EVNT_XGMI_FIRST  # NOPs sent to neighbor 0
    AMDSMI_EVNT_XGMI_0_REQUEST_TX = 1
    AMDSMI_EVNT_XGMI_0_RESPONSE_TX = 2
    AMDSMI_EVNT_XGMI_0_BEATS_TX = 3
    AMDSMI_EVNT_XGMI_1_NOP_TX = 4
    AMDSMI_EVNT_XGMI_1_REQUEST_TX = 5
    AMDSMI_EVNT_XGMI_1_RESPONSE_TX = 6
    AMDSMI_EVNT_XGMI_1_BEATS_TX = 7
    AMDSMI_EVNT_XGMI_LAST = 7

    # XGMI outbound-data group: starts at AMDSMI_EVNT_GRP_XGMI_DATA_OUT
    AMDSMI_EVNT_XGMI_DATA_OUT_FIRST = 10
    AMDSMI_EVNT_XGMI_DATA_OUT_0 = 10
    AMDSMI_EVNT_XGMI_DATA_OUT_1 = 11
    AMDSMI_EVNT_XGMI_DATA_OUT_2 = 12
    AMDSMI_EVNT_XGMI_DATA_OUT_3 = 13
    AMDSMI_EVNT_XGMI_DATA_OUT_4 = 14
    AMDSMI_EVNT_XGMI_DATA_OUT_5 = 15
    AMDSMI_EVNT_XGMI_DATA_OUT_LAST = AMDSMI_EVNT_XGMI_DATA_OUT_5

    AMDSMI_EVNT_LAST = AMDSMI_EVNT_XGMI_DATA_OUT_LAST


# Commands accepted by the event counter control API.
class amdsmi_counter_command_t (c_int):
    AMDSMI_CNTR_CMD_START = 0
    AMDSMI_CNTR_CMD_STOP = 1


# Counter reading: three unsigned 64-bit fields.
class amdsmi_counter_value_t (Structure):
    _fields_ = [
        ('value', c_uint64),
        ('time_enabled', c_uint64),
        ('time_running', c_uint64)
    ]


# Event notification types.
# NOTE: every member must be a plain int. A trailing comma after a value
# (as in a C enum) would silently turn it into a 1-tuple in Python and break
# AMDSMI_EVENT_MASK_FROM_INDEX.
class amdsmi_evt_notification_type_t (c_int):
    AMDSMI_EVT_NOTIF_VMFAULT = 1  # VM page fault
    AMDSMI_EVT_NOTIF_FIRST = AMDSMI_EVT_NOTIF_VMFAULT
    AMDSMI_EVT_NOTIF_THERMAL_THROTTLE = 2
    AMDSMI_EVT_NOTIF_GPU_PRE_RESET = 3
    AMDSMI_EVT_NOTIF_GPU_POST_RESET = 4
    AMDSMI_EVT_NOTIF_LAST = AMDSMI_EVT_NOTIF_GPU_POST_RESET


def AMDSMI_EVENT_MASK_FROM_INDEX (i):
    """Generate the event bitmask for event id ``i``.

    Args:
        i: Integer event id, e.g. a member of
            ``amdsmi_evt_notification_type_t``. Ids are 1-based: id 1 maps
            to bit 0.

    Returns:
        A ``c_ulonglong`` whose value is ``1 << (i - 1)``.

    Raises:
        ValueError: If ``i`` is less than 1 (negative shift count).
    """
    return c_ulonglong(1 << (i - 1))
# Length of the message buffer in an event notification record.
MAX_EVENT_NOTIFICATION_MSG_SIZE = 64


# Event notification data returned from the event notification API.
class amdsmi_evt_notification_data_t(Structure):
    _fields_ = [
        # Handle of the device that corresponds to the event
        ('device_handle', c_void_p),
        # Event type
        ('event', amdsmi_evt_notification_type_t),
        # Event message
        ('message', c_char * MAX_EVENT_NOTIFICATION_MSG_SIZE),
    ]


# Temperature metrics. This enum is used to identify the various temperature
# metrics. Corresponding values will be in millidegrees Celsius.
class amdsmi_temperature_metric_t(c_int):
    # Temperature current value
    AMDSMI_TEMP_CURRENT = 0
    # Temperature max value
    AMDSMI_TEMP_MAX = 1
    # Temperature min value
    AMDSMI_TEMP_MIN = 2
    # Temperature hysteresis value for max limit
    # (this is an absolute temperature, not a delta)
    AMDSMI_TEMP_MAX_HYST = 3
    # Temperature hysteresis value for min limit (not a delta)
    AMDSMI_TEMP_MIN_HYST = 4
    # Temperature critical max value, typically greater than the
    # corresponding temp_max values
    AMDSMI_TEMP_CRITICAL = 5
    # Temperature hysteresis value for critical limit (not a delta)
    AMDSMI_TEMP_CRITICAL_HYST = 6
    # Temperature emergency max value, for chips supporting more than two
    # upper temperature limits. Must be equal to or greater than the
    # corresponding temp_crit values
    AMDSMI_TEMP_EMERGENCY = 7
    # Temperature hysteresis value for emergency limit (not a delta)
    AMDSMI_TEMP_EMERGENCY_HYST = 8
    # Temperature critical min value, typically lower than the corresponding
    # temperature min values
    AMDSMI_TEMP_CRIT_MIN = 9
    # Temperature hysteresis value for critical minimum limit (not a delta)
    AMDSMI_TEMP_CRIT_MIN_HYST = 10
    # Temperature offset which is added to the temperature reading by the chip
    AMDSMI_TEMP_OFFSET = 11
    # Historical minimum temperature
    AMDSMI_TEMP_LOWEST = 12
    # Historical maximum temperature
    AMDSMI_TEMP_HIGHEST = 13

    # Range markers
    AMDSMI_TEMP_FIRST = AMDSMI_TEMP_CURRENT
    AMDSMI_TEMP_LAST = AMDSMI_TEMP_HIGHEST


# Voltage metrics. This enum is used to identify the various voltage metrics.
class amdsmi_voltage_metric_t(c_int):
    # Voltage current value
    AMDSMI_VOLT_CURRENT = 0
    # Voltage max value
    AMDSMI_VOLT_MAX = 1
    # Voltage critical min value
    AMDSMI_VOLT_MIN_CRIT = 2
    # Voltage min value
    AMDSMI_VOLT_MIN = 3
    # Voltage critical max value
    AMDSMI_VOLT_MAX_CRIT = 4
    # Average voltage
    AMDSMI_VOLT_AVERAGE = 5
    # Historical minimum voltage
    AMDSMI_VOLT_LOWEST = 6
    # Historical maximum voltage
    AMDSMI_VOLT_HIGHEST = 7

    # Range markers
    AMDSMI_VOLT_FIRST = AMDSMI_VOLT_CURRENT
    AMDSMI_VOLT_LAST = AMDSMI_VOLT_HIGHEST


# This enumeration is used to indicate which type of voltage reading should
# be obtained.
class amdsmi_voltage_type_t(c_int):
    # Vddgfx GPU voltage
    AMDSMI_VOLT_TYPE_VDDGFX = 0
    # Invalid type
    AMDSMI_VOLT_TYPE_INVALID = 0xFFFFFFFF

    # Range markers
    AMDSMI_VOLT_TYPE_FIRST = AMDSMI_VOLT_TYPE_VDDGFX
    AMDSMI_VOLT_TYPE_LAST = AMDSMI_VOLT_TYPE_VDDGFX


# Pre-set Profile Selections. These bitmasks can be AND'd with the
# ::amdsmi_power_profile_status_t.available_profiles returned from
# ::amdsmi_dev_power_profile_presets_get to determine which power profiles
# are supported by the system.
- -class amdsmi_power_profile_preset_masks_t (c_int): - AMDSMI_PWR_PROF_PRST_CUSTOM_MASK = 0x1 # Custom Power Profile - AMDSMI_PWR_PROF_PRST_VIDEO_MASK = 0x2 # Video Power Profile - AMDSMI_PWR_PROF_PRST_POWER_SAVING_MASK = 0x4 # Power Saving Profile - AMDSMI_PWR_PROF_PRST_COMPUTE_MASK = 0x8 # Compute Saving Profile - AMDSMI_PWR_PROF_PRST_VR_MASK = 0x10 # VR Power Profile - - # 3D Full Screen Power Profile - AMDSMI_PWR_PROF_PRST_3D_FULL_SCR_MASK = 0x20 - AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT = 0x40 # Default Boot Up Profile - AMDSMI_PWR_PROF_PRST_LAST = AMDSMI_PWR_PROF_PRST_BOOTUP_DEFAULT - - # Invalid power profile - AMDSMI_PWR_PROF_PRST_INVALID = 0xFFFFFFFFFFFFFFFF - -class amdsmi_gpu_block_t (c_int): - AMDSMI_GPU_BLOCK_INVALID = 0x0000000000000000 # Used to indicate an invalid block - AMDSMI_GPU_BLOCK_FIRST = 0x0000000000000001 - - AMDSMI_GPU_BLOCK_UMC = AMDSMI_GPU_BLOCK_FIRST # UMC block - AMDSMI_GPU_BLOCK_SDMA = 0x0000000000000002 # SDMA block - AMDSMI_GPU_BLOCK_GFX = 0x0000000000000004 # GFX block - AMDSMI_GPU_BLOCK_MMHUB = 0x0000000000000008 # MMHUB block - AMDSMI_GPU_BLOCK_ATHUB = 0x0000000000000010 # ATHUB block - AMDSMI_GPU_BLOCK_PCIE_BIF = 0x0000000000000020 # PCIE_BIF block - AMDSMI_GPU_BLOCK_HDP = 0x0000000000000040 # HDP block - AMDSMI_GPU_BLOCK_XGMI_WAFL = 0x0000000000000080 # XGMI block - AMDSMI_GPU_BLOCK_DF = 0x0000000000000100 # DF block - AMDSMI_GPU_BLOCK_SMN = 0x0000000000000200 # SMN block - AMDSMI_GPU_BLOCK_SEM = 0x0000000000000400 # SEM block - AMDSMI_GPU_BLOCK_MP0 = 0x0000000000000800 # MP0 block - AMDSMI_GPU_BLOCK_MP1 = 0x0000000000001000 # MP1 block - AMDSMI_GPU_BLOCK_FUSE = 0x0000000000002000 # Fuse block - - AMDSMI_GPU_BLOCK_LAST = AMDSMI_GPU_BLOCK_FUSE # The highest bit position for supported blocks - AMDSMI_GPU_BLOCK_RESERVED = 0x8000000000000000 - -class amdsmi_ras_err_state_t (c_int): - AMDSMI_RAS_ERR_STATE_NONE = 0 # No current errors - AMDSMI_RAS_ERR_STATE_DISABLED = 1 # ECC is disabled - AMDSMI_RAS_ERR_STATE_PARITY = 2 # ECC errors 
present, but type unknown - AMDSMI_RAS_ERR_STATE_SING_C = 3 # Single correctable error - AMDSMI_RAS_ERR_STATE_MULT_UC = 4 # Multiple uncorrectable errors - AMDSMI_RAS_ERR_STATE_POISON = 5 # Firmware detected error and isolated page. Treat as uncorrectable. - AMDSMI_RAS_ERR_STATE_ENABLED = 6 # ECC is enabled - - AMDSMI_RAS_ERR_STATE_LAST = AMDSMI_RAS_ERR_STATE_ENABLED - AMDSMI_RAS_ERR_STATE_INVALID = 0xFFFFFFFF - -class amdsmi_memory_type_t (c_int): - AMDSMI_MEM_TYPE_FIRST = 0 - - AMDSMI_MEM_TYPE_VRAM = AMDSMI_MEM_TYPE_FIRST # VRAM memory - AMDSMI_MEM_TYPE_VIS_VRAM = 1 # VRAM memory that is visible - AMDSMI_MEM_TYPE_GTT = 2 # GTT memory - - AMDSMI_MEM_TYPE_LAST = AMDSMI_MEM_TYPE_GTT - -class amdsmi_freq_ind_t (c_int): - AMDSMI_FREQ_IND_MIN = 0 # Index used for the minimum frequency value - AMDSMI_FREQ_IND_MAX = 1 # Index used for the maximum frequency value - AMDSMI_FREQ_IND_INVALID = 0xFFFFFFFF # An invalid frequency index - -class amdsmi_xgmi_status_t (c_int): - AMDSMI_XGMI_STATUS_NO_ERRORS = 0 - AMDSMI_XGMI_STATUS_ERROR = 1 - AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS = 2 - -amdsmi_bit_field_t = c_uint64() -amdsmi_bit_field = amdsmi_bit_field_t - -# Reserved Memory Page States -class amdsmi_memory_page_status_t (c_int): - AMDSMI_MEM_PAGE_STATUS_RESERVED = 0 # Reserved. This gpu page is reserved and not available for use - AMDSMI_MEM_PAGE_STATUS_PENDING = 1 # Pending. This gpu page is marked as bad - # and will be marked reserved at the next window. - AMDSMI_MEM_PAGE_STATUS_UNRESERVABLE = 2 # Unable to reserve this page - -# Types for IO Link -class AMDSMI_IO_LINK_TYPE (c_int): - AMDSMI_IOLINK_TYPE_UNDEFINED = 0 # unknown type. 
- AMDSMI_IOLINK_TYPE_PCIEXPRESS = 1 # PCI Express - AMDSMI_IOLINK_TYPE_XGMI = 2 # XGMI - AMDSMI_IOLINK_TYPE_NUMIOLINKTYPES = 3 # Number of IO Link types - AMDSMI_IOLINK_TYPE_SIZE = 0xFFFFFFFF # Max of IO Link types - -# The utilization counter type -class AMDSMI_UTILIZATION_COUNTER_TYPE (c_int): - AMDSMI_UTILIZATION_COUNTER_FIRST = 0 # GFX Activity - AMDSMI_COARSE_GRAIN_GFX_ACTIVITY = AMDSMI_UTILIZATION_COUNTER_FIRST - AMDSMI_COARSE_GRAIN_MEM_ACTIVITY = 1 # Memory Activity - AMDSMI_UTILIZATION_COUNTER_LAST = AMDSMI_COARSE_GRAIN_MEM_ACTIVITY - -# Reserved Memory Page Record -class amdsmi_utilization_counter_t (Structure): - _fields_=[ - ('page_address', c_uint64), - ('page_size', c_uint64), - ('status', amdsmi_memory_page_status_t), - ] - -# Number of possible power profiles that a system could support -AMDSMI_MAX_NUM_POWER_PROFILES = (sizeof(amdsmi_bit_field_t) * 8) - -# This structure contains information about which power profiles are -# supported by the system for a given device, and which power profile is currently active. - -class amdsmi_power_profile_status_t (Structure): - _fields_ = [ - ('available_profiles', c_uint64), # Which profiles are supported by this system - ('current', amdsmi_power_profile_preset_masks_t), # Which power profile is currently active - ('num_profiles', c_uint32) # How many power profiles are available - ] - -# This structure holds information about clock frequencies. -class amdsmi_frequencies_t (Structure): - _fields_ = [ - ('num_supported', c_uint32), # The number of supported frequencies - ('current', c_uint32), # The current frequency index - ('frequency', c_uint64 * AMDSMI_MAX_NUM_FREQUENCIES), # List of frequencies. - # Only the first num_supported frequencies are valid. - ] - -#This structure holds information about the possible PCIe -#bandwidths. Specifically, the possible transfer rates and their -#associated numbers of lanes are stored here. 
-class amdsmi_pcie_bandwidth_t (Structure): - _fields_ = [ - ('transfer_rate', amdsmi_frequencies_t), # Transfer rates (T/s) that are possible - ('lanes', c_uint32 * AMDSMI_MAX_NUM_FREQUENCIES), # List of lanes for corresponding transfer rate. - # Only the first num_supported bandwidths are valid. - ] - -# This structure holds version information. - -class amdsmi_version_t (Structure): - _fields_ = [ - ('major', c_uint32), # Major version - ('minor', c_uint32), # Minor version - ('patch', c_uint32), # Patch, build or stepping version - ('build', c_char_p), # Build string - ] - -# This structure represents a point on the frequency-voltage plane. -class amdsmi_od_vddc_point_t (Structure): - _fields_ = [ - ('frequency', c_uint64), # Frequency coordinate (in Hz) - ('voltage', c_uint64), # Voltage coordinate (in mV) - ] - -# This structure holds 2 ::amdsmi_range_t's, one for frequency and one for -# voltage. These 2 ranges indicate the range of possible values for the -# corresponding ::amdsmi_od_vddc_point_t. - -class amdsmi_freq_volt_region_t (Structure): - _fields_ = [ - ('freq_range', amdsmi_range_t), # The frequency range for this VDDC Curve point - ('volt_range', amdsmi_range_t), # The voltage range for this VDDC Curve point - ] - -# Array of ::AMDSMI_NUM_VOLTAGE_CURVE_POINTS ::amdsmi_od_vddc_point_t's that -# make up the voltage frequency curve points. - -class amdsmi_od_volt_curve_t (Structure): - _fields_ = [ - # Array of ::AMDSMI_NUM_VOLTAGE_CURVE_POINTS ::amdsmi_od_vddc_point_t's that - # make up the voltage frequency curve points. - ('vc_points', amdsmi_od_vddc_point_t * AMDSMI_NUM_VOLTAGE_CURVE_POINTS) - ] - -# This structure holds the frequency-voltage values for a device. 
-class amdsmi_od_volt_freq_data_t (Structure): - _fields_ = [ - ('curr_sclk_range', amdsmi_range_t), # The current SCLK frequency range - ('curr_mclk_range', amdsmi_range_t), # The current MCLK frequency range; (upper bound only) - ('sclk_freq_limits', amdsmi_range_t), # The range possible of SCLK values - ('mclk_freq_limits', amdsmi_range_t), # The range possible of MCLK values - ('curve', amdsmi_od_volt_curve_t), # The current voltage curve - ('num_regions', c_uint32), # The number of voltage curve regions - ] - -# The following structures hold the gpu metrics values for a device. -# Size and version information of metrics data - -class amd_metrics_table_header_t (Structure): - _fields_ = [ - ('structure_size', c_uint16), - ('format_revision', c_ubyte), - ('content_revision', c_ubyte), - ] - -AMDSMI_GPU_METRICS_API_FORMAT_VER = 1 -AMDSMI_GPU_METRICS_API_CONTENT_VER_1 = 1 -AMDSMI_GPU_METRICS_API_CONTENT_VER_2 = 2 -AMDSMI_GPU_METRICS_API_CONTENT_VER_3 = 3 - -AMDSMI_NUM_HBM_INSTANCES = 4 # This should match NUM_HBM_INSTANCES -CENTRIGRADE_TO_MILLI_CENTIGRADE = 1000 # Unit conversion factor for HBM temperatures - -class amdsmi_gpu_metrics_t (Structure): - _fields_ = [ - ('common_header', amd_metrics_table_header_t), - # Temperature - ('temperature_edge', c_uint16), - ('temperature_hotspot', c_uint16), - ('temperature_mem', c_uint16), - ('temperature_vrgfx', c_uint16), - ('temperature_vrsoc', c_uint16), - ('temperature_vrmem', c_uint16), - # Utilization - ('average_gfx_activity', c_uint16), - ('average_umc_activity', c_uint16), - ('average_mm_activity', c_uint16), - # Power/Energy - ('average_socket_power', c_uint16), - ('energy_accumulator', c_uint64), - # Driver attached timestamp (in ns) - ('system_clock_counter', c_uint64), - # Average clocks - ('average_gfxclk_frequency', c_uint16), - ('average_socclk_frequency', c_uint16), - ('average_uclk_frequency', c_uint16), - ('average_vclk0_frequency', c_uint16), - ('average_dclk0_frequency', c_uint16), - 
('average_vclk1_frequency', c_uint16), - ('average_dclk1_frequency', c_uint16), - # Current clocks - ('current_gfxclk', c_uint16), - ('current_socclk', c_uint16), - ('current_uclk', c_uint16), - ('current_vclk0', c_uint16), - ('current_dclk0', c_uint16), - ('current_vclk1', c_uint16), - ('current_dclk1', c_uint16), - # Throttle status - ('throttle_status', c_uint32), - # Fans - ('current_fan_speed', c_uint16), - # Link width/speed - ('pcie_link_width', c_uint16), # v1 mod.(8->16) - ('pcie_link_speed', c_uint16), # in 0.1 GT/s; v1 mod. (8->16) - ('padding', c_uint16), # new in v1 - ('gfx_activity_acc', c_uint32), # new in v1 - ('mem_actvity_acc', c_uint32), # new in v1 - ('temperature_hbm', c_uint16 * AMDSMI_NUM_HBM_INSTANCES) # new in v1 - ] - -# This structure holds error counts. -class amdsmi_error_count_t (Structure): - _fields_ = [ - ('correctable_count', c_uint64), # Accumulated correctable errors - ('uncorrectable_count', c_uint64) # Accumulated uncorrectable errors - ] - -# This structure holds pcie info. -class amdsmi_pcie_info_t (Structure): - _fields_ = [ - ('pcie_lanes', c_uint16), - ('pcie_speed', c_uint16), - ] - -class amdsmi_process_info_t (Structure): - _fields_ = [ - ('process_id', c_uint32), # Process ID - ('pasid', c_uint32), # PASID - ('vram_usage', c_uint64), # VRAM usage - ('sdma_usage', c_uint64), # SDMA usage in microseconds - ('cu_occupancy', c_uint32), # Compute Unit usage in percent - ] - -# Opaque handle to function-support object -class amdsmi_func_id_iter_handle(Structure): - pass -amdsmi_func_id_iter_handle_t = POINTER(amdsmi_func_id_iter_handle) - -# Place-holder "variant" for functions that have don't have any variants, -# but do have monitors or sensors. 
- -AMDSMI_DEFAULT_VARIANT = 0xFFFFFFFFFFFFFFFF - -class submodule_union(Union): - _fields_ = [ - ('memory_type', amdsmi_memory_type_t), - ('temp_metric', amdsmi_temperature_metric_t), - ('evnt_type', amdsmi_event_type_t), - ('evnt_group', amdsmi_event_group_t), - ('clk_type', amdsmi_clk_type_t), - ('fw_block', amdsmi_fw_block_t), - ('gpu_block_type', amdsmi_gpu_block_t), - ] -class amdsmi_func_id_value_t (Union): - _fields_ = [ - ('id', c_uint64), - ('name', c_char_p), - ('submodule', submodule_union) - ] - -amd_id = amdsmi_func_id_value_t \ No newline at end of file diff --git a/example/amd_smi_drm_example.cc b/example/amd_smi_drm_example.cc index b4228dc3d9..69d9ef12b0 100644 --- a/example/amd_smi_drm_example.cc +++ b/example/amd_smi_drm_example.cc @@ -259,8 +259,6 @@ int main() { // Get device type. Since the amdsmi is initialized with // AMD_SMI_INIT_AMD_GPUS, the device_type must be AMD_GPU. device_type_t device_type = {}; - std::cout << "Device Handle: " << device_handles[j] << std::endl; - ret = amdsmi_get_device_type(device_handles[j], &device_type); CHK_AMDSMI_RET(ret) if (device_type != AMD_GPU) {