ファイル
rocm-systems/projects/amdsmi/amdsmi_cli/amdsmi_helpers.py
T

425 行
16 KiB
Python
Raw 通常表示 履歴

2023-03-20 13:29:28 -05:00
#!/usr/bin/env python3
2023-03-06 06:20:21 -06:00
#
# Copyright (C) 2023 Advanced Micro Devices. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
import logging
import platform
import sys
2023-03-06 06:20:21 -06:00
import time
from subprocess import run
from subprocess import PIPE, STDOUT
2023-03-06 06:20:21 -06:00
from amdsmi_init import *
from BDF import BDF
class AMDSMIHelpers():
"""Helper functions that aren't a part of the AMDSMI API
Useful for determining platform and device identifiers
Functions:
os_info: tuple ()
"""
def __init__(self) -> None:
    """Detect the host operating system and, on Linux, whether it is virtualized.

    Sets:
        operating_system (str): Raw value of platform.system()
        _is_linux / _is_windows (bool): True when operating_system starts
            with "Linux" / "Windows"
        _is_baremetal (bool): Linux only; True when the lscpu output does not
            contain the word "hypervisor"
        _is_virtual_os (bool): Linux only; True when the lscpu output contains
            the word "hypervisor"
        _is_hypervisor (bool): Always initialized to False here

    If lscpu cannot be executed, both _is_baremetal and _is_virtual_os stay
    False so os_info() reports the type as "Unknown" instead of the
    constructor raising.
    """
    self.operating_system = platform.system()

    self._is_hypervisor = False
    self._is_virtual_os = False
    self._is_baremetal = False

    self._is_linux = self.operating_system.startswith("Linux")
    # Needed so is_windows() and the "Windows" branch of os_info() can be taken
    self._is_windows = self.operating_system.startswith("Windows")

    if not self._is_linux:
        return

    logging.debug(f"AMDSMIHelpers: Platform is linux:{self._is_linux}")

    try:
        output = run(["lscpu"], stdout=PIPE, stderr=STDOUT, encoding="UTF-8").stdout
    except OSError as e:
        # lscpu is missing or not executable; leave the OS type undetermined
        logging.debug(f"AMDSMIHelpers: Unable to run lscpu, OS type is unknown: {e}")
        return

    if "hypervisor" not in output:
        self._is_baremetal = True
    else:
        self._is_virtual_os = True
2023-03-06 06:20:21 -06:00
2023-03-06 06:20:21 -06:00
def os_info(self, string_format=True):
    """Describe the operating system and its type, ex. (Linux, Baremetal)

    The name is "Linux", "Windows" or "Unknown"; the type is "Baremetal",
    "Guest", "Hypervisor" or "Unknown". The first flag that is set wins,
    in the order listed.

    params:
        string_format (bool) True to return "<name> <type>" as one string,
            False to return the (name, type) tuple
    returns:
        str or (str, str)
    """
    name_flags = (
        (self._is_linux, "Linux"),
        (self._is_windows, "Windows"),
    )
    type_flags = (
        (self._is_baremetal, "Baremetal"),
        (self._is_virtual_os, "Guest"),
        (self._is_hypervisor, "Hypervisor"),
    )

    os_name = next((label for flag, label in name_flags if flag), "Unknown")
    os_type = next((label for flag, label in type_flags if flag), "Unknown")

    described = (os_name, os_type)
    return " ".join(described) if string_format else described
def is_virtual_os(self):
    """Return the stored virtual OS (guest) flag."""
    return self._is_virtual_os
def is_hypervisor(self):
    """Return the stored hypervisor flag (True if hypervisor is enabled on the system)."""
    return self._is_hypervisor
def is_baremetal(self):
    """Return the stored baremetal flag.

    If the system is a hypervisor this should return False.
    """
    return self._is_baremetal
def is_linux(self):
    """Return the stored Linux platform flag."""
    return self._is_linux
def is_windows(self):
    """Return the stored Windows platform flag."""
    return self._is_windows
def get_output_format(self):
    """Determine the requested output format from sys.argv

    "--json" / "--j" select "json" and take precedence over "--csv" / "--c",
    which select "csv"; with none of them present the format is "human".

    Returns:
        str: outputformat
    """
    cli_flags = set(sys.argv[1:])

    if cli_flags & {"--json", "--j"}:
        return "json"
    if cli_flags & {"--csv", "--c"}:
        return "csv"
    return "human"
2023-03-06 06:20:21 -06:00
def get_gpu_choices(self):
    """Build the selectable GPU choices and a printable listing of them

    Dictionary will be in format:
        gpus[ID] : {"BDF": bdf, "UUID": uuid, "Device Handle": device_handle}
    plus the extra entry gpus["all"] : "all"
    String output will have one line per GPU in format:
        "ID:0 | BDF:0000:23:00.0 | UUID:ffffffff-0000-1000-0000-000000000000"
    followed by a line describing the "all" option.

    Logs an error and exits with -1 when fetching the processor handles fails
    with AMDSMI_STATUS_NOT_INIT or AMDSMI_STATUS_DRIVER_NOT_LOADED; any other
    AmdSmiLibraryException is re-raised.

    params:
        None
    return:
        (dict, str) : (gpu_choices, gpu_choices_str)
    """
    try:
        # The position of each handle in this list is used as its gpu_id
        device_handles = amdsmi_interface.amdsmi_get_processor_handles()
    except amdsmi_interface.AmdSmiLibraryException as e:
        driver_missing_codes = (amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NOT_INIT,
                                amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_DRIVER_NOT_LOADED)
        if e.err_code not in driver_missing_codes:
            raise e
        logging.error('Unable to get device choices, driver not initialized (amdgpu not found in modules)')
        sys.exit(-1)

    gpu_choices = {}
    listing = []
    for gpu_id, device_handle in enumerate(device_handles):
        bdf = amdsmi_interface.amdsmi_get_gpu_device_bdf(device_handle)
        uuid = amdsmi_interface.amdsmi_get_gpu_device_uuid(device_handle)
        gpu_choices[str(gpu_id)] = {
            "BDF": bdf,
            "UUID": uuid,
            "Device Handle": device_handle,
        }
        listing.append(f"ID:{gpu_id} | BDF:{bdf} | UUID:{uuid}\n")

    # The "all" option is always offered last
    gpu_choices["all"] = "all"
    listing.append(" all | Selects all devices\n")

    return (gpu_choices, "".join(listing))
def get_device_handles_from_gpu_selections(self, gpu_selections, gpu_choices=None):
    """Convert provided gpu_selections to device_handles

    Args:
        gpu_selections (list[str] or str): This will be the GPU ID, BDF, or UUID:
            ex: ID:0 | BDF:0000:23:00.0 | UUID:ffffffff-0000-1000-0000-000000000000
            A single string is treated as a one element list. If "all" is
            among the selections, every processor handle is returned.
        gpu_choices (dict{gpu_choices}): This is a dictionary of the possible gpu_choices,
            in the format produced by get_gpu_choices(); fetched from
            get_gpu_choices() when None

    Returns:
        (True, list[device_handles]): Returns a list of all the gpu_selections converted to
            amdsmi device_handles
        (False, str): Return False, and the first input that failed to be converted
    """
    # Normalize first so a bare "all" string is handled the same as ["all"]
    if isinstance(gpu_selections, str):
        gpu_selections = [gpu_selections]

    if "all" in gpu_selections:
        return (True, amdsmi_interface.amdsmi_get_processor_handles())

    if gpu_choices is None:
        gpu_choices = self.get_gpu_choices()[0]

    selected_device_handles = []
    for gpu_selection in gpu_selections:
        selection_text = str(gpu_selection)

        for gpu_id, gpu_info in gpu_choices.items():
            # get_gpu_choices() stores the sentinel entry "all": "all", which has no
            # BDF/UUID/handle; indexing it raised TypeError for any unmatched selection
            if not isinstance(gpu_info, dict):
                continue

            bdf = gpu_info['BDF']
            uuid = gpu_info['UUID']
            device_handle = gpu_info['Device Handle']

            # Check if passed gpu is a gpu ID or UUID (UUID compare is case-insensitive)
            if selection_text == gpu_id or selection_text.lower() == str(uuid).lower():
                selected_device_handles.append(device_handle)
                break

            # Otherwise check if gpu passed is a BDF
            try:
                if BDF(selection_text) == BDF(bdf):
                    selected_device_handles.append(device_handle)
                    break
            except Exception:
                # Best effort: a selection that cannot be parsed as a BDF is simply not a BDF match
                pass
        else:
            # No choice matched this selection
            logging.debug(f"AMDSMIHelpers.get_device_handles_from_gpu_selections - Unable to convert {gpu_selection}")
            return False, gpu_selection

    return True, selected_device_handles
2023-03-28 15:32:17 -05:00
def handle_gpus(self, args, logger, subcommand):
    """Run the subcommand once per gpu when args.gpu lists more than one gpu.

    params:
        args - argparser args to pass to subcommand
        logger (AMDSMILogger) - Logger to print out output
        subcommand (AMDSMICommands) - Function that can handle multiple gpus
    return:
        tuple(bool, device_handle) :
            bool - True if executed subcommand for multiple devices
            device_handle - args.gpu; a list of length 1 is first unwrapped
                to its single element (args.gpu is updated as well)
        (handled_multiple_gpus, device_handle)
    raises:
        IndexError - args.gpu is an empty list
    """
    requested = args.gpu

    # Anything that is not a list is already a single device
    if not isinstance(requested, list):
        return False, requested

    if not requested:
        raise IndexError("args.gpu should not be an empty list")

    if len(requested) == 1:
        args.gpu = requested[0]
        return False, args.gpu

    # Collect the output of every device before printing it all at once
    for device_handle in requested:
        subcommand(args, multiple_devices=True, gpu=device_handle)
    logger.print_output(multiple_device_enabled=True)
    return True, args.gpu
2023-04-21 08:02:53 -05:00
def handle_watch(self, args, subcommand, logger):
    """Repeatedly run the subcommand according to args.watch, args.watch_time
    and args.iterations.

    - watch_time set (truthy): run until watch_time seconds have elapsed, or
      until iterations runs have completed when iterations is also set
    - only iterations set: run exactly that many times
    - neither set: run until interrupted
    The function sleeps for args.watch seconds between runs.

    params:
        args - argparser args to pass to subcommand; watch, watch_time and
            iterations are reset to None on it before the subcommand runs
        subcommand (AMDSMICommands) - Function that can handle
            watching output (Currently: metric & process)
        logger (AMDSMILogger) - Logger for accessing config values
            (not used by this function)
    return:
        1 once a bounded watch completes
    """
    # Keep local copies, then clear the args so the subcommand does not loop recursively
    interval, duration, max_runs = args.watch, args.watch_time, args.iterations
    args.watch = None
    args.watch_time = None
    args.iterations = None

    print("'CTRL' + 'C' to stop watching output:")

    if duration:  # Run for set amount of time
        completed_runs = 0
        deadline = time.time() + duration
        while time.time() <= deadline:
            subcommand(args, watching_output=True)
            completed_runs += 1
            # An iterations limit can end the watch before the deadline
            if max_runs is not None and max_runs <= completed_runs:
                break
            time.sleep(interval)
    elif max_runs is not None:  # Run for a set amount of iterations
        for run_number in range(1, max_runs + 1):
            subcommand(args, watching_output=True)
            # No sleep after the final run
            if run_number < max_runs:
                time.sleep(interval)
    else:  # Run indefinitely as watch_time and iterations are not set
        while True:
            subcommand(args, watching_output=True)
            time.sleep(interval)

    return 1
def get_gpu_id_from_device_handle(self, input_device_handle):
    """Get the gpu index from the device_handle.

    amdsmi_get_processor_handles() returns the list of device_handles in order of gpu_index,
    so the index is the position of the first handle whose .value equals
    input_device_handle.value.

    raises:
        IndexError - no handle in the list matches
    """
    all_handles = amdsmi_interface.amdsmi_get_processor_handles()
    matching_indexes = (
        position
        for position, candidate in enumerate(all_handles)
        if input_device_handle.value == candidate.value
    )

    gpu_index = next(matching_indexes, None)
    if gpu_index is None:
        raise IndexError("Unable to find gpu ID from device_handle")
    return gpu_index
def get_amd_gpu_bdfs(self):
    """Collect the BDF of every processor handle visible to amdsmi

    Returns:
        list: One amdsmi_get_gpu_device_bdf() result per handle, in the
            order amdsmi_get_processor_handles() returns the handles
    """
    return [
        amdsmi_interface.amdsmi_get_gpu_device_bdf(handle)
        for handle in amdsmi_interface.amdsmi_get_processor_handles()
    ]
def is_amd_device(self, device_handle):
    """Return whether the specified device is an AMD device or not

    The 'vendor_id' reported by amdsmi_get_gpu_asic_info() is compared
    against AMD_VENDOR_ID.

    param device_handle: device handle passed to amdsmi_get_gpu_asic_info()
    """
    vendor_id = amdsmi_interface.amdsmi_get_gpu_asic_info(device_handle)['vendor_id']
    return vendor_id == AMD_VENDOR_ID
2023-03-28 15:32:17 -05:00
def get_perf_levels(self):
    """Return the performance levels defined by AmdSmiDevPerfLevel

    return:
        (list[str], list) : member names in iteration order, and the
            de-duplicated member values
    """
    level_names = []
    level_values = set()
    for level in amdsmi_interface.AmdSmiDevPerfLevel:
        level_names.append(level.name)
        level_values.add(level.value)
    return level_names, list(level_values)
def get_compute_partition_types(self):
    """Return the AmdSmiComputePartitionType member names, without 'INVALID'

    return:
        list[str] : partition names in iteration order
    """
    partition_names = [member.name for member in amdsmi_interface.AmdSmiComputePartitionType]
    try:
        partition_names.remove('INVALID')
    except ValueError:
        # 'INVALID' was not among the names; nothing to drop
        pass
    return partition_names
def get_memory_partition_types(self):
    """Return the AmdSmiMemoryPartitionType member names, without 'UNKNOWN'

    return:
        list[str] : partition names in iteration order
    """
    partition_names = [member.name for member in amdsmi_interface.AmdSmiMemoryPartitionType]
    try:
        partition_names.remove('UNKNOWN')
    except ValueError:
        # 'UNKNOWN' was not among the names; nothing to drop
        pass
    return partition_names
def get_clock_types(self):
    """Return the clock types defined by AmdSmiClkType

    return:
        (list[str], list) : member names in iteration order, and the
            de-duplicated member values
    """
    type_names = []
    type_values = set()
    for clock in amdsmi_interface.AmdSmiClkType:
        type_names.append(clock.name)
        type_values.add(clock.value)
    return type_names, list(type_values)
def validate_clock_type(self, input_clock_type):
    """Check input_clock_type against the clock types from get_clock_types()

    params:
        input_clock_type (str or int) - clock type name (matched
            case-insensitively) or clock type value
    return:
        (bool, clock_type) :
            bool - True if the input matched a known clock type
            clock_type - for a matched str, the name as spelled by
                get_clock_types(); for a matched int,
                AmdSmiClkType(input_clock_type); otherwise the input unchanged
    """
    known_names, known_values = self.get_clock_types()

    if isinstance(input_clock_type, str):
        wanted = input_clock_type.lower()
        for known_name in known_names:
            if known_name.lower() == wanted:
                # NOTE(review): the str path yields the name, not the enum member
                # that the int path yields - confirm callers expect this
                return True, known_name
        return False, input_clock_type

    if isinstance(input_clock_type, int) and input_clock_type in known_values:
        return True, amdsmi_interface.AmdSmiClkType(input_clock_type)

    return False, input_clock_type
2023-03-28 15:32:17 -05:00
def confirm_out_of_spec_warning(self, auto_respond=False):
    """ Print the out of specification warning and require the terms to be accepted.

    Returns normally only when the response is one of 'y', 'Y', 'yes', 'Yes'
    or 'YES'; any other response exits the program via sys.exit.

    @param auto_respond: Response to automatically provide for all prompts;
        when falsy the user is prompted on stdin instead
    """
    print('''
******WARNING******\n
Operating your AMD GPU outside of official AMD specifications or outside of
factory settings, including but not limited to the conducting of overclocking,
over-volting or under-volting (including use of this interface software,
even if such software has been directly or indirectly provided by AMD or otherwise
affiliated in any way with AMD), may cause damage to your AMD GPU, system components
and/or result in system failure, as well as cause other problems.
DAMAGES CAUSED BY USE OF YOUR AMD GPU OUTSIDE OF OFFICIAL AMD SPECIFICATIONS OR
OUTSIDE OF FACTORY SETTINGS ARE NOT COVERED UNDER ANY AMD PRODUCT WARRANTY AND
MAY NOT BE COVERED BY YOUR BOARD OR SYSTEM MANUFACTURER'S WARRANTY.
Please use this utility with caution.
''')
    accepted_responses = ('y', 'Y', 'yes', 'Yes', 'YES')

    response = auto_respond if auto_respond else input('Do you accept these terms? [y/n] ')
    if response not in accepted_responses:
        sys.exit('Confirmation not given. Exiting without setting value')
def is_valid_profile(self, profile):
    """Look up profile in amdsmi_power_profile_preset_masks_t__enumvalues

    return:
        (True, value) - profile is a key of the preset table; value is its entry
        (False, values) - profile is not a key; values is every entry of the table
    """
    preset_table = amdsmi_interface.amdsmi_wrapper.amdsmi_power_profile_preset_masks_t__enumvalues
    if profile not in preset_table:
        return False, preset_table.values()
    return True, preset_table[profile]
2023-04-21 08:02:53 -05:00
def convert_bytes_to_readable(self, bytes_input):
    """Format a byte count with the largest unit that keeps it under 1024

    The value is divided by 1024 per unit step (B, KB, MB, ... ZB) and
    printed with one decimal place; anything still >= 1024 ZB is reported
    in YB.

    params:
        bytes_input (int or float) - number of bytes, may be negative
    return:
        str - ex. "1.5 KB"
    """
    units = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB")
    scaled = bytes_input
    unit_index = 0
    while unit_index < len(units):
        if abs(scaled) < 1024:
            return f"{scaled:3.1f} {units[unit_index]}"
        scaled /= 1024
        unit_index += 1
    return f"{scaled:.1f} YB"