[SWDEV-488276] Add partition 2.0 functionality (#44)

Changes:
* CLI:
  - Updated amd-smi partition
  - Updated amd-smi partition -c
  - Updated amd-smi partition -m
  - Updated amd-smi partition -a
  - Updated amd-smi set -M <NPS1/NPS2/NPS4/NPS8>
  - Updated amd-smi set -C <SPX/DPX/QPX/TPX/CPX>
  - Updated amd-smi set -C <ACCELERATOR_TYPE> or <PROFILE_INDEX>
    Where PROFILE_INDEX = available ACCELERATOR_TYPES
  - Updated amd-smi set --help, now includes more detail for
    amd-smi set -C <ACCELERATOR_TYPE> or <PROFILE_INDEX>

* API:
  - Added amdsmi_get_gpu_memory_partition_config
  - Added amdsmi_set_gpu_memory_partition_mode
  - Added amdsmi_get_gpu_accelerator_partition_profile_config
  - Updated amdsmi_get_gpu_accelerator_partition_profile
  - Added amdsmi_set_gpu_accelerator_partition_profile

Signed-off-by: Charis Poag <Charis.Poag@amd.com>

[ROCm/amdsmi commit: c1cd2b46ef]
Этот коммит содержится в:
Poag, Charis
2025-01-15 20:28:45 -06:00
коммит произвёл Maisam Arif
родитель 8f203f8bca
Коммит fa81bcb513
18 изменённых файлов: 2637 добавлений и 447 удалений
+229 -136
Просмотреть файл
@@ -4156,14 +4156,35 @@ class AMDSMICommands():
self.logger.store_output(args.gpu, 'perfdeterminism', f"Successfully enabled performance determinism and set GFX clock frequency to {args.perf_determinism}")
if args.compute_partition:
compute_partition = amdsmi_interface.AmdSmiComputePartitionType[args.compute_partition]
try:
amdsmi_interface.amdsmi_set_gpu_compute_partition(args.gpu, compute_partition)
(accelerator_set_choices, accelerator_profiles) = self.helpers.get_accelerator_choices_types_indices()
logging.debug("args.compute_partition: %s; Accelerator_set_choices: %s", str(args.compute_partition), str(json.dumps(accelerator_set_choices, indent=4)))
if args.compute_partition in accelerator_profiles['profile_types']:
compute_partition = amdsmi_interface.AmdSmiComputePartitionType[args.compute_partition]
index = accelerator_profiles['profile_types'].index(args.compute_partition)
attempted_to_set = f"Attempted to set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]} on {gpu_string}"
amdsmi_interface.amdsmi_set_gpu_compute_partition(args.gpu, compute_partition)
self.logger.store_output(args.gpu, 'accelerator_partition', f"Successfully set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]})")
elif args.compute_partition in accelerator_profiles['profile_indices']:
compute_partition = int(args.compute_partition)
index = accelerator_profiles['profile_indices'].index(args.compute_partition)
attempted_to_set = f"Attempted to set accelerator partition to {accelerator_profiles['profile_types'][int(index)]} (profile #{args.compute_partition}) on {gpu_string}"
amdsmi_interface.amdsmi_set_gpu_accelerator_partition_profile(args.gpu, compute_partition)
self.logger.store_output(args.gpu, 'accelerator_partition', f"Successfully set accelerator partition to {accelerator_profiles['profile_types'][int(index)]} (profile #{args.compute_partition})")
else:
raise ValueError(f"Invalid accelerator configuration {args.compute_partition} on {gpu_string}")
except amdsmi_exception.AmdSmiLibraryException as e:
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
raise PermissionError('Command requires elevation') from e
raise ValueError(f"Unable to set compute partition to {args.compute_partition} on {gpu_string}") from e
self.logger.store_output(args.gpu, 'computepartition', f"Successfully set compute partition to {args.compute_partition}")
elif e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE:
print(f"\n{attempted_to_set}\n"
f"\n[AMDSMI_STATUS_SETTING_UNAVAILABLE] Please check amd-smi partition --memory --accelerator for available profiles.\n"
"Users may need to switch memory partition to another mode in order to enable the desired accelerator partition.\n")
raise ValueError(f"[AMDSMI_STATUS_SETTING_UNAVAILABLE] Unable to set accelerator partition to {args.compute_partition} on {gpu_string}") from e
else:
raise ValueError(f"Unable to set accelerator partition to {args.compute_partition} on {gpu_string}") from e
if args.memory_partition:
lock = multiprocessing.Lock()
lock.acquire()
@@ -4172,49 +4193,18 @@ class AMDSMICommands():
# Info used if AMDSMI_STATUS_INVAL is caught & to set progress bar #
####################################################################
try:
memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(args.gpu) # this info likely actually comes from different apis than used here
memory_dict = {'caps': "N/A", 'current': "N/A"}
memory_partition_config = amdsmi_interface.amdsmi_get_gpu_memory_partition_config(args.gpu)
memory_dict['caps'] = str(memory_partition_config['partition_caps']).replace("]", "").replace("[", "").replace("\'", "").replace(" ", "")
memory_dict['current'] = memory_partition_config['mp_mode']
except amdsmi_exception.AmdSmiLibraryException as e:
memory_partition = "N/A"
logging.debug("Failed to get current memory partition for GPU %s | %s", gpu_id, e.get_error_info())
try:
mem_caps_str = "N/A"
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(args.gpu)
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
mem_caps = temp_mem_caps.nps_cap_mask
if temp_mem_caps.amdsmi_nps_flags_t == None:
mem_caps_list = []
if mem_caps & 1 == 1:
mem_caps_list.append("NPS1")
if mem_caps & 2 == 2:
mem_caps_list.append("NPS2")
if mem_caps & 4 == 4:
mem_caps_list.append("NPS4")
if mem_caps & 8 == 8:
mem_caps_list.append("NPS8")
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "")
else:
mem_caps = temp_mem_caps.amdsmi_nps_flags_t
mem_caps_list = []
if mem_caps.nps1_cap == 1:
mem_caps_list.append("NPS1")
if mem_caps.nps2_cap == 1:
mem_caps_list.append("NPS2")
if mem_caps.nps4_cap == 1:
mem_caps_list.append("NPS4")
if mem_caps.nps8_cap == 1:
mem_caps_list.append("NPS8")
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
if mem_caps_str == "":
mem_caps_str = "N/A"
except amdsmi_exception.AmdSmiLibraryException as e:
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
memory_dict = {'caps': mem_caps_str, 'current': memory_partition}
###############################################################
# memory partition set starts here #
###############################################################
showProgressBar = False
if ((str(memory_dict['current']) != "N/A") and (str(args.memory_partition) in mem_caps_str)
if ((str(memory_dict['current']) != "N/A") and (str(args.memory_partition) in memory_dict['caps'])
and ((str(memory_dict['current']) != str(args.memory_partition)))):
showProgressBar = True # Only show progress bar if
# 1) Device can set memory partition modes
@@ -4259,7 +4249,7 @@ class AMDSMICommands():
raise PermissionError('Command requires elevation') from e
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
out = f"[AMDSMI_STATUS_INVAL] Unable to set memory partition to {args.memory_partition} on {gpu_string}"
print(f"Valid Memory partition Modes: {mem_caps_str}\n")
print(f"Valid Memory partition Modes: {memory_dict['caps']}\n")
self.logger.store_output(args.gpu, 'memory_partition', out)
self.logger.print_output()
self.logger.clear_multiple_devices_ouput()
@@ -5711,15 +5701,21 @@ class AMDSMICommands():
if accelerator:
args.accelerator = accelerator
###########################################
# amd-smi partition (no args) #
###########################################
# if no args are present, then everything should be displayed
if not args.current and not args.memory and not args.accelerator:
args.current = True
args.memory = True
args.accelerator = True
###########################################
# amd-smi partition --current #
###########################################
if args.current:
self.logger.table_header = ''.rjust(7)
current_header = "GPU_ID".ljust(13) + \
current_header = "GPU_ID".ljust(8) + \
"MEMORY".ljust(8) + \
"ACCELERATOR_TYPE".ljust(18) + \
"ACCELERATOR_PROFILE_INDEX".ljust(27) + \
@@ -5733,11 +5729,11 @@ class AMDSMICommands():
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
profile_type = partition_dict['partition_profile']['profile_type']
profile_index = partition_dict['partition_profile']['profile_index']
partition_id = partition_dict['partition_id']
partition_id = str(partition_dict['partition_id']).replace("[", "").replace("]", "").replace(" ", "")
except amdsmi_exception.AmdSmiLibraryException as e:
profile_type = "N/A"
profile_index = "N/A"
partition_id = "N/A"
partition_id = "0"
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
try:
current_mem_cap = amdsmi_interface.amdsmi_get_gpu_memory_partition(gpu)
@@ -5756,65 +5752,52 @@ class AMDSMICommands():
tabular_output.append(tabular_output_dict)
self.logger.multiple_device_output = tabular_output
self.logger.table_title = "CURRENT_PARTITION"
self.logger.print_output(multiple_device_enabled=True, tabular=True)
self.logger.table_title = "\nCURRENT_PARTITION"
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
self.logger.clear_multiple_devices_ouput()
###########################################
# amd-smi partition --memory #
###########################################
if args.memory:
tabular_output = []
self.logger.table_header = ''.rjust(7)
current_header = "GPU_ID".ljust(8) + \
"MEMORY_PARTITION_CAPS".ljust(23) + \
"CURRENT_MEMORY_PARTITION".ljust(26)
self.logger.table_header = current_header + self.logger.table_header.strip()
for gpu in args.gpu:
gpu_id = self.helpers.get_gpu_id_from_device_handle(gpu)
mem_caps_str = "N/A"
current_memory_partition = "N/A"
try:
memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(gpu) # this info likely actually comes from different apis than used here
memory_partition_config = amdsmi_interface.amdsmi_get_gpu_memory_partition_config(gpu)
mem_caps_str = str(memory_partition_config['partition_caps']).replace("]", "").replace("[", "").replace("\'", "").replace(" ", "")
current_memory_partition = memory_partition_config['mp_mode']
except amdsmi_exception.AmdSmiLibraryException as e:
memory_partition = "N/A"
logging.debug("Failed to get current memory partition for GPU %s | %s", gpu_id, e.get_error_info())
try:
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
if temp_mem_caps.amdsmi_nps_flags_t == None:
mem_caps = temp_mem_caps.nps_cap_mask
mem_caps_list = []
if mem_caps & 1 == 1:
mem_caps_list.append("NPS1")
if mem_caps & 2 == 2:
mem_caps_list.append("NPS2")
if mem_caps & 4 == 4:
mem_caps_list.append("NPS4")
if mem_caps & 8 == 8:
mem_caps_list.append("NPS8")
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "")
else:
mem_caps = temp_mem_caps.amdsmi_nps_flags_t
mem_caps_list = []
if mem_caps.nps1_cap == 1:
mem_caps_list.append("NPS1")
if mem_caps.nps2_cap == 1:
mem_caps_list.append("NPS2")
if mem_caps.nps4_cap == 1:
mem_caps_list.append("NPS4")
if mem_caps.nps8_cap == 1:
mem_caps_list.append("NPS8")
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
if mem_caps_str == "":
mem_caps_str = "N/A"
except amdsmi_exception.AmdSmiLibraryException as e:
mem_caps_str = "N/A"
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
tabular_output_dict = {"gpu_id": gpu_id,
"memory_partition_caps": mem_caps_str,
"current_memory_partition": current_memory_partition}
tabular_output.append(tabular_output_dict)
memory_dict = {'caps': mem_caps_str, 'current': memory_partition}
self.logger.store_output(gpu, 'memory_partition', memory_dict)
self.logger.store_multiple_device_output()
self.logger.print_output(multiple_device_enabled=True)
self.logger.multiple_device_output = tabular_output
self.logger.table_title = "\nMEMORY_PARTITION"
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
self.logger.clear_multiple_devices_ouput()
###########################################
# amd-smi partition --accelerator #
###########################################
if args.accelerator:
self.logger.table_header = ''.rjust(7)
current_header = "GPU_ID".ljust(13) + \
current_header = "GPU_ID".ljust(8) + \
"PROFILE_INDEX".ljust(15) + \
"MEMORY_PARTITION_CAPS".ljust(23) + \
"ACCELERATOR_TYPE".ljust(18) + \
"PARTITION_ID".ljust(14) + \
"PARTITION_ID".ljust(17) + \
"NUM_PARTITIONS".ljust(16) + \
"NUM_RESOURCES".ljust(15) + \
"RESOURCE_INDEX".ljust(16) + \
@@ -5824,74 +5807,184 @@ class AMDSMICommands():
self.logger.table_header = current_header + self.logger.table_header.strip()
tabular_output = []
prev_gpu_id = "N/A"
for gpu in args.gpu:
gpu_id = self.helpers.get_gpu_id_from_device_handle(gpu)
tabular_output_dict = {"gpu_id": "N/A",
"profile_index": "N/A",
"memory_partition_caps": "N/A",
"accelerator_type": "N/A",
"partition_id": "0",
"num_partitions": "N/A",
"num_resources": "N/A",
"resource_index": "N/A",
"resource_type": "N/A",
"resource_instances": "N/A",
"resources_shared": "N/A"}
try:
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
profile_type = partition_dict['partition_profile']['profile_type']
profile_index = partition_dict['partition_profile']['profile_index']
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
parition_id = partition_dict['partition_id']
num_resources = partition_dict['partition_profile']['num_resources']
resources = partition_dict['partition_profile']['resources']
partition_id = str(partition_dict['partition_id']).replace("[", "").replace("]", "").replace(" ", "")
current_accelerator_type = partition_dict['partition_profile']['profile_type']
# save only the primary GPU node's partition_id (the 1st listed device; non N/A one)
# else keep current_partition_id unchanged for displaying in accelerator resource's output
if partition_id != "N/A":
current_partition_id = partition_id
if temp_mem_caps.amdsmi_nps_flags_t == None:
mem_caps = temp_mem_caps.nps_cap_mask
mem_caps_list = []
if mem_caps & 1 == 1:
mem_caps_list.append("NPS1")
if mem_caps & 2 == 2:
mem_caps_list.append("NPS2")
if mem_caps & 4 == 4:
mem_caps_list.append("NPS4")
if mem_caps & 8 == 8:
mem_caps_list.append("NPS8")
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
else:
mem_caps = temp_mem_caps.amdsmi_nps_flags_t
mem_caps_list = []
if mem_caps.nps1_cap == 1:
mem_caps_list.append("NPS1")
if mem_caps.nps2_cap == 1:
mem_caps_list.append("NPS2")
if mem_caps.nps4_cap == 1:
mem_caps_list.append("NPS4")
if mem_caps.nps8_cap == 1:
mem_caps_list.append("NPS8")
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
if mem_caps_str == "":
mem_caps_str = "N/A"
except amdsmi_exception.AmdSmiLibraryException as e:
profile_type = "N/A"
profile_index = "N/A"
temp_mem_caps = "N/A"
parition_id = "N/A"
num_resources = "N/A"
resources = "N/A"
partition_id = "0"
mem_caps_str = "N/A"
num_partitions = 0
current_accelerator_type = "N/A"
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
if profile_type == 0:
profile_type = "N/A"
try:
partition_config_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile_config(gpu)
logging.debug("amdsmi_commands.py | partition_config_dict: " + str(json.dumps(partition_config_dict, indent=4)))
num_profiles = partition_config_dict['num_profiles']
num_resource_profiles = partition_config_dict['num_resource_profiles']
tabular_output_dict = {"gpu_id": gpu_id,
resource_index = 0
prev_accelerator_type = "N/A"
for p in range(0, num_profiles):
accelerator_type = partition_config_dict['profiles'][p]['profile_type']
profile_index = partition_config_dict['profiles'][p]['profile_index']
num_partitions = partition_config_dict['profiles'][p]['num_partitions']
mem_caps_str = str(partition_config_dict['profiles'][p]['memory_caps']).replace("]", "").replace("[", "").replace("\'", "").replace(" ", "")
# 2 modifications based on the current accelerator type:
# 1) display a * for the current accelerator type, otherwise display as normal
# 2) display partition id only for the current accelerator profile (the *'d one)
if current_accelerator_type == accelerator_type:
accelerator_type = accelerator_type + "*"
partition_id = current_partition_id
else:
partition_id = "N/A"
# only display the first instance of the gpu_id, rest are empty strings
if prev_gpu_id != gpu_id:
tabular_gpu_id = gpu_id
prev_gpu_id = gpu_id
else:
tabular_gpu_id = ""
logging.debug("amdsmi_commands.py | tabular_gpu_id: " + str(tabular_gpu_id))
if num_resource_profiles == 0:
if prev_accelerator_type != accelerator_type: # only print the first instance of the resources
tabular_output_dict = {"gpu_id": tabular_gpu_id,
"profile_index": profile_index,
"memory_partition_caps": mem_caps_str,
"accelerator_type": profile_type,
"partition_id": parition_id,
"num_partitions": 0,
"num_resources": num_resources,
"resource_index": resources,
"resource_type": resources,
"resource_instances": resources,
"resources_shared": resources}
tabular_output.append(tabular_output_dict)
"accelerator_type": accelerator_type,
"partition_id": partition_id,
"num_partitions": num_partitions,
"num_resources": num_resource_profiles,
"resource_index": "N/A",
"resource_type": "N/A",
"resource_instances": "N/A",
"resources_shared": "N/A"}
prev_accelerator_type = accelerator_type
tabular_output.append(tabular_output_dict)
continue
for r in range(0, num_resource_profiles):
logging.debug("amdsmi_commands.py | p: " + str(p) + "; r: " + str(r)
+ "; accelerator_type: " + str(accelerator_type))
resource_type = partition_config_dict['profiles'][p]['resources'][r]['resource_type']
resource_instances = partition_config_dict['profiles'][p]['resources'][r]['partition_resource']
resources_shared = partition_config_dict['profiles'][p]['resources'][r]['num_partitions_share_resource']
if prev_accelerator_type != accelerator_type: # only print the first instance of the resources
tabular_output_dict = {"gpu_id": tabular_gpu_id,
"profile_index": profile_index,
"memory_partition_caps": mem_caps_str,
"accelerator_type": accelerator_type,
"partition_id": partition_id,
"num_partitions": num_partitions,
"num_resources": num_resource_profiles,
"resource_index": resource_index,
"resource_type": resource_type,
"resource_instances": resource_instances,
"resources_shared": resources_shared}
prev_accelerator_type = accelerator_type
else:
tabular_output_dict = {"gpu_id": "",
"profile_index": "",
"memory_partition_caps": "",
"accelerator_type": "",
"partition_id": "",
"num_partitions": "",
"num_resources": "",
"resource_index": resource_index,
"resource_type": resource_type,
"resource_instances": resource_instances,
"resources_shared": resources_shared}
resource_index += 1
tabular_output.append(tabular_output_dict)
except amdsmi_exception.AmdSmiLibraryException as e:
tabular_output.append(tabular_output_dict)
self.logger.multiple_device_output = tabular_output
self.logger.table_title = "ACCELERATOR_PARTITION_PROFILES"
self.logger.print_output(multiple_device_enabled=True, tabular=True)
self.logger.table_title = "\nACCELERATOR_PARTITION_PROFILES"
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
self.logger.clear_multiple_devices_ouput()
#########################################
# print accelerator partition resources #
#########################################
self.logger.table_header = ''.rjust(7)
current_header = "RESOURCE_INDEX".ljust(16) + \
"RESOURCE_TYPE".ljust(15) + \
"RESOURCE_INSTANCES".ljust(20) + \
"RESOURCES_SHARED".ljust(18)
self.logger.table_header = current_header + self.logger.table_header.strip()
tabular_output = []
for gpu in args.gpu:
gpu_id = self.helpers.get_gpu_id_from_device_handle(gpu)
tabular_output_dict = {"resource_index": "N/A",
"resource_type": "N/A",
"resource_instances": "N/A",
"resources_shared": "N/A"}
try:
partition_config_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile_config(gpu)
logging.debug("amdsmi_commands.py | partition_config_dict: " + str(json.dumps(partition_config_dict, indent=4)))
num_profiles = partition_config_dict['num_profiles']
num_resource_profiles = partition_config_dict['num_resource_profiles']
if num_resource_profiles == 0:
tabular_output.append(tabular_output_dict)
continue
resource_index = 0
for p in range(0, num_profiles):
for r in range(0, num_resource_profiles):
resource_type = partition_config_dict['profiles'][p]['resources'][r]['resource_type']
resource_instances = partition_config_dict['profiles'][p]['resources'][r]['partition_resource']
resources_shared = partition_config_dict['profiles'][p]['resources'][r]['num_partitions_share_resource']
tabular_output_dict = {
"resource_index": resource_index,
"resource_type": resource_type,
"resource_instances": resource_instances,
"resources_shared": resources_shared}
resource_index += 1
tabular_output.append(tabular_output_dict)
except amdsmi_exception.AmdSmiLibraryException as e:
tabular_output.append(tabular_output_dict)
self.logger.multiple_device_output = tabular_output
self.logger.table_title = "\nACCELERATOR_PARTITION_RESOURCES"
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
self.logger.clear_multiple_devices_ouput()
# print legend
legend_parts = [
"\n\nLegend:",
" * = Current mode"]
legend_output = "\n".join(legend_parts)
if self.logger.destination == 'stdout':
print(legend_output)
else:
with self.logger.destination.open('a', encoding="utf-8") as output_file:
output_file.write(legend_output + '\n')
def _event_thread(self, commands, i):
devices = commands.device_handles
+24 -5
Просмотреть файл
@@ -27,6 +27,7 @@ import sys
import time
import re
import multiprocessing
import json
from typing import List, Union
from enum import Enum
@@ -681,12 +682,30 @@ class AMDSMIHelpers():
perf_levels_int = list(set(clock.value for clock in amdsmi_interface.AmdSmiDevPerfLevel))
return perf_levels_str, perf_levels_int
def get_accelerator_partition_profile_config(self):
    """Collect the accelerator partition profiles reported by the first device.

    Only the first processor handle is ever queried; the loop exits after
    that single attempt whether the query succeeded or raised
    AmdSmiLibraryException.

    return:
        dict with three parallel lists:
            'profile_indices' - each profile's index converted to str
            'profile_types'   - each profile's 'profile_type' value
            'memory_caps'     - each profile's 'memory_caps' value
        The lists stay empty when there are no handles or the query fails.
    """
    collected = {'profile_indices': [], 'profile_types': [], 'memory_caps': []}
    for handle in amdsmi_interface.amdsmi_get_processor_handles():
        try:
            config = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile_config(handle)
            for position in range(config['num_profiles']):
                entry = config['profiles'][position]
                collected['profile_indices'].append(str(entry['profile_index']))
                collected['profile_types'].append(entry['profile_type'])
                collected['memory_caps'].append(entry['memory_caps'])
        except amdsmi_interface.AmdSmiLibraryException:
            # Best effort: a failed query leaves whatever was gathered so far
            pass
        # One device is enough; never look at the remaining handles
        break
    return collected
def get_compute_partition_types(self):
    """Return the member names of AmdSmiComputePartitionType, minus 'INVALID'.

    return:
        list of str - enum member names in iteration order
    """
    partition_names = []
    for member in amdsmi_interface.AmdSmiComputePartitionType:
        partition_names.append(member.name)
    try:
        # 'INVALID' is a placeholder member, not a selectable partition mode
        partition_names.remove('INVALID')
    except ValueError:
        pass
    return partition_names
def get_accelerator_choices_types_indices(self):
    """Build the user-facing list of accelerator partition choices.

    return:
        tuple (choices, profiles):
            choices  - ", "-joined string of every profile type followed by
                       every profile index, or "N/A" when no profile types
                       were reported
            profiles - the dict returned by
                       get_accelerator_partition_profile_config(), or a dict
                       with empty 'profile_indices'/'profile_types' lists in
                       the "N/A" case
    """
    profiles = self.get_accelerator_partition_profile_config()
    if len(profiles['profile_types']) == 0:
        return ("N/A", {'profile_indices': [], 'profile_types': []})
    # Types are listed first, then indices, so either spelling can be offered
    all_choices = profiles['profile_types'] + profiles['profile_indices']
    return (", ".join(all_choices), profiles)
def get_memory_partition_types(self):
memory_partitions_str = [partition.name for partition in amdsmi_interface.AmdSmiMemoryPartitionType]
+21 -33
Просмотреть файл
@@ -102,14 +102,24 @@ class AMDSMILogger():
return output_dict
def _convert_json_to_tabular(self, json_object: Dict[str, any]):
# TODO make dynamic
def _convert_json_to_tabular(self, json_object: Dict[str, any], dynamic=False):
# TODO make dynamic - convert other python CLI outputs to use (as needed)
# Update: using dynamic=true provides dynamic re-sizing based on key name length
table_values = ''
stored_gpu = ''
stored_timestamp = ''
for key, value in json_object.items():
string_value = str(value)
if key == 'gpu':
if key == 'partition_id':
# Special case for partition_id: 8 partitions + 7 comma + 2 spaces = 17
table_values += string_value.ljust(17)
continue
key_length = len(key) + 2
if dynamic and len(key) > 0:
stored_gpu = string_value
table_values += string_value.ljust(key_length)
elif key == 'gpu':
stored_gpu = string_value
table_values += string_value.rjust(3)
elif key == 'timestamp':
@@ -144,30 +154,6 @@ class AMDSMILogger():
elif key == "link_status":
for i in value:
table_values += str(i).ljust(3)
elif key == "memory":
table_values += string_value.ljust(8)
elif key == "accelerator_type":
table_values += string_value.ljust(18)
elif key == "partition_id":
table_values += string_value.ljust(14)
elif key == "accelerator_profile_index":
table_values += string_value.ljust(27)
elif key == "profile_index":
table_values += string_value.ljust(15)
elif key == "memory_partition_caps":
table_values += string_value.ljust(23)
elif key == "num_partitions":
table_values += string_value.ljust(16)
elif key == "num_resources":
table_values += string_value.ljust(15)
elif key == "resource_index":
table_values += string_value.ljust(16)
elif key == "resource_type":
table_values += string_value.ljust(15)
elif key == "resource_instances":
table_values += string_value.ljust(20)
elif key == "resources_shared":
table_values += string_value.ljust(18)
elif key == "RW":
table_values += string_value.ljust(57)
elif key in ('pviol', 'tviol'):
@@ -494,12 +480,14 @@ class AMDSMILogger():
self.output = {}
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False):
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False, dynamic=False):
""" Print current output acording to format and then destination
params:
multiple_device_enabled (bool) - True if printing output from
multiple devices
watching_output (bool) - True if printing watch output
dynamic (bool) - Defaults to False. True turns on dynamic resizing for
left justified table output
return:
Nothing
"""
@@ -516,7 +504,7 @@ class AMDSMILogger():
elif self.is_human_readable_format():
# If tabular output is enabled, redirect to _print_tabular_output
if tabular:
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output)
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output, dynamic=dynamic)
else:
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
watching_output=watching_output)
@@ -788,7 +776,7 @@ class AMDSMILogger():
output_file.write(human_readable_output + '\n')
def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False):
def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False, dynamic=False):
primary_table = ''
secondary_table = ''
@@ -808,7 +796,7 @@ class AMDSMILogger():
for key, value in device_output.items():
if key != 'process_list':
primary_table_output[key] = value
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
primary_table += self._convert_json_to_tabular(primary_table_output, dynamic=dynamic) + '\n'
else: # Single device output
if 'process_list' in self.output:
process_table_dict = {}
@@ -822,7 +810,7 @@ class AMDSMILogger():
for key, value in self.output.items():
if key != 'process_list':
primary_table_output[key] = value
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
primary_table += self._convert_json_to_tabular(primary_table_output, dynamic=dynamic) + '\n'
primary_table = primary_table.rstrip()
secondary_table = secondary_table.rstrip()
@@ -879,7 +867,7 @@ class AMDSMILogger():
for key, value in device_output.items():
if key != 'process_list':
primary_table_output[key] = value
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
primary_table += self._convert_json_to_tabular(primary_table_output, dynamic=dynamic) + '\n'
primary_table = primary_table.rstrip() # Remove trailing new line
secondary_table = secondary_table.rstrip()
+25 -17
Просмотреть файл
@@ -173,6 +173,14 @@ class AMDSMIParser(argparse.ArgumentParser):
else:
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(string_value, outputformat)
def _is_command_supported(self, user_input, acceptable_values, command_name):
if acceptable_values == "N/A":
raise amdsmi_cli_exceptions.AmdSmiCommandNotSupportedException(command_name, self.helpers.get_output_format())
elif str(user_input).upper() not in acceptable_values:
print(f"Valid inputs are {acceptable_values}")
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(str(user_input).upper(), self.helpers.get_output_format())
else:
return str(user_input).upper()
def _limit_select(self):
"""Custom action for setting clock limits"""
@@ -401,7 +409,7 @@ class AMDSMIParser(argparse.ArgumentParser):
return _CoreSelectAction
def _add_command_modifiers(self, subcommand_parser):
def _add_command_modifiers(self, subcommand_parser: argparse.ArgumentParser):
json_help = "Displays output in JSON format (human readable by default)."
csv_help = "Displays output in CSV format (human readable by default)."
file_help = "Saves output into a file on the provided path (stdout by default)."
@@ -460,7 +468,7 @@ class AMDSMIParser(argparse.ArgumentParser):
return value
def _add_device_arguments(self, subcommand_parser, required=False):
def _add_device_arguments(self, subcommand_parser: argparse.ArgumentParser, required=False):
# Device arguments help text
gpu_help = f"Select a GPU ID, BDF, or UUID from the possible choices:\n{self.gpu_choices_str}"
vf_help = "Gets general information about the specified VF (timeslice, fb info, …).\
@@ -583,7 +591,7 @@ class AMDSMIParser(argparse.ArgumentParser):
return _ValidateOverdrivePercent
def _add_version_parser(self, subparsers, func):
def _add_version_parser(self, subparsers: argparse._SubParsersAction, func):
# Subparser help text
version_help = "Display version information"
@@ -597,7 +605,7 @@ class AMDSMIParser(argparse.ArgumentParser):
self._add_command_modifiers(version_parser)
def _add_list_parser(self, subparsers, func):
def _add_list_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_amdgpu_initialized():
# The list subcommand is only applicable to systems with amdgpu initialized
return
@@ -619,7 +627,7 @@ class AMDSMIParser(argparse.ArgumentParser):
self._add_device_arguments(list_parser, required=False)
def _add_static_parser(self, subparsers, func):
def _add_static_parser(self, subparsers: argparse._SubParsersAction, func):
# Subparser help text
static_help = "Gets static information about the specified GPU"
static_subcommand_help = "If no GPU is specified, returns static information for all GPUs on the system.\
@@ -925,7 +933,7 @@ class AMDSMIParser(argparse.ArgumentParser):
self._add_command_modifiers(metric_parser)
def _add_process_parser(self, subparsers, func):
def _add_process_parser(self, subparsers: argparse._SubParsersAction, func):
if self.helpers.is_hypervisor():
# Don't add this subparser on Hypervisors
# This subparser is only available to Guest and Baremetal systems
@@ -969,7 +977,7 @@ class AMDSMIParser(argparse.ArgumentParser):
process_parser.add_argument('-n', '--name', action='store', type=lambda value: self._is_valid_string(value, '--name'), required=False, help=name_help)
def _add_profile_parser(self, subparsers, func):
def _add_profile_parser(self, subparsers: argparse._SubParsersAction, func):
if not (self.helpers.is_windows() and self.helpers.is_hypervisor()):
# This subparser only applies to Hypervisors
return
@@ -990,7 +998,7 @@ class AMDSMIParser(argparse.ArgumentParser):
self._add_device_arguments(profile_parser, required=False)
def _add_event_parser(self, subparsers, func):
def _add_event_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_amdgpu_initialized():
# The event subcommand is only applicable to systems with amdgpu initialized
return
@@ -1011,7 +1019,7 @@ class AMDSMIParser(argparse.ArgumentParser):
self._add_device_arguments(event_parser, required=False)
def _add_topology_parser(self, subparsers, func):
def _add_topology_parser(self, subparsers: argparse._SubParsersAction, func):
if not(self.helpers.is_baremetal() and self.helpers.is_linux()):
# This subparser is only applicable to Baremetal Linux
return
@@ -1059,7 +1067,7 @@ class AMDSMIParser(argparse.ArgumentParser):
topology_parser.add_argument('-z', '--bi-dir', action='store_true', required=False, help=bi_dir_help)
def _add_set_value_parser(self, subparsers, func):
def _add_set_value_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_linux():
# This subparser is only applicable to Linux
return
@@ -1078,9 +1086,9 @@ class AMDSMIParser(argparse.ArgumentParser):
set_profile_help = f"Set power profile level (#) or choose one of available profiles:\n\t{power_profile_choices_str}"
perf_det_choices_str = ", ".join(self.helpers.get_perf_det_levels())
set_perf_det_help = f"Set performance determinism and select one of the corresponding performance levels:\n\t{perf_det_choices_str}"
compute_partition_choices_str = ", ".join(self.helpers.get_compute_partition_types())
(accelerator_set_choices, _) = self.helpers.get_accelerator_choices_types_indices()
memory_partition_choices_str = ", ".join(self.helpers.get_memory_partition_types())
set_compute_partition_help = f"Set one of the following the compute partition modes:\n\t{compute_partition_choices_str}"
set_compute_partition_help = f"Set one of the following the accelerator type or profile index:\n\t{accelerator_set_choices}.\n\tUse `sudo amd-smi partition --accelerator` to find acceptable values."
set_memory_partition_help = f"Set one of the following the memory partition modes:\n\t{memory_partition_choices_str}"
power_cap_min, power_cap_max = self.helpers.get_power_caps()
power_cap_max = self.helpers.convert_SI_unit(power_cap_max, AMDSMIHelpers.SI_Unit.MICRO)
@@ -1128,7 +1136,7 @@ class AMDSMIParser(argparse.ArgumentParser):
set_value_exclusive_group.add_argument('-l', '--perf-level', action='store', choices=self.helpers.get_perf_levels()[0], type=str.upper, required=False, help=set_perf_level_help, metavar='LEVEL')
set_value_exclusive_group.add_argument('-P', '--profile', action='store', required=False, help=set_profile_help, metavar='SETPROFILE')
set_value_exclusive_group.add_argument('-d', '--perf-determinism', action='store', type=lambda value: self._not_negative_int(value, '--perf-determinism'), required=False, help=set_perf_det_help, metavar='SCLKMAX')
set_value_exclusive_group.add_argument('-C', '--compute-partition', action='store', choices=self.helpers.get_compute_partition_types(), type=str.upper, required=False, help=set_compute_partition_help, metavar='PARTITION')
set_value_exclusive_group.add_argument('-C', '--compute-partition', action='store', choices=accelerator_set_choices, type=lambda value: self._is_command_supported(value, accelerator_set_choices, '--compute-partition'), required=False, help=set_compute_partition_help, metavar='<ACCELERATOR_TYPE> or <PROFILE_INDEX>')
set_value_exclusive_group.add_argument('-M', '--memory-partition', action='store', choices=self.helpers.get_memory_partition_types(), type=str.upper, required=False, help=set_memory_partition_help, metavar='PARTITION')
set_value_exclusive_group.add_argument('-o', '--power-cap', action='store', type=lambda value: self._positive_int(value, '--power-cap'), required=False, help=set_power_cap_help, metavar='WATTS')
set_value_exclusive_group.add_argument('-p', '--soc-pstate', action='store', required=False, type=lambda value: self._not_negative_int(value, '--soc-pstate'), help=set_soc_pstate_help, metavar='POLICY_ID')
@@ -1162,7 +1170,7 @@ class AMDSMIParser(argparse.ArgumentParser):
self._add_command_modifiers(set_value_parser)
def _add_reset_parser(self, subparsers, func):
def _add_reset_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_linux():
# This subparser is only applicable to Linux
return
@@ -1215,7 +1223,7 @@ class AMDSMIParser(argparse.ArgumentParser):
reset_exclusive_group.add_argument('-l', '--clean-local-data', action='store_true', required=False, help=reset_gpu_clean_local_data_help)
def _add_monitor_parser(self, subparsers, func):
def _add_monitor_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_linux():
# This subparser is only applicable to Linux
return
@@ -1314,7 +1322,7 @@ class AMDSMIParser(argparse.ArgumentParser):
rocm_smi_parser.add_argument('-f', '--showclkfrq', action='store_true', required=False, help=showclkfrq_help)
def _add_xgmi_parser(self, subparsers, func):
def _add_xgmi_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_amdgpu_initialized():
# The xgmi subcommand is only applicable to systems with amdgpu initialized
return
@@ -1344,7 +1352,7 @@ class AMDSMIParser(argparse.ArgumentParser):
xgmi_parser.add_argument('-l', '--link-status', action='store_true', required=False, help=xgmi_link_status_help)
def _add_partition_parser(self, subparsers, func):
def _add_partition_parser(self, subparsers: argparse._SubParsersAction, func):
if not self.helpers.is_amdgpu_initialized():
# The partition subcommand is only applicable to systems with amdgpu initialized
return
+189 -52
Просмотреть файл
@@ -70,6 +70,7 @@ typedef enum {
#define AMDSMI_MAX_ACCELERATOR_PROFILE 32
#define AMDSMI_MAX_CP_PROFILE_RESOURCES 32
#define AMDSMI_MAX_ACCELERATOR_PARTITIONS 8
#define AMDSMI_MAX_NUM_NUMA_NODES 32
#define AMDSMI_GPU_UUID_SIZE 38
@@ -259,8 +260,8 @@ typedef enum {
AMDSMI_STATUS_FILE_NOT_FOUND = 52, //!< file or directory not found
AMDSMI_STATUS_ARG_PTR_NULL = 53, //!< Parsed argument is invalid
AMDSMI_STATUS_AMDGPU_RESTART_ERR = 54, //!< AMDGPU restart failed
AMDSMI_STATUS_SETTING_UNAVAILABLE = 55, //!< Setting is not available
AMDSMI_STATUS_CORRUPTED_EEPROM = 56, //!< EEPROM is corrupted
AMDSMI_STATUS_SETTING_UNAVAILABLE = 55, //!< Setting is not available
AMDSMI_STATUS_CORRUPTED_EEPROM = 56, //!< EEPROM is corrupted
// General errors
AMDSMI_STATUS_MAP_ERROR = 0xFFFFFFFE, //!< The internal library error did not map to a status code
@@ -292,19 +293,35 @@ typedef enum {
* various accelerator partitioning settings.
*/
typedef enum {
AMDSMI_ACCELERATOR_PARTITION_INVALID = 0,
AMDSMI_ACCELERATOR_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
//!< together with shared memory
AMDSMI_ACCELERATOR_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
//!< together with shared memory
AMDSMI_ACCELERATOR_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
//!< work together with shared memory
AMDSMI_ACCELERATOR_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
//!< work together with shared memory
AMDSMI_ACCELERATOR_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
//!< shared memory
AMDSMI_ACCELERATOR_PARTITION_INVALID = 0,
AMDSMI_ACCELERATOR_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
//!< together with shared memory
AMDSMI_ACCELERATOR_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
//!< together with shared memory
AMDSMI_ACCELERATOR_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
//!< work together with shared memory
AMDSMI_ACCELERATOR_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
//!< work together with shared memory
AMDSMI_ACCELERATOR_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
//!< shared memory
AMDSMI_ACCELERATOR_PARTITION_MAX
} amdsmi_accelerator_partition_type_t;
/**
 * @brief Accelerator partition resource types.
 * Enumerates the kinds of accelerator resource that a partition
 * resource profile entry can refer to.
 */
typedef enum {
  AMDSMI_ACCELERATOR_XCC = 0,      //!< XCC resource
  AMDSMI_ACCELERATOR_ENCODER = 1,  //!< Encoder resource
  AMDSMI_ACCELERATOR_DECODER = 2,  //!< Decoder resource
  AMDSMI_ACCELERATOR_DMA = 3,      //!< DMA resource
  AMDSMI_ACCELERATOR_JPEG = 4,     //!< JPEG resource
  AMDSMI_ACCELERATOR_MAX = 5       //!< One past the last resource type listed above
} amdsmi_accelerator_partition_resource_type_t;
/**
* @brief Compute Partition. This enum is used to identify
* various compute partitioning settings.
@@ -329,19 +346,19 @@ typedef enum {
*/
typedef enum {
AMDSMI_MEMORY_PARTITION_UNKNOWN = 0,
AMDSMI_MEMORY_PARTITION_NPS1, //!< NPS1 - All CCD & XCD data is interleaved
//!< accross all 8 HBM stacks (all stacks/1).
AMDSMI_MEMORY_PARTITION_NPS2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
//!< accross the 4 HBM stacks per AID pair
//!< (8 stacks/2).
AMDSMI_MEMORY_PARTITION_NPS4, //!< NPS4 - Each XCD data is interleaved accross
//!< accross 2 (or single) HBM stacks
//!< (8 stacks/8 or 8 stacks/4).
AMDSMI_MEMORY_PARTITION_NPS8, //!< NPS8 - Each XCD uses a single HBM stack
//!< (8 stacks/8). Or each XCD uses a single
//!< HBM stack & CCDs share 2 non-interleaved
//!< HBM stacks on its AID
//!< (AID[1,2,3] = 6 stacks/6).
AMDSMI_MEMORY_PARTITION_NPS1 = 1, //!< NPS1 - All CCD & XCD data is interleaved
//!< across all 8 HBM stacks (all stacks/1).
AMDSMI_MEMORY_PARTITION_NPS2 = 2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
//!< across the 4 HBM stacks per AID pair
//!< (8 stacks/2).
AMDSMI_MEMORY_PARTITION_NPS4 = 4, //!< NPS4 - Each XCD data is interleaved
//!< across 2 (or single) HBM stacks
//!< (8 stacks/8 or 8 stacks/4).
AMDSMI_MEMORY_PARTITION_NPS8 = 8, //!< NPS8 - Each XCD uses a single HBM stack
//!< (8 stacks/8). Or each XCD uses a single
//!< HBM stack & CCDs share 2 non-interleaved
//!< HBM stacks on its AID
//!< (AID[1,2,3] = 6 stacks/6).
} amdsmi_memory_partition_type_t;
/**
@@ -661,34 +678,77 @@ typedef struct {
} amdsmi_kfd_info_t;
/**
* @brief Possible Memory Partition Modes.
* This union is used to identify various memory partitioning settings.
* @brief Possible Memory Partition Capabilities.
* This union is used to identify various memory partition capabilities.
*/
typedef union {
struct nps_flags_ {
uint32_t nps1_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
uint32_t nps2_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
uint32_t nps4_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
uint32_t nps8_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
uint32_t reserved :28;
} amdsmi_nps_flags_t;
uint32_t nps_cap_mask;
struct nps_flags_ {
uint32_t nps1_cap :1; //!< bool 1 = true; 0 = false
uint32_t nps2_cap :1; //!< bool 1 = true; 0 = false
uint32_t nps4_cap :1; //!< bool 1 = true; 0 = false
uint32_t nps8_cap :1; //!< bool 1 = true; 0 = false
uint32_t reserved :28;
} amdsmi_nps_flags_t;
uint32_t nps_cap_mask;
} amdsmi_nps_caps_t;
/**
* @brief Possible Memory Partition Modes.
* This union is used to identify various memory partitioning settings.
* @brief Memory Partition Configuration.
* This structure is used to identify various memory partition configurations.
*/
typedef struct {
amdsmi_accelerator_partition_type_t profile_type; // SPX, DPX, QPX, CPX and so on
uint32_t num_partitions; // On MI300X, SPX: 1, DPX: 2, QPX: 4, CPX: 8, length of resources array
amdsmi_nps_caps_t memory_caps; // Possible memory partition capabilities
uint32_t profile_index;
uint32_t num_resources; // length of index_of_resources_profile
uint32_t resources[AMDSMI_MAX_ACCELERATOR_PARTITIONS][AMDSMI_MAX_CP_PROFILE_RESOURCES];
uint64_t reserved[13];
amdsmi_nps_caps_t partition_caps;
amdsmi_memory_partition_type_t mp_mode;
uint32_t num_numa_ranges;
struct numa_range_ {
amdsmi_vram_type_t memory_type;
uint64_t start;
uint64_t end;
} numa_range[AMDSMI_MAX_NUM_NUMA_NODES];
uint64_t reserved[11];
} amdsmi_memory_partition_config_t;
/**
 * @brief Accelerator Partition Profile.
 * This structure describes a single accelerator partition profile, such as
 * the profile currently applied to a device.
 */
typedef struct {
  amdsmi_accelerator_partition_type_t profile_type;  //!< SPX, DPX, QPX, CPX and so on
  uint32_t num_partitions;  //!< On MI300X: SPX=>1, DPX=>2, QPX=>4, CPX=>8; length of resources
  amdsmi_nps_caps_t memory_caps;  //!< Possible memory partition capabilities
  uint32_t profile_index;  //!< Index of this profile in the profiles array of
                           //!< amdsmi_accelerator_partition_profile_config_t
  uint32_t num_resources;  //!< NOTE(review): the previous comment called this the length of
                           //!< "index_of_resources_profile", which is not a member of this
                           //!< struct; presumably the number of valid entries in each row of
                           //!< resources - confirm
  //! Per-partition resource table; first dimension is bounded by
  //! AMDSMI_MAX_ACCELERATOR_PARTITIONS, second by AMDSMI_MAX_CP_PROFILE_RESOURCES.
  uint32_t resources[AMDSMI_MAX_ACCELERATOR_PARTITIONS][AMDSMI_MAX_CP_PROFILE_RESOURCES];
  uint64_t reserved[13];  //!< Reserved
} amdsmi_accelerator_partition_profile_t;
/**
 * @brief Accelerator Partition Resources.
 * This struct describes one resource entry (a resource type and how it is
 * shared between partitions) of an accelerator partition profile.
 */
typedef struct {
  uint32_t profile_index;  //!< NOTE(review): presumably the index of the partition profile
                           //!< this resource entry belongs to - confirm
  amdsmi_accelerator_partition_resource_type_t resource_type;  //!< Kind of resource described
                                                               //!< by this entry
  uint32_t partition_resource;  //!< Resources a partition can use, which may be shared
  uint32_t num_partitions_share_resource;  //!< If it is greater than 1, then resource is shared.
  uint64_t reserved[6];  //!< Reserved
} amdsmi_accelerator_partition_resource_profile_t;
/**
 * @brief Accelerator Partition Profile Configurations.
 * This struct groups the partition profiles of a device together with their
 * resource profile entries.
 */
typedef struct {
  uint32_t num_profiles;  //!< The length of profiles array
  uint32_t num_resource_profiles;  //!< NOTE(review): presumably the number of valid entries
                                   //!< in resource_profiles - confirm
  //! Resource profile entries; capacity is AMDSMI_MAX_CP_PROFILE_RESOURCES.
  amdsmi_accelerator_partition_resource_profile_t resource_profiles[AMDSMI_MAX_CP_PROFILE_RESOURCES];
  uint32_t default_profile_index;  //!< The index of the default profile in the profiles array
  //! Partition profiles; capacity is AMDSMI_MAX_ACCELERATOR_PROFILE, num_profiles gives the length in use.
  amdsmi_accelerator_partition_profile_t profiles[AMDSMI_MAX_ACCELERATOR_PROFILE];
  uint64_t reserved[30];  //!< Reserved
} amdsmi_accelerator_partition_profile_config_t;
typedef enum {
AMDSMI_LINK_TYPE_INTERNAL,
AMDSMI_LINK_TYPE_XGMI,
@@ -4583,26 +4643,103 @@ amdsmi_get_gpu_memory_partition(amdsmi_processor_handle processor_handle, char *
*
*/
amdsmi_status_t
amdsmi_set_gpu_memory_partition(amdsmi_processor_handle processor_handle, amdsmi_memory_partition_type_t memory_partition);
amdsmi_set_gpu_memory_partition(amdsmi_processor_handle processor_handle,
amdsmi_memory_partition_type_t memory_partition);
/**
 * @brief Version 2.0: Returns current gpu memory partition config
 * (memory partition capabilities and current memory partition mode)
 *
 * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf}
 *
 * @param[in] processor_handle a processor handle
 *
 * @param[out] config reference to the memory partition config.
 * Must be allocated by user.
 *
 * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
 */
amdsmi_status_t
amdsmi_get_gpu_memory_partition_config(amdsmi_processor_handle processor_handle,
                                       amdsmi_memory_partition_config_t *config);
/**
 * @brief Version 2.0: Set the gpu memory partition mode
 *
 * @platform{gpu_bm_linux} @platform{host}
 *
 * @param[in] processor_handle a processor handle
 *
 * @param[in] mode Enum representing memory partition to set to
 *
 * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
 */
amdsmi_status_t
amdsmi_set_gpu_memory_partition_mode(amdsmi_processor_handle processor_handle,
                                     amdsmi_memory_partition_type_t mode);
/** @} */ // end of memory_partition
/*****************************************************************************/
/** @defgroup accelerator_partition_profile Accelerator Partition Profile Functions
/** @defgroup accelerator_partition Accelerator Partition Profile Functions
* These functions are used to configure and query the device's
* accelerator partition profile setting.
* @{
*/
// TODO: declare rest of partition profile functions and complete doc commentary.
/*
Get the current accelerator partition profile. The function will return current profile.
*/
/**
 * @brief Version 2.0: Returns gpu accelerator partition caps as currently configured in the system
 * Reading resource information requires admin/sudo privileges. Without them,
 * the API fills in the structure with as much information as it can.
 *
 * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf}
 *
 * @param[in] processor_handle Device which to query
 *
 * @param[out] profile_config reference to the accelerator partition config.
 * Must be allocated by user.
 *
 * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
 */
amdsmi_status_t
amdsmi_get_gpu_accelerator_partition_profile_config(amdsmi_processor_handle processor_handle,
                                                    amdsmi_accelerator_partition_profile_config_t *profile_config);
/**
 * @brief Version 2.0: Returns current gpu accelerator partition profile
 * and the partition ids associated with it
 *
 * @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf}
 *
 * @param[in] processor_handle Device which to query
 *
 * @param[out] profile reference to the accelerator partition profile.
 * Must be allocated by user.
 *
 * @param[inout] partition_id array of ids for current accelerator profile.
 * Must be allocated by user.
 * NOTE(review): the required array length is not stated here; the Python
 * binding passes 8 uint32_t elements - confirm the minimum size
 * (presumably AMDSMI_MAX_ACCELERATOR_PARTITIONS).
 *
 * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
 */
amdsmi_status_t
amdsmi_get_gpu_accelerator_partition_profile(amdsmi_processor_handle processor_handle,
                                             amdsmi_accelerator_partition_profile_t *profile,
                                             uint32_t *partition_id);
/** @} */ // end of accelerator_partition_profile
/**
 * @brief Version 2.0: Set accelerator partition setting based on profile_index
 * from amdsmi_get_gpu_accelerator_partition_profile_config
 *
 * @platform{gpu_bm_linux} @platform{host}
 *
 * @param[in] processor_handle Device to configure
 *
 * @param[in] profile_index Index of the partition profile the user wants to set
 *
 * @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
 */
amdsmi_status_t
amdsmi_set_gpu_accelerator_partition_profile(amdsmi_processor_handle processor_handle,
                                             uint32_t profile_index);
/** @} End accelerator_partition */
/*****************************************************************************/
/** @defgroup EvntNotif Event Notification Functions
+4
Просмотреть файл
@@ -224,6 +224,10 @@ from .amdsmi_interface import amdsmi_set_gpu_compute_partition
from .amdsmi_interface import amdsmi_get_gpu_memory_partition
from .amdsmi_interface import amdsmi_set_gpu_memory_partition
from .amdsmi_interface import amdsmi_get_gpu_accelerator_partition_profile
from .amdsmi_interface import amdsmi_get_gpu_accelerator_partition_profile_config
from .amdsmi_interface import amdsmi_get_gpu_memory_partition_config
from .amdsmi_interface import amdsmi_set_gpu_accelerator_partition_profile
from .amdsmi_interface import amdsmi_set_gpu_memory_partition_mode
# # Individual GPU Metrics Functions
from .amdsmi_interface import amdsmi_get_gpu_metrics_header_info
+2
Просмотреть файл
@@ -87,6 +87,8 @@ class AmdSmiLibraryException(AmdSmiException):
amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL : "AMDSMI_STATUS_ARG_PTR_NULL - Parsed argument is invalid",
amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR : "AMDSMI_STATUS_MAP_ERROR - The internal library error did not map to a status code",
amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR: "AMDSMI_STATUS_AMDGPU_RESTART_ERR - AMDGPU restart failed, please check dmsg for errors",
amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE: "AMDSMI_STATUS_SETTING_UNAVAILABLE - Setting is not available",
amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM: "AMDSMI_STATUS_CORRUPTED_EEPROM - Setting is not available",
amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR : "AMDSMI_STATUS_UNKNOWN_ERROR - An unknown error occurred"
}
+192 -9
Просмотреть файл
@@ -19,6 +19,8 @@
import ctypes
import re
import json
import logging
from typing import Union, Any, Dict, List
from enum import IntEnum
from collections.abc import Iterable
@@ -288,13 +290,30 @@ class AmdSmiVoltageType(IntEnum):
VDDGFX = amdsmi_wrapper.AMDSMI_VOLT_TYPE_VDDGFX
INVALID = amdsmi_wrapper.AMDSMI_VOLT_TYPE_INVALID
class AmdSmiAcceleratorPartitionResourceType(IntEnum):
    """Accelerator partition resource types.

    Each member takes its value from the matching ``AMDSMI_ACCELERATOR_*``
    constant in ``amdsmi_wrapper``.
    """
    XCC = amdsmi_wrapper.AMDSMI_ACCELERATOR_XCC
    ENCODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_ENCODER
    DECODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_DECODER
    DMA = amdsmi_wrapper.AMDSMI_ACCELERATOR_DMA
    JPEG = amdsmi_wrapper.AMDSMI_ACCELERATOR_JPEG
    MAX = amdsmi_wrapper.AMDSMI_ACCELERATOR_MAX
class AmdSmiAcceleratorPartitionType(IntEnum):
    """Accelerator partition types (SPX, DPX, TPX, QPX, CPX, INVALID).

    Each member takes its value from the matching
    ``AMDSMI_ACCELERATOR_PARTITION_*`` constant in ``amdsmi_wrapper``.
    """
    SPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_SPX
    DPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_DPX
    TPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_TPX
    QPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_QPX
    CPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_CPX
    INVALID = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_INVALID
class AmdSmiComputePartitionType(IntEnum):
CPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_CPX
SPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_SPX
DPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_DPX
TPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_TPX
QPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_QPX
CPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_CPX
INVALID = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_INVALID
@@ -2729,6 +2748,7 @@ def amdsmi_get_gpu_compute_partition(processor_handle: amdsmi_wrapper.amdsmi_pro
def amdsmi_set_gpu_compute_partition(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
compute_partition: AmdSmiComputePartitionType):
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
@@ -2743,6 +2763,21 @@ def amdsmi_set_gpu_compute_partition(processor_handle: amdsmi_wrapper.amdsmi_pro
)
)
def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
                                                 profile_index: int):
    """Apply an accelerator partition profile, selected by index, to a device.

    Args:
        processor_handle: Handle of the device to configure.
        profile_index: Index of the accelerator partition profile to set.

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``profile_index``
            is not an ``int``.

    The status returned by the library call is handed to ``_check_res``
    (defined elsewhere in this module; presumably raises on a non-success
    status - confirm).
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)
    if not isinstance(profile_index, int):
        raise AmdSmiParameterException(profile_index, int)

    status = amdsmi_wrapper.amdsmi_set_gpu_accelerator_partition_profile(
        processor_handle, profile_index
    )
    _check_res(status)
def amdsmi_get_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_processor_handle):
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
@@ -2763,6 +2798,39 @@ def amdsmi_get_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_proc
return memory_partition.value.decode("utf-8")
def amdsmi_get_gpu_memory_partition_config(processor_handle: amdsmi_wrapper.amdsmi_processor_handle):
    """Query the memory partition configuration of a device.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        dict with the keys:
          * ``partition_caps``: list of supported modes, drawn from
            ``"NPS1"``, ``"NPS2"``, ``"NPS4"``, ``"NPS8"``.
          * ``mp_mode``: name of the current mode with the
            ``AMDSMI_MEMORY_PARTITION_`` prefix removed; ``UNKNOWN`` is
            reported as ``"N/A"``.
          * ``num_numa_ranges`` / ``numa_range``: always ``"N/A"`` here; the
            NUMA fields of the underlying struct are not translated.

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    handle_type = amdsmi_wrapper.amdsmi_processor_handle
    if not isinstance(processor_handle, handle_type):
        raise AmdSmiParameterException(processor_handle, handle_type)

    config = amdsmi_wrapper.amdsmi_memory_partition_config_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_partition_config(
        processor_handle, config
    )
    _check_res(status)

    # Translate the capability bit-field into the list of mode names it enables.
    nps_flags = config.partition_caps.amdsmi_nps_flags_t
    cap_bits = (
        ("NPS1", nps_flags.nps1_cap),
        ("NPS2", nps_flags.nps2_cap),
        ("NPS4", nps_flags.nps4_cap),
        ("NPS8", nps_flags.nps8_cap),
    )
    mem_caps_list = [label for label, bit in cap_bits if bit == 1]

    mode_name = amdsmi_wrapper.amdsmi_memory_partition_type_t__enumvalues[config.mp_mode]
    mode_name = mode_name.replace("AMDSMI_MEMORY_PARTITION_", "").replace("UNKNOWN", "N/A")

    return_dict = {
        "partition_caps": mem_caps_list,
        "mp_mode": mode_name,
        "num_numa_ranges": "N/A",
        "numa_range": "N/A",
    }
    logging.debug("amdsmi_interface.py | amdsmi_get_gpu_memory_partition_config | return_dictionary = \n" + str(json.dumps(return_dict, indent=4)))
    return return_dict
def amdsmi_set_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
memory_partition: AmdSmiMemoryPartitionType):
@@ -2780,6 +2848,21 @@ def amdsmi_set_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_proc
)
)
def amdsmi_set_gpu_memory_partition_mode(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
                                         memory_partition: AmdSmiMemoryPartitionType):
    """Set the memory partition mode of a device through the version 2.0 API.

    Args:
        processor_handle: Handle of the device to configure.
        memory_partition: Memory partition mode to switch to.

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle`` or ``memory_partition``
            is not an ``AmdSmiMemoryPartitionType``.

    The status returned by the library call is handed to ``_check_res``
    (defined elsewhere in this module; presumably raises on a non-success
    status - confirm).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )
    if not isinstance(memory_partition, AmdSmiMemoryPartitionType):
        raise AmdSmiParameterException(memory_partition, AmdSmiMemoryPartitionType)

    # Call the "_mode" entry point that this binding is named after; the
    # previous body dispatched to the legacy amdsmi_set_gpu_memory_partition,
    # which made this function a duplicate of the older binding.
    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_memory_partition_mode(
            processor_handle, memory_partition
        )
    )
def amdsmi_get_gpu_accelerator_partition_profile(
processor_handle: amdsmi_wrapper.amdsmi_processor_handle
@@ -2788,29 +2871,129 @@ def amdsmi_get_gpu_accelerator_partition_profile(
raise AmdSmiParameterException(
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
)
partition_id = ctypes.c_uint32()
length = 8
partition_id = [0, 0, 0, 0, 0, 0, 0, 0]
partition_id_list = (ctypes.c_uint32 * length)(*partition_id)
profile = amdsmi_wrapper.amdsmi_accelerator_partition_profile_t()
_check_res(
amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile(processor_handle,
ctypes.byref(profile),
ctypes.byref(partition_id))
ctypes.byref(profile), partition_id_list)
)
profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[profile.profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "")
profile_type_ret = profile_type_ret.replace("INVALID", "N/A")
length = profile.num_partitions
partition_ids = []
for i in range(profile.num_partitions):
partition_ids.append(partition_id_list[i])
last_element = 0
if length > 0:
last_element = length - 1
if ((partition_ids[last_element] == 0)
and not((profile_type_ret == str("SPX")) or (profile_type_ret == str("N/A")))):
partition_ids = "N/A"
mem_caps_list = []
if profile.memory_caps.amdsmi_nps_flags_t.nps1_cap == 1:
mem_caps_list.append("NPS1")
if profile.memory_caps.amdsmi_nps_flags_t.nps2_cap == 1:
mem_caps_list.append("NPS2")
if profile.memory_caps.amdsmi_nps_flags_t.nps4_cap == 1:
mem_caps_list.append("NPS4")
if profile.memory_caps.amdsmi_nps_flags_t.nps8_cap == 1:
mem_caps_list.append("NPS8")
partition_profile_dict = {
"profile_type" : profile.profile_type,
"profile_type" : profile_type_ret,
"num_partitions" : profile.num_partitions,
"profile_index" : profile.profile_index,
"memory_caps" : profile.memory_caps,
"memory_caps": mem_caps_list,
"num_resources" : profile.num_resources,
"resources" : "N/A"
}
return {
"partition_id" : partition_id.value,
return_dictionary = {
"partition_id" : partition_ids,
"partition_profile" : partition_profile_dict
}
logging.debug("amdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile | return_dictionary = \n" + str(json.dumps(return_dictionary, indent=4)))
return return_dictionary
def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: amdsmi_wrapper.amdsmi_processor_handle) -> Dict:
    """Query the accelerator partition profile configuration of a device.

    Args:
        processor_handle: Handle of the device to query.

    Returns:
        Dict with the keys ``num_profiles``, ``num_resource_profiles``,
        ``resource_profiles``, ``default_profile_index`` and ``profiles``.
        Each entry of ``profiles`` is a dict with ``profile_type`` (enum name
        without the ``AMDSMI_ACCELERATOR_PARTITION_`` prefix, ``"N/A"`` for
        INVALID), ``num_partitions``, ``profile_index``, ``memory_caps``
        (list drawn from ``"NPS1"``..``"NPS8"``), ``num_resources`` and
        ``resources`` (list of resource dicts).

    Raises:
        AmdSmiParameterException: if ``processor_handle`` is not an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    config = amdsmi_wrapper.amdsmi_accelerator_partition_profile_config_t()
    _check_res(amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle,
                                                                                  ctypes.byref(config)))

    logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | START - "
                  + "config.num_profiles = " + str(config.num_profiles)
                  + "\n; config.num_resource_profiles = " + str(config.num_resource_profiles)
                  + "\n; config.resource_profiles = " + str(config.resource_profiles)
                  + "\n; config.default_profile_index = " + str(config.default_profile_index)
                  + "\n; config.profiles = " + str(config.profiles))

    profiles = []
    # Bound before the loop so the "resource_profiles" entry of the result is
    # defined even when the library reports zero profiles; previously that
    # case raised UnboundLocalError.
    resources = []
    # Running index into config.resource_profiles. It is not reset between
    # profiles, so each profile consumes the next num_resource_profiles entries.
    # NOTE(review): confirm this cannot run past the fixed-size array.
    resource_idx = 0
    for i in range(config.num_profiles):
        profile = config.profiles[i]
        logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | profile = " + str(profile))

        profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[
            profile.profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "")
        profile_type_ret = profile_type_ret.replace("INVALID", "N/A")

        # Translate the capability bit-field into the list of mode names it enables.
        nps_flags = profile.memory_caps.amdsmi_nps_flags_t
        mem_caps_list = [
            label
            for label, bit in (
                ("NPS1", nps_flags.nps1_cap),
                ("NPS2", nps_flags.nps2_cap),
                ("NPS4", nps_flags.nps4_cap),
                ("NPS8", nps_flags.nps8_cap),
            )
            if bit == 1
        ]

        resources = []
        for r in range(config.num_resource_profiles):
            logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | i = " + str(i) + "; r = " + str(r) + "; resource_idx = " + str(resource_idx))
            res_profile = config.resource_profiles[resource_idx]
            resource_profiles_ret = amdsmi_wrapper.amdsmi_accelerator_partition_resource_type_t__enumvalues[
                res_profile.resource_type].replace("AMDSMI_ACCELERATOR_", "")
            resource_profile_dict = {
                "profile_index": res_profile.profile_index,
                "resource_type": resource_profiles_ret,
                "partition_resource": res_profile.partition_resource,
                "num_partitions_share_resource": res_profile.num_partitions_share_resource,
            }
            logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | resource_profile_dict = " + str(resource_profile_dict))
            resources.append(resource_profile_dict)
            resource_idx += 1

        profile_dict = {
            "profile_type": profile_type_ret,
            "num_partitions": profile.num_partitions,
            "profile_index": profile.profile_index,
            "memory_caps": mem_caps_list,
            "num_resources": profile.num_resources,
            "resources": resources
        }
        profiles.append(profile_dict)

    config_dict = {
        "num_profiles": config.num_profiles,
        "num_resource_profiles": config.num_resource_profiles,
        # NOTE(review): this is the resource list built for the last profile
        # only (empty when there are no profiles) - confirm that is intended.
        "resource_profiles": resources,
        "default_profile_index": config.default_profile_index,
        "profiles": profiles,
    }
    logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | END - config_dict = \n" + str(json.dumps(config_dict, indent=4)))
    return config_dict
def amdsmi_get_xgmi_info(processor_handle: amdsmi_wrapper.amdsmi_processor_handle):
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
+124 -15
Просмотреть файл
@@ -285,6 +285,7 @@ amdsmi_status_t__enumvalues = {
53: 'AMDSMI_STATUS_ARG_PTR_NULL',
54: 'AMDSMI_STATUS_AMDGPU_RESTART_ERR',
55: 'AMDSMI_STATUS_SETTING_UNAVAILABLE',
56: 'AMDSMI_STATUS_CORRUPTED_EEPROM',
4294967294: 'AMDSMI_STATUS_MAP_ERROR',
4294967295: 'AMDSMI_STATUS_UNKNOWN_ERROR',
}
@@ -329,6 +330,7 @@ AMDSMI_STATUS_FILE_NOT_FOUND = 52
AMDSMI_STATUS_ARG_PTR_NULL = 53
AMDSMI_STATUS_AMDGPU_RESTART_ERR = 54
AMDSMI_STATUS_SETTING_UNAVAILABLE = 55
AMDSMI_STATUS_CORRUPTED_EEPROM = 56
AMDSMI_STATUS_MAP_ERROR = 4294967294
AMDSMI_STATUS_UNKNOWN_ERROR = 4294967295
amdsmi_status_t = ctypes.c_uint32 # enum
@@ -372,6 +374,7 @@ amdsmi_accelerator_partition_type_t__enumvalues = {
3: 'AMDSMI_ACCELERATOR_PARTITION_TPX',
4: 'AMDSMI_ACCELERATOR_PARTITION_QPX',
5: 'AMDSMI_ACCELERATOR_PARTITION_CPX',
6: 'AMDSMI_ACCELERATOR_PARTITION_MAX',
}
AMDSMI_ACCELERATOR_PARTITION_INVALID = 0
AMDSMI_ACCELERATOR_PARTITION_SPX = 1
@@ -379,8 +382,26 @@ AMDSMI_ACCELERATOR_PARTITION_DPX = 2
AMDSMI_ACCELERATOR_PARTITION_TPX = 3
AMDSMI_ACCELERATOR_PARTITION_QPX = 4
AMDSMI_ACCELERATOR_PARTITION_CPX = 5
AMDSMI_ACCELERATOR_PARTITION_MAX = 6
amdsmi_accelerator_partition_type_t = ctypes.c_uint32 # enum
# values for enumeration 'amdsmi_accelerator_partition_resource_type_t'
amdsmi_accelerator_partition_resource_type_t__enumvalues = {
0: 'AMDSMI_ACCELERATOR_XCC',
1: 'AMDSMI_ACCELERATOR_ENCODER',
2: 'AMDSMI_ACCELERATOR_DECODER',
3: 'AMDSMI_ACCELERATOR_DMA',
4: 'AMDSMI_ACCELERATOR_JPEG',
5: 'AMDSMI_ACCELERATOR_MAX',
}
AMDSMI_ACCELERATOR_XCC = 0
AMDSMI_ACCELERATOR_ENCODER = 1
AMDSMI_ACCELERATOR_DECODER = 2
AMDSMI_ACCELERATOR_DMA = 3
AMDSMI_ACCELERATOR_JPEG = 4
AMDSMI_ACCELERATOR_MAX = 5
amdsmi_accelerator_partition_resource_type_t = ctypes.c_uint32 # enum
# values for enumeration 'amdsmi_compute_partition_type_t'
amdsmi_compute_partition_type_t__enumvalues = {
0: 'AMDSMI_COMPUTE_PARTITION_INVALID',
@@ -403,14 +424,14 @@ amdsmi_memory_partition_type_t__enumvalues = {
0: 'AMDSMI_MEMORY_PARTITION_UNKNOWN',
1: 'AMDSMI_MEMORY_PARTITION_NPS1',
2: 'AMDSMI_MEMORY_PARTITION_NPS2',
3: 'AMDSMI_MEMORY_PARTITION_NPS4',
4: 'AMDSMI_MEMORY_PARTITION_NPS8',
4: 'AMDSMI_MEMORY_PARTITION_NPS4',
8: 'AMDSMI_MEMORY_PARTITION_NPS8',
}
# Enumerators of 'amdsmi_memory_partition_type_t'.
# NPS4 and NPS8 are numbered 4 and 8 (not 3 and 4): the stale sequential
# assignments that were immediately overwritten have been removed so each
# constant is defined exactly once.
AMDSMI_MEMORY_PARTITION_UNKNOWN = 0
AMDSMI_MEMORY_PARTITION_NPS1 = 1
AMDSMI_MEMORY_PARTITION_NPS2 = 2
AMDSMI_MEMORY_PARTITION_NPS4 = 4
AMDSMI_MEMORY_PARTITION_NPS8 = 8
# The enum is passed across the ctypes boundary as an unsigned 32-bit integer.
amdsmi_memory_partition_type_t = ctypes.c_uint32 # enum
# values for enumeration 'amdsmi_temperature_type_t'
@@ -979,6 +1000,31 @@ union_amdsmi_nps_caps_t._fields_ = [
]
amdsmi_nps_caps_t = union_amdsmi_nps_caps_t
# One entry of the 'numa_range' array below: a memory type plus a start/end
# pair, laid out with 1-byte packing and explicit padding.
class struct_numa_range_(Structure):
    _pack_ = 1 # source:False
    _fields_ = [
        ('memory_type', amdsmi_vram_type_t),
        ('PADDING_0', ctypes.c_ubyte * 4),
        ('start', ctypes.c_uint64),
        ('end', ctypes.c_uint64),
    ]

# Memory partition configuration: capability flags, the current mode, and up
# to 32 NUMA range entries ('num_numa_ranges' presumably gives how many are
# valid - confirm against the C header).
class struct_amdsmi_memory_partition_config_t(Structure):
    _pack_ = 1 # source:False
    _fields_ = [
        ('partition_caps', amdsmi_nps_caps_t),
        ('mp_mode', amdsmi_memory_partition_type_t),
        ('num_numa_ranges', ctypes.c_uint32),
        ('PADDING_0', ctypes.c_ubyte * 4),
        ('numa_range', struct_numa_range_ * 32),
        ('reserved', ctypes.c_uint64 * 11),
    ]

# Public alias without the 'struct_' prefix.
amdsmi_memory_partition_config_t = struct_amdsmi_memory_partition_config_t
class struct_amdsmi_accelerator_partition_profile_t(Structure):
pass
@@ -995,6 +1041,34 @@ struct_amdsmi_accelerator_partition_profile_t._fields_ = [
]
amdsmi_accelerator_partition_profile_t = struct_amdsmi_accelerator_partition_profile_t
# Resource profile record: which profile it belongs to, the resource type,
# and two uint32 counters, laid out with 1-byte packing.
class struct_amdsmi_accelerator_partition_resource_profile_t(Structure):
    _pack_ = 1 # source:False
    _fields_ = [
        ('profile_index', ctypes.c_uint32),
        ('resource_type', amdsmi_accelerator_partition_resource_type_t),
        ('partition_resource', ctypes.c_uint32),
        ('num_partitions_share_resource', ctypes.c_uint32),
        ('reserved', ctypes.c_uint64 * 6),
    ]

# Public alias without the 'struct_' prefix.
amdsmi_accelerator_partition_resource_profile_t = struct_amdsmi_accelerator_partition_resource_profile_t

# Accelerator partition profile configuration: fixed-size arrays of 32
# resource profiles and 32 partition profiles, with counts and the default
# profile index alongside.
class struct_amdsmi_accelerator_partition_profile_config_t(Structure):
    _pack_ = 1 # source:False
    _fields_ = [
        ('num_profiles', ctypes.c_uint32),
        ('num_resource_profiles', ctypes.c_uint32),
        ('resource_profiles', struct_amdsmi_accelerator_partition_resource_profile_t * 32),
        ('default_profile_index', ctypes.c_uint32),
        ('PADDING_0', ctypes.c_ubyte * 4),
        ('profiles', struct_amdsmi_accelerator_partition_profile_t * 32),
        ('reserved', ctypes.c_uint64 * 30),
    ]

# Public alias without the 'struct_' prefix.
amdsmi_accelerator_partition_profile_config_t = struct_amdsmi_accelerator_partition_profile_config_t
# values for enumeration 'amdsmi_link_type_t'
amdsmi_link_type_t__enumvalues = {
@@ -2181,6 +2255,12 @@ amdsmi_get_gpu_memory_usage.argtypes = [amdsmi_processor_handle, amdsmi_memory_t
# ctypes bindings for libamd_smi.so bad-page / RAS entry points.
# Each binding looks the symbol up in the loaded library, then declares its
# return type (always amdsmi_status_t here) and its argument types.

# (handle, uint32*, amdsmi_retired_page_record_t*) -> status
amdsmi_get_gpu_bad_page_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_bad_page_info
amdsmi_get_gpu_bad_page_info.restype = amdsmi_status_t
amdsmi_get_gpu_bad_page_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(struct_amdsmi_retired_page_record_t)]
# (handle, uint32*) -> status; the uint32 is an output pointer
amdsmi_get_gpu_bad_page_threshold = _libraries['libamd_smi.so'].amdsmi_get_gpu_bad_page_threshold
amdsmi_get_gpu_bad_page_threshold.restype = amdsmi_status_t
amdsmi_get_gpu_bad_page_threshold.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32)]
# (handle) -> status; no output argument, the result is the status code itself
amdsmi_gpu_validate_ras_eeprom = _libraries['libamd_smi.so'].amdsmi_gpu_validate_ras_eeprom
amdsmi_gpu_validate_ras_eeprom.restype = amdsmi_status_t
amdsmi_gpu_validate_ras_eeprom.argtypes = [amdsmi_processor_handle]
# (handle, amdsmi_ras_feature_t*) -> status
amdsmi_get_gpu_ras_feature_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_ras_feature_info
amdsmi_get_gpu_ras_feature_info.restype = amdsmi_status_t
amdsmi_get_gpu_ras_feature_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_ras_feature_t)]
@@ -2382,9 +2462,21 @@ amdsmi_get_gpu_memory_partition.argtypes = [amdsmi_processor_handle, ctypes.POIN
# ctypes bindings for libamd_smi.so partition entry points.
# Each binding looks the symbol up in the loaded library, then declares its
# return type (always amdsmi_status_t here) and its argument types.

# (handle, amdsmi_memory_partition_type_t) -> status
amdsmi_set_gpu_memory_partition = _libraries['libamd_smi.so'].amdsmi_set_gpu_memory_partition
amdsmi_set_gpu_memory_partition.restype = amdsmi_status_t
amdsmi_set_gpu_memory_partition.argtypes = [amdsmi_processor_handle, amdsmi_memory_partition_type_t]
# (handle, amdsmi_memory_partition_config_t*) -> status; the struct is filled by the callee
amdsmi_get_gpu_memory_partition_config = _libraries['libamd_smi.so'].amdsmi_get_gpu_memory_partition_config
amdsmi_get_gpu_memory_partition_config.restype = amdsmi_status_t
amdsmi_get_gpu_memory_partition_config.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_memory_partition_config_t)]
# (handle, amdsmi_memory_partition_type_t) -> status; same argument types as
# amdsmi_set_gpu_memory_partition above
amdsmi_set_gpu_memory_partition_mode = _libraries['libamd_smi.so'].amdsmi_set_gpu_memory_partition_mode
amdsmi_set_gpu_memory_partition_mode.restype = amdsmi_status_t
amdsmi_set_gpu_memory_partition_mode.argtypes = [amdsmi_processor_handle, amdsmi_memory_partition_type_t]
# (handle, amdsmi_accelerator_partition_profile_config_t*) -> status
amdsmi_get_gpu_accelerator_partition_profile_config = _libraries['libamd_smi.so'].amdsmi_get_gpu_accelerator_partition_profile_config
amdsmi_get_gpu_accelerator_partition_profile_config.restype = amdsmi_status_t
amdsmi_get_gpu_accelerator_partition_profile_config.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_accelerator_partition_profile_config_t)]
# (handle, amdsmi_accelerator_partition_profile_t*, uint32*) -> status
amdsmi_get_gpu_accelerator_partition_profile = _libraries['libamd_smi.so'].amdsmi_get_gpu_accelerator_partition_profile
amdsmi_get_gpu_accelerator_partition_profile.restype = amdsmi_status_t
amdsmi_get_gpu_accelerator_partition_profile.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_accelerator_partition_profile_t), ctypes.POINTER(ctypes.c_uint32)]
# (handle, uint32_t) -> status; the uint32 is passed by value
# NOTE(review): presumably the uint32 is a profile index - confirm against the C header
amdsmi_set_gpu_accelerator_partition_profile = _libraries['libamd_smi.so'].amdsmi_set_gpu_accelerator_partition_profile
amdsmi_set_gpu_accelerator_partition_profile.restype = amdsmi_status_t
amdsmi_set_gpu_accelerator_partition_profile.argtypes = [amdsmi_processor_handle, uint32_t]
# (handle) -> status
amdsmi_init_gpu_event_notification = _libraries['libamd_smi.so'].amdsmi_init_gpu_event_notification
amdsmi_init_gpu_event_notification.restype = amdsmi_status_t
amdsmi_init_gpu_event_notification.argtypes = [amdsmi_processor_handle]
@@ -2591,13 +2683,17 @@ amdsmi_get_esmi_err_msg = _libraries['libamd_smi.so'].amdsmi_get_esmi_err_msg
amdsmi_get_esmi_err_msg.restype = amdsmi_status_t
amdsmi_get_esmi_err_msg.argtypes = [amdsmi_status_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
__all__ = \
['AGG_BW0', 'AMDSMI_ACCELERATOR_PARTITION_CPX',
['AGG_BW0', 'AMDSMI_ACCELERATOR_DECODER',
'AMDSMI_ACCELERATOR_DMA', 'AMDSMI_ACCELERATOR_ENCODER',
'AMDSMI_ACCELERATOR_JPEG', 'AMDSMI_ACCELERATOR_MAX',
'AMDSMI_ACCELERATOR_PARTITION_CPX',
'AMDSMI_ACCELERATOR_PARTITION_DPX',
'AMDSMI_ACCELERATOR_PARTITION_INVALID',
'AMDSMI_ACCELERATOR_PARTITION_MAX',
'AMDSMI_ACCELERATOR_PARTITION_QPX',
'AMDSMI_ACCELERATOR_PARTITION_SPX',
'AMDSMI_ACCELERATOR_PARTITION_TPX', 'AMDSMI_AVERAGE_POWER',
'AMDSMI_CACHE_PROPERTY_CPU_CACHE',
'AMDSMI_ACCELERATOR_PARTITION_TPX', 'AMDSMI_ACCELERATOR_XCC',
'AMDSMI_AVERAGE_POWER', 'AMDSMI_CACHE_PROPERTY_CPU_CACHE',
'AMDSMI_CACHE_PROPERTY_DATA_CACHE',
'AMDSMI_CACHE_PROPERTY_ENABLED',
'AMDSMI_CACHE_PROPERTY_INST_CACHE',
@@ -2737,6 +2833,7 @@ __all__ = \
'AMDSMI_REG_XGMI', 'AMDSMI_STATUS_ADDRESS_FAULT',
'AMDSMI_STATUS_AMDGPU_RESTART_ERR', 'AMDSMI_STATUS_API_FAILED',
'AMDSMI_STATUS_ARG_PTR_NULL', 'AMDSMI_STATUS_BUSY',
'AMDSMI_STATUS_CORRUPTED_EEPROM',
'AMDSMI_STATUS_DRIVER_NOT_LOADED', 'AMDSMI_STATUS_DRM_ERROR',
'AMDSMI_STATUS_FAIL_LOAD_MODULE',
'AMDSMI_STATUS_FAIL_LOAD_SYMBOL', 'AMDSMI_STATUS_FILE_ERROR',
@@ -2801,7 +2898,10 @@ __all__ = \
'AMDSMI_XGMI_STATUS_ERROR', 'AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS',
'AMDSMI_XGMI_STATUS_NO_ERRORS', 'CLK_LIMIT_MAX', 'CLK_LIMIT_MIN',
'RD_BW0', 'WR_BW0', 'amd_metrics_table_header_t',
'amdsmi_accelerator_partition_profile_config_t',
'amdsmi_accelerator_partition_profile_t',
'amdsmi_accelerator_partition_resource_profile_t',
'amdsmi_accelerator_partition_resource_type_t',
'amdsmi_accelerator_partition_type_t', 'amdsmi_asic_info_t',
'amdsmi_bdf_t', 'amdsmi_bit_field_t', 'amdsmi_board_info_t',
'amdsmi_cache_property_type_t', 'amdsmi_card_form_factor_t',
@@ -2849,9 +2949,11 @@ __all__ = \
'amdsmi_get_energy_count', 'amdsmi_get_esmi_err_msg',
'amdsmi_get_fw_info',
'amdsmi_get_gpu_accelerator_partition_profile',
'amdsmi_get_gpu_accelerator_partition_profile_config',
'amdsmi_get_gpu_activity', 'amdsmi_get_gpu_asic_info',
'amdsmi_get_gpu_available_counters',
'amdsmi_get_gpu_bad_page_info', 'amdsmi_get_gpu_bdf_id',
'amdsmi_get_gpu_bad_page_info',
'amdsmi_get_gpu_bad_page_threshold', 'amdsmi_get_gpu_bdf_id',
'amdsmi_get_gpu_board_info', 'amdsmi_get_gpu_cache_info',
'amdsmi_get_gpu_compute_partition',
'amdsmi_get_gpu_compute_process_gpus',
@@ -2865,6 +2967,7 @@ __all__ = \
'amdsmi_get_gpu_id', 'amdsmi_get_gpu_kfd_info',
'amdsmi_get_gpu_mem_overdrive_level',
'amdsmi_get_gpu_memory_partition',
'amdsmi_get_gpu_memory_partition_config',
'amdsmi_get_gpu_memory_reserved_pages',
'amdsmi_get_gpu_memory_total', 'amdsmi_get_gpu_memory_usage',
'amdsmi_get_gpu_metrics_header_info',
@@ -2905,15 +3008,16 @@ __all__ = \
'amdsmi_gpu_cache_info_t', 'amdsmi_gpu_control_counter',
'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter',
'amdsmi_gpu_destroy_counter', 'amdsmi_gpu_metrics_t',
'amdsmi_gpu_read_counter', 'amdsmi_gpu_xcp_metrics_t',
'amdsmi_gpu_xgmi_error_status', 'amdsmi_hsmp_freqlimit_src_names',
'amdsmi_hsmp_metrics_table_t', 'amdsmi_init',
'amdsmi_init_flags_t', 'amdsmi_init_gpu_event_notification',
'amdsmi_io_bw_encoding_t', 'amdsmi_io_link_type_t',
'amdsmi_is_P2P_accessible',
'amdsmi_gpu_read_counter', 'amdsmi_gpu_validate_ras_eeprom',
'amdsmi_gpu_xcp_metrics_t', 'amdsmi_gpu_xgmi_error_status',
'amdsmi_hsmp_freqlimit_src_names', 'amdsmi_hsmp_metrics_table_t',
'amdsmi_init', 'amdsmi_init_flags_t',
'amdsmi_init_gpu_event_notification', 'amdsmi_io_bw_encoding_t',
'amdsmi_io_link_type_t', 'amdsmi_is_P2P_accessible',
'amdsmi_is_gpu_power_management_enabled', 'amdsmi_kfd_info_t',
'amdsmi_link_id_bw_type_t', 'amdsmi_link_metrics_t',
'amdsmi_link_type_t', 'amdsmi_memory_page_status_t',
'amdsmi_memory_partition_config_t',
'amdsmi_memory_partition_type_t', 'amdsmi_memory_type_t',
'amdsmi_mm_ip_t', 'amdsmi_name_value_t', 'amdsmi_nps_caps_t',
'amdsmi_od_vddc_point_t', 'amdsmi_od_volt_curve_t',
@@ -2936,10 +3040,12 @@ __all__ = \
'amdsmi_set_cpu_socket_boostlimit',
'amdsmi_set_cpu_socket_lclk_dpm_level',
'amdsmi_set_cpu_socket_power_cap', 'amdsmi_set_cpu_xgmi_width',
'amdsmi_set_gpu_accelerator_partition_profile',
'amdsmi_set_gpu_clk_limit', 'amdsmi_set_gpu_clk_range',
'amdsmi_set_gpu_compute_partition',
'amdsmi_set_gpu_event_notification_mask',
'amdsmi_set_gpu_fan_speed', 'amdsmi_set_gpu_memory_partition',
'amdsmi_set_gpu_memory_partition_mode',
'amdsmi_set_gpu_od_clk_info', 'amdsmi_set_gpu_od_volt_info',
'amdsmi_set_gpu_overdrive_level', 'amdsmi_set_gpu_pci_bandwidth',
'amdsmi_set_gpu_perf_determinism_mode',
@@ -2962,7 +3068,9 @@ __all__ = \
'amdsmi_xgmi_link_status_t', 'amdsmi_xgmi_link_status_type_t',
'amdsmi_xgmi_status_t', 'processor_type_t', 'size_t',
'struct__links', 'struct_amd_metrics_table_header_t',
'struct_amdsmi_accelerator_partition_profile_config_t',
'struct_amdsmi_accelerator_partition_profile_t',
'struct_amdsmi_accelerator_partition_resource_profile_t',
'struct_amdsmi_asic_info_t', 'struct_amdsmi_board_info_t',
'struct_amdsmi_clk_info_t', 'struct_amdsmi_counter_value_t',
'struct_amdsmi_ddr_bw_metrics_t', 'struct_amdsmi_dimm_power_t',
@@ -2977,6 +3085,7 @@ __all__ = \
'struct_amdsmi_gpu_xcp_metrics_t',
'struct_amdsmi_hsmp_metrics_table_t', 'struct_amdsmi_kfd_info_t',
'struct_amdsmi_link_id_bw_type_t', 'struct_amdsmi_link_metrics_t',
'struct_amdsmi_memory_partition_config_t',
'struct_amdsmi_name_value_t', 'struct_amdsmi_od_vddc_point_t',
'struct_amdsmi_od_volt_curve_t',
'struct_amdsmi_od_volt_freq_data_t',
@@ -2996,7 +3105,7 @@ __all__ = \
'struct_amdsmi_vram_usage_t', 'struct_amdsmi_xgmi_info_t',
'struct_amdsmi_xgmi_link_status_t', 'struct_cache_',
'struct_engine_usage_', 'struct_fw_info_list_',
'struct_memory_usage_', 'struct_nps_flags_',
'struct_memory_usage_', 'struct_nps_flags_', 'struct_numa_range_',
'struct_pcie_metric_', 'struct_pcie_static_',
'struct_amdsmi_bdf_t', 'uint32_t', 'uint64_t', 'uint8_t',
'union_amdsmi_bdf_t', 'union_amdsmi_nps_caps_t']
+211
Просмотреть файл
@@ -440,6 +440,31 @@ typedef enum {
typedef rsmi_memory_partition_type_t rsmi_memory_partition_type;
/// \endcond
/**
* @brief XCP resources.
* This enum is used to identify
* various accelerator resource types.
*/
typedef enum {
RSMI_ACCELERATOR_XCC,
RSMI_ACCELERATOR_ENCODER,
RSMI_ACCELERATOR_DECODER,
RSMI_ACCELERATOR_DMA,
RSMI_ACCELERATOR_JPEG,
RSMI_ACCELERATOR_MAX
} rsmi_accelerator_partition_resource_type_t;
/**
 * @brief Accelerator Partition Resources.
 * This struct is used to identify various partition resource profiles.
 * It pairs one resource type with the amount of that resource a partition
 * can use and the number of partitions sharing it.
 */
typedef struct {
rsmi_accelerator_partition_resource_type_t resource_type; //!< Which accelerator resource this profile describes
uint32_t partition_resource; //!< Resources a partition can use, which may be shared
uint32_t num_partitions_share_resource; //!< If it is greater than 1, then resource is shared.
uint64_t reserved[6]; //!< Unused here; NOTE(review): presumably reserved for future fields - confirm
} rsmi_accelerator_partition_resource_profile_t;
/**
* @brief Temperature Metrics. This enum is used to identify various
* temperature metrics. Corresponding values will be in millidegrees
@@ -4625,6 +4650,192 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
*/
rsmi_status_t rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id);
/**
 * @brief Retrieves the available compute partition capabilities
 * for a desired device
 *
 * @details
 * Given a device index @p dv_ind , a char buffer @p compute_partition_caps ,
 * and the buffer length @p len , this function will attempt to obtain the
 * device's available compute partition capabilities string. Upon successful
 * retrieval, the obtained capabilities string is stored, NUL-terminated,
 * in the passed @p compute_partition_caps buffer.
 *
 * @param[in] dv_ind a device index
 *
 * @param[inout] compute_partition_caps a pointer to a char string variable,
 * which the device's available compute partition capabilities will be written to.
 *
 * @param[in] len the length of the caller provided buffer
 * @p compute_partition_caps , suggested length is 30 or greater.
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 * @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
 * large enough to hold the entire compute partition capabilities string.
 * In this case, only @p len bytes (including the terminating NUL) will be
 * written.
 *
 */
rsmi_status_t
rsmi_dev_compute_partition_capabilities_get(uint32_t dv_ind, char *compute_partition_caps,
uint32_t len);
/**
 * @brief Retrieves the compute partition supported xcp configs
 * for a desired device
 *
 * @details
 * Given a device index @p dv_ind , a char buffer @p supported_configs ,
 * and the buffer length @p len , this function will attempt to obtain the
 * device's compute partition supported xcp configs string. Upon successful
 * retrieval, the obtained supported xcp configs string is stored,
 * NUL-terminated, in the passed @p supported_configs buffer.
 *
 * @param[in] dv_ind a device index
 *
 * @param[inout] supported_configs a pointer to a char string variable,
 * which the device's compute partition supported xcp configs will be written to.
 *
 * @param[in] len the length of the caller provided buffer
 * @p supported_configs , suggested length is 30 or greater.
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 * @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
 * large enough to hold the entire supported xcp configs string. In this case,
 * only @p len bytes (including the terminating NUL) will be written.
 *
 */
rsmi_status_t
rsmi_dev_compute_partition_supported_xcp_configs_get(uint32_t dv_ind, char *supported_configs,
uint32_t len);
/**
 * @brief Retrieves the compute partition supported NPS configs
 * for a desired device
 *
 * @details
 * Given a device index @p dv_ind , a char buffer @p supported_configs ,
 * and the buffer length @p len , this function will attempt to obtain the
 * device's compute partition supported NPS configs string. Upon successful
 * retrieval, the obtained supported NPS configs string is stored,
 * NUL-terminated, in the passed @p supported_configs buffer.
 *
 * @param[in] dv_ind a device index
 *
 * @param[inout] supported_configs a pointer to a char string variable,
 * which the device's compute partition supported NPS configs will be written to.
 *
 * @param[in] len the length of the caller provided buffer
 * @p supported_configs , suggested length is 30 or greater.
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 * @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
 * large enough to hold the entire supported NPS configs string. In this case,
 * only @p len bytes (including the terminating NUL) will be written.
 *
 */
rsmi_status_t
rsmi_dev_compute_partition_supported_nps_configs_get(uint32_t dv_ind, char *supported_configs,
uint32_t len);
/**
 * @brief Retrieves the current compute partition xcp config
 * for a desired device
 *
 * @details
 * Given a device index @p dv_ind , a char buffer @p current_xcp_config ,
 * and the buffer length @p len , this function will attempt to obtain the
 * device's current compute partition xcp config string. Upon successful
 * retrieval, the obtained current xcp config string is stored,
 * NUL-terminated, in the passed @p current_xcp_config buffer.
 *
 * @param[in] dv_ind a device index
 *
 * @param[inout] current_xcp_config a pointer to a char string variable,
 * which the device's current compute partition xcp config will be written to.
 *
 * @param[in] len the length of the caller provided buffer
 * @p current_xcp_config , suggested length is 30 or greater.
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 * @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
 * large enough to hold the entire current xcp config string. In this case,
 * only @p len bytes (including the terminating NUL) will be written.
 *
 */
rsmi_status_t rsmi_dev_current_compute_xcp_config_get(uint32_t dv_ind, char *current_xcp_config,
uint32_t len);
/**
 * @brief Modifies a selected device's compute partition XCP config setting.
 *
 * @details Given a device index @p dv_ind, a type of compute partition
 * @p xcp_config, this function will attempt to update the selected
 * device's compute partition XCP config.
 *
 * The requested config is compared against the device's supported XCP
 * configs. If it is not listed, an error is logged but the write is still
 * attempted, and the status of that write is what gets returned. If the
 * supported XCP configs cannot be read at all, that read's status is
 * returned and nothing is written.
 *
 * @param[in] dv_ind a device index
 *
 * @param[in] xcp_config using enum ::rsmi_compute_partition_type_t,
 * define what the selected device's compute partition XCP config should be
 * updated to.
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_PERMISSION function requires root access
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 * @retval ::RSMI_STATUS_SETTING_UNAVAILABLE the provided setting is
 * unavailable for current device
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function
 * @retval ::RSMI_STATUS_BUSY A resource or mutex could not be acquired
 * because it is already being used - device is busy
 *
 */
rsmi_status_t
rsmi_dev_compute_partition_xcp_config_set(uint32_t dv_ind,
rsmi_compute_partition_type_t xcp_config);
/**
 * @brief Retrieves a selected device's compute partition resource profile.
 *
 * @details Given a device index @p dv_ind, a pointer to a requested resource of
 * rsmi_accelerator_partition_resource_type_t @p type, and a rsmi_accelerator_partition_resource_profile_t
 * @p profile this function will write the current XCP config's
 * resource profile to its @p profile.
 *
 * @param[in] dv_ind a device index
 *
 * @param[in] type a pointer to a requested resource using enum ::rsmi_accelerator_partition_resource_type_t.
 * Must not be null; a null pointer yields ::RSMI_STATUS_INVALID_ARGS.
 *
 * @param[inout] profile a pointer to the requested rsmi_accelerator_partition_resource_profile_t details
 *
 * @retval ::RSMI_STATUS_SUCCESS call was successful
 * @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
 * support this function with the given arguments
 * @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
 */
rsmi_status_t rsmi_dev_compute_partition_resource_profile_get(uint32_t dv_ind,
rsmi_accelerator_partition_resource_type_t *type,
rsmi_accelerator_partition_resource_profile_t *profile);
/** @} */ // end of ComputePartition
/*****************************************************************************/
+20
Просмотреть файл
@@ -163,6 +163,26 @@ enum DevInfoTypes {
kDevComputePartition,
kDevMemoryPartition,
kDevAvailableMemoryPartition,
kDevSupportedXcpConfigs,
kDevSupportedNpsConfigs,
kDevXcpConfig,
/**
* Possible xcp config resources start
*/
kDevDecoderInst,
kDevDecoderShared,
kDevEncoderInst,
kDevEncoderShared,
kDevDmaInst,
kDevDmaShared,
kDevJpegInst,
kDevJpegShared,
kDevXccInst,
kDevXccShared,
/**
* Possible xcp config resources end
*/
// The information read from pci core sysfs
kDevPCieTypeStart = 1000,
+2
Просмотреть файл
@@ -118,6 +118,8 @@ rsmi_status_t rsmi_dev_number_of_computes_get(uint32_t dv_ind, uint32_t* num_com
// String clean-up helpers. Each takes its input by const reference and
// returns the result as a new std::string; the argument is not modified.
// NOTE(review): exact trimming rules (which characters count as whitespace,
// and how trimAllWhiteSpace differs from removeWhitespace) are not visible
// from these declarations - confirm against the definitions.
std::string leftTrim(const std::string &s);
std::string rightTrim(const std::string &s);
std::string trim(const std::string &s);
std::string trimAllWhiteSpace(const std::string &s);
std::string removeWhitespace(const std::string &s);
std::string removeNewLines(const std::string &s);
std::string removeString(const std::string origStr,
+518
Просмотреть файл
@@ -5618,6 +5618,524 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
CATCH
}
// Copies the device's available compute partition capabilities string into
// the caller's buffer.
//
// The value is read with get_dev_value_line(kDevAvailableComputePartition).
// At most len - 1 characters are copied and the result is always
// NUL-terminated.
//
// dv_ind                  device index
// compute_partition_caps  caller buffer that receives the string
// len                     size of compute_partition_caps in bytes
//
// Returns RSMI_STATUS_SUCCESS on success, RSMI_STATUS_INVALID_ARGS when the
// buffer is null or len is 0, RSMI_STATUS_INSUFFICIENT_SIZE when the string
// had to be truncated, otherwise the status from get_dev_value_line.
rsmi_status_t rsmi_dev_compute_partition_capabilities_get(
    uint32_t dv_ind, char *compute_partition_caps, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Reject unusable buffers before touching them: with len == 0 the
  // unsigned expression (len - 1) below would wrap around and let copy()
  // write the whole string past the end of the caller's buffer.
  if ((compute_partition_caps == nullptr) || (len == 0)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string availableComputePartitions;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevAvailableComputePartition,
                         dv_ind, &availableComputePartitions);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits, always leaving room for the terminating NUL.
  std::size_t length = availableComputePartitions.copy(compute_partition_caps, len - 1);
  compute_partition_caps[length] = '\0';

  // The (truncated) data is already in the buffer; tell the caller it is
  // incomplete.
  if (len < (availableComputePartitions.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
     << " | Data: " << compute_partition_caps
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
// Copies the device's supported XCP configs string into the caller's buffer.
//
// The value is read with get_dev_value_line(kDevSupportedXcpConfigs).
// At most len - 1 characters are copied and the result is always
// NUL-terminated.
//
// dv_ind             device index
// supported_configs  caller buffer that receives the string
// len                size of supported_configs in bytes
//
// Returns RSMI_STATUS_SUCCESS on success, RSMI_STATUS_INVALID_ARGS when the
// buffer is null or len is 0, RSMI_STATUS_INSUFFICIENT_SIZE when the string
// had to be truncated, otherwise the status from get_dev_value_line.
rsmi_status_t rsmi_dev_compute_partition_supported_xcp_configs_get(uint32_t dv_ind,
    char *supported_configs, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Reject unusable buffers before touching them: with len == 0 the
  // unsigned expression (len - 1) below would wrap around and let copy()
  // write the whole string past the end of the caller's buffer.
  if ((supported_configs == nullptr) || (len == 0)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string supported_xcp_configs;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevSupportedXcpConfigs,
                         dv_ind, &supported_xcp_configs);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits, always leaving room for the terminating NUL.
  std::size_t length = supported_xcp_configs.copy(supported_configs, len - 1);
  supported_configs[length] = '\0';

  // The (truncated) data is already in the buffer; tell the caller it is
  // incomplete.
  if (len < (supported_xcp_configs.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
     << " | Data: " << supported_configs
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
// Copies the device's supported NPS configs string into the caller's buffer.
//
// The value is read with get_dev_value_line(kDevSupportedNpsConfigs).
// At most len - 1 characters are copied and the result is always
// NUL-terminated.
//
// dv_ind             device index
// supported_configs  caller buffer that receives the string
// len                size of supported_configs in bytes
//
// Returns RSMI_STATUS_SUCCESS on success, RSMI_STATUS_INVALID_ARGS when the
// buffer is null or len is 0, RSMI_STATUS_INSUFFICIENT_SIZE when the string
// had to be truncated, otherwise the status from get_dev_value_line.
rsmi_status_t rsmi_dev_compute_partition_supported_nps_configs_get(uint32_t dv_ind,
    char *supported_configs, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Reject unusable buffers before touching them: with len == 0 the
  // unsigned expression (len - 1) below would wrap around and let copy()
  // write the whole string past the end of the caller's buffer.
  if ((supported_configs == nullptr) || (len == 0)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string supported_nps_configs;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevSupportedNpsConfigs,
                         dv_ind, &supported_nps_configs);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits, always leaving room for the terminating NUL.
  std::size_t length = supported_nps_configs.copy(supported_configs, len - 1);
  supported_configs[length] = '\0';

  // The (truncated) data is already in the buffer; tell the caller it is
  // incomplete.
  if (len < (supported_nps_configs.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
     << " | Data: " << supported_configs
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
// Copies the device's current XCP config string into the caller's buffer.
//
// The value is read with get_dev_value_line(kDevXcpConfig). At most len - 1
// characters are copied and the result is always NUL-terminated.
//
// dv_ind              device index
// current_xcp_config  caller buffer that receives the string
// len                 size of current_xcp_config in bytes
//
// Returns RSMI_STATUS_SUCCESS on success, RSMI_STATUS_INVALID_ARGS when the
// buffer is null or len is 0, RSMI_STATUS_INSUFFICIENT_SIZE when the string
// had to be truncated, otherwise the status from get_dev_value_line.
rsmi_status_t rsmi_dev_current_compute_xcp_config_get(
    uint32_t dv_ind, char *current_xcp_config, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Reject unusable buffers before touching them: with len == 0 the
  // unsigned expression (len - 1) below would wrap around and let copy()
  // write the whole string past the end of the caller's buffer.
  if ((current_xcp_config == nullptr) || (len == 0)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string currentXcpConfigStr;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevXcpConfig,
                         dv_ind, &currentXcpConfigStr);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits, always leaving room for the terminating NUL.
  std::size_t length = currentXcpConfigStr.copy(current_xcp_config, len - 1);
  current_xcp_config[length] = '\0';

  // The (truncated) data is already in the buffer; tell the caller it is
  // incomplete.
  if (len < (currentXcpConfigStr.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
     << " | Data: " << currentXcpConfigStr
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
// Writes a new compute partition XCP config for the given device.
//
// Steps:
//   1. Require root / sudo (RSMI_STATUS_PERMISSION otherwise).
//   2. Map xcp_config to its string form; anything other than
//      CPX/SPX/DPX/TPX/QPX is rejected with RSMI_STATUS_INVALID_ARGS.
//   3. Read the supported XCP configs; a failure of that read is returned
//      as-is. If the requested config is not listed, an error is logged but
//      the write is still attempted so that the write reports the real error.
//   4. Write the string through Device::writeDevInfo(kDevXcpConfig) and
//      return the errno-derived status.
//
// dv_ind      device index
// xcp_config  requested compute partition XCP config
rsmi_status_t
rsmi_dev_compute_partition_xcp_config_set(uint32_t dv_ind,
                                          rsmi_compute_partition_type_t xcp_config) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);
  REQUIRE_ROOT_ACCESS
  if (!amd::smi::is_sudo_user()) {
    return RSMI_STATUS_PERMISSION;
  }

  std::string newXcpConfigStr = "";
  std::string availableXcpConfigsStr = "";
  const int kLen30 = 30;
  char available_xcp_configs[kLen30];
  available_xcp_configs[0] = '\0';

  switch (xcp_config) {
    case RSMI_COMPUTE_PARTITION_CPX:
    case RSMI_COMPUTE_PARTITION_SPX:
    case RSMI_COMPUTE_PARTITION_DPX:
    case RSMI_COMPUTE_PARTITION_TPX:
    case RSMI_COMPUTE_PARTITION_QPX:
      newXcpConfigStr =
          mapRSMIToStringComputePartitionTypes.at(xcp_config);
      break;
    case RSMI_COMPUTE_PARTITION_INVALID:
    default:
      newXcpConfigStr =
          mapRSMIToStringComputePartitionTypes.at(RSMI_COMPUTE_PARTITION_INVALID);
      ss << __PRETTY_FUNCTION__
         << " | ======= end ======= "
         << " | Fail "
         << " | Device #: " << dv_ind
         << " | Type: "
         << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
         << " | Data: " << newXcpConfigStr
         << " | Cause: requested setting was invalid"
         << " | Returning = "
         << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
      LOG_ERROR(ss);
      return RSMI_STATUS_INVALID_ARGS;
  }

  // Look up which XCP configs the device supports. Failing to read the list
  // aborts the request with the reader's status.
  rsmi_status_t available_ret =
      rsmi_dev_compute_partition_supported_xcp_configs_get(dv_ind, available_xcp_configs, kLen30);
  if (available_ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Data: " << newXcpConfigStr
       << " | Cause: could not find an available xcp configs file"
       << " | Returning = "
       << getRSMIStatusString(available_ret) << " |";
    LOG_ERROR(ss);
    return available_ret;
  } else {
    availableXcpConfigsStr = available_xcp_configs;
  }

  bool isXcpConfigAvailable =
      amd::smi::containsString(availableXcpConfigsStr,
                               newXcpConfigStr);
  if (!isXcpConfigAvailable) {
    ss << __PRETTY_FUNCTION__
       << " | Fail - Detected that the requested xcp config is not available"
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Data (newXcpConfigStr): " << newXcpConfigStr
       << " | Data (availableXcpConfigsStr): " << availableXcpConfigsStr;
    LOG_ERROR(ss);
    // We do not return RSMI_STATUS_INVALID_ARGS
    // Instead we try setting anyways as requested
    // write will provide the correct error code
  }

  ss << __PRETTY_FUNCTION__ << " | about to try writing |"
     << newXcpConfigStr
     << "| size of string = " << std::dec << newXcpConfigStr.size();
  LOG_DEBUG(ss);
  GET_DEV_FROM_INDX
  DEVICE_MUTEX
  int ret = dev->writeDevInfo(amd::smi::kDevXcpConfig,
                              newXcpConfigStr);
  rsmi_status_t returnResponse = amd::smi::ErrnoToRsmiStatus(ret);

  // Report the real outcome of the write instead of always logging success.
  const bool writeSucceeded = (returnResponse == RSMI_STATUS_SUCCESS);
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << (writeSucceeded ? " | Success " : " | Fail ")
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
     << " | Data: " << newXcpConfigStr
     << " | Returning = "
     << getRSMIStatusString(returnResponse) << " |";
  if (writeSucceeded) {
    LOG_TRACE(ss);
  } else {
    LOG_ERROR(ss);
  }
  return returnResponse;
  CATCH
}
// Reads the device attribute `info_type` of device `dv_ind` and parses it as
// a base-10 unsigned integer. On a successful read whose value fits in 32
// bits, the value is stored in *out; otherwise *out is left untouched.
// Returns the status of the read itself.
static rsmi_status_t read_partition_resource_value(
    amd::smi::DevInfoTypes info_type, uint32_t dv_ind, uint32_t *out) {
  std::string val_str;
  const rsmi_status_t status = get_dev_value_str(info_type, dv_ind, &val_str);
  if (status == RSMI_STATUS_SUCCESS) {
    const uint64_t val_ul = strtoul(val_str.c_str(), nullptr, 10);
    if (val_ul <= std::numeric_limits<uint32_t>::max()) {
      *out = static_cast<uint32_t>(val_ul);
    }
  }
  return status;
}

// Fills `profile` with the resource counts of the accelerator resource
// selected by `*type` (XCC, encoder, decoder, DMA or JPEG) for device `dv_ind`.
//
// dv_ind   index of the device to query
// type     [in]  requested resource type; must not be null and must lie in
//                [0, RSMI_ACCELERATOR_MAX]
// profile  [out] must not be null. partition_resource and
//                num_partitions_share_resource are preset to UINT32_MAX and
//                only overwritten by values that were read successfully.
//
// Returns RSMI_STATUS_INVALID_ARGS for a null pointer or an out-of-range
// type, RSMI_STATUS_NOT_SUPPORTED when no attribute files are known for the
// type, and otherwise the status of the "shared" attribute read.
rsmi_status_t rsmi_dev_compute_partition_resource_profile_get(uint32_t dv_ind,
    rsmi_accelerator_partition_resource_type_t *type,
    rsmi_accelerator_partition_resource_profile_t *profile) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Both pointers are dereferenced below, so reject either one being null.
  if (type == nullptr || profile == nullptr) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, "
       << (type == nullptr ? "type" : "profile") << " was a null ptr"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS, false);
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  // initialize the profile
  profile->partition_resource = std::numeric_limits<uint32_t>::max();
  profile->num_partitions_share_resource = std::numeric_limits<uint32_t>::max();

  DEVICE_MUTEX

  // check if user provided supported resource types
  // Note: RSMI_ACCELERATOR_MAX is == largest enum value
  const int64_t requested = static_cast<int64_t>(*type);
  if (requested < 0 || requested > static_cast<int64_t>(RSMI_ACCELERATOR_MAX)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, type was out of range"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS, false);
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  // Attribute pair backing each resource type: `inst` feeds
  // partition_resource, `shared` feeds num_partitions_share_resource.
  struct ResourceFiles {
    rsmi_accelerator_partition_resource_type_t resource;
    amd::smi::DevInfoTypes inst;
    amd::smi::DevInfoTypes shared;
  };
  static const ResourceFiles kResourceFiles[] = {
    {RSMI_ACCELERATOR_XCC, amd::smi::kDevXccInst, amd::smi::kDevXccShared},
    {RSMI_ACCELERATOR_ENCODER, amd::smi::kDevEncoderInst,
     amd::smi::kDevEncoderShared},
    {RSMI_ACCELERATOR_DECODER, amd::smi::kDevDecoderInst,
     amd::smi::kDevDecoderShared},
    {RSMI_ACCELERATOR_DMA, amd::smi::kDevDmaInst, amd::smi::kDevDmaShared},
    // RSMI_ACCELERATOR_MAX == RSMI_ACCELERATOR_JPEG
    {RSMI_ACCELERATOR_JPEG, amd::smi::kDevJpegInst, amd::smi::kDevJpegShared},
  };

  const ResourceFiles *files = nullptr;
  for (const auto &entry : kResourceFiles) {
    if (entry.resource == *type) {
      files = &entry;
      break;
    }
  }

  if (files == nullptr) {
    // In range but without a known attribute pair: report "not supported"
    // without touching any attribute type that was never selected.
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: no resource files are known for the requested type"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_NOT_SUPPORTED, false);
    LOG_ERROR(ss);
    return RSMI_STATUS_NOT_SUPPORTED;
  }

  profile->resource_type = files->resource;

  uint32_t partition_resource = std::numeric_limits<uint32_t>::max();
  uint32_t num_partitions_share_resource =
      std::numeric_limits<uint32_t>::max();

  // NOTE: the returned status is that of the second ("shared") read, which
  // keeps the behavior callers already see. A failed "inst" read simply
  // leaves partition_resource at its UINT32_MAX sentinel.
  (void)read_partition_resource_value(files->inst, dv_ind,
                                      &partition_resource);
  const rsmi_status_t ret = read_partition_resource_value(
      files->shared, dv_ind, &num_partitions_share_resource);

  profile->partition_resource = partition_resource;
  profile->num_partitions_share_resource = num_partitions_share_resource;

  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << (ret == RSMI_STATUS_SUCCESS ? " | Success " : " | Fail ")
     << " | Device #: " << dv_ind
     << " | Type (partition_resource): "
     << amd::smi::Device::get_type_string(files->inst)
     << " | Data: " << profile->partition_resource
     << " | Type (num_partitions_share_resource): "
     << amd::smi::Device::get_type_string(files->shared)
     << " | Data: " << profile->num_partitions_share_resource
     << " | Returning = "
     << getRSMIStatusString(ret, false) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
static rsmi_status_t get_memory_partition(uint32_t dv_ind,
std::string &memory_partition) {
TRY
+58
Просмотреть файл
@@ -120,6 +120,21 @@ static const char *kDevAvailableComputePartitionFName =
// Attribute file names for the current compute/memory partition and the
// available memory partition settings.
static const char *kDevComputePartitionFName = "current_compute_partition";
static const char *kDevMemoryPartitionFName = "current_memory_partition";
static const char *kDevAvailableMemoryPartitionFName = "available_memory_partition";
// Attribute files located under the compute_partition_config/ directory:
// the supported XCP and NPS configurations and the xcp_config file.
static const char *kDevSupportedXcpConfigsFName = "compute_partition_config/supported_xcp_configs";
static const char *kDevSupportedNpsConfigsFName = "compute_partition_config/supported_nps_configs";
static const char *kDevXcpConfigFName = "compute_partition_config/xcp_config";
// XCP config resource files - not every file will exist in all ASICs (ex. Decoders vs Encoders)
// Each resource (dec, enc, dma, jpeg, xcc) has a num_inst / num_shared pair.
// The profile getter reads num_inst into partition_resource and num_shared
// into num_partitions_share_resource.
static const char *kDevDecoderInstFName = "compute_partition_config/dec/num_inst";
static const char *kDevDecoderSharedFName = "compute_partition_config/dec/num_shared";
static const char *kDevEncoderInstFName = "compute_partition_config/enc/num_inst";
static const char *kDevEncoderSharedFName = "compute_partition_config/enc/num_shared";
static const char *kDevDmaInstFName = "compute_partition_config/dma/num_inst";
static const char *kDevDmaSharedFName = "compute_partition_config/dma/num_shared";
static const char *kDevJpegInstFName = "compute_partition_config/jpeg/num_inst";
static const char *kDevJpegSharedFName = "compute_partition_config/jpeg/num_shared";
static const char *kDevXccInstFName = "compute_partition_config/xcc/num_inst";
static const char *kDevXccSharedFName = "compute_partition_config/xcc/num_shared";
// Firmware version files
static const char *kDevFwVersionAsdFName = "fw_version/asd_fw_version";
@@ -309,6 +324,21 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
{kDevComputePartition, kDevComputePartitionFName},
{kDevMemoryPartition, kDevMemoryPartitionFName},
{kDevAvailableMemoryPartition, kDevAvailableMemoryPartitionFName},
{kDevSupportedXcpConfigs, kDevSupportedXcpConfigsFName},
{kDevSupportedNpsConfigs, kDevSupportedNpsConfigsFName},
{kDevXcpConfig, kDevXcpConfigFName},
// XCP config resource files
{kDevDecoderInst, kDevDecoderInstFName},
{kDevDecoderShared, kDevDecoderSharedFName},
{kDevEncoderInst, kDevEncoderInstFName},
{kDevEncoderShared, kDevEncoderSharedFName},
{kDevDmaInst, kDevDmaInstFName},
{kDevDmaShared, kDevDmaSharedFName},
{kDevJpegInst, kDevJpegInstFName},
{kDevJpegShared, kDevJpegSharedFName},
{kDevXccInst, kDevXccInstFName},
{kDevXccShared, kDevXccSharedFName},
};
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
@@ -466,6 +496,20 @@ Device::devInfoTypesStrings = {
{kDevXgmiPlpd, "kDevXgmiPlpd"},
{kDevProcessIsolation, "kDevProcessIsolation"},
{kDevShaderClean, "kDevShaderClean"},
{kDevSupportedXcpConfigs, "kDevSupportedXcpConfigs"},
{kDevSupportedNpsConfigs, "kDevSupportedNpsConfigs"},
{kDevXcpConfig, "kDevXcpConfig"},
{kDevDecoderInst, "kDevDecoderInst"},
{kDevDecoderShared, "kDevDecoderShared"},
{kDevEncoderInst, "kDevEncoderInst"},
{kDevEncoderShared, "kDevEncoderShared"},
{kDevDmaInst, "kDevDmaInst"},
{kDevDmaShared, "kDevDmaShared"},
{kDevJpegInst, "kDevJpegInst"},
{kDevJpegShared, "kDevJpegShared"},
{kDevXccInst, "kDevXccInst"},
{kDevXccShared, "kDevXccShared"},
};
static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
@@ -946,6 +990,7 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
return writeDevInfoStr(type, val);
case kDevComputePartition:
case kDevMemoryPartition:
case kDevXcpConfig:
return writeDevInfoStr(type, val, true);
default:
@@ -1292,6 +1337,19 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
case kDevXGMIPhysicalID:
case kDevAvailableMemoryPartition:
case kDevProcessIsolation:
case kDevSupportedXcpConfigs:
case kDevSupportedNpsConfigs:
case kDevXcpConfig:
case kDevDecoderInst:
case kDevDecoderShared:
case kDevEncoderInst:
case kDevEncoderShared:
case kDevDmaInst:
case kDevDmaShared:
case kDevJpegInst:
case kDevJpegShared:
case kDevXccInst:
case kDevXccShared:
return readDevInfoStr(type, val);
break;
+46 -4
Просмотреть файл
@@ -747,6 +747,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
// location_id, bdf, domain, bus, device,
// partition_id}
std::multimap<uint64_t, systemNode> allSystemNodes;
std::set<uint32_t> gpuNodeIdsFound;
uint32_t node_id = 0;
static const int BYTE = 8;
while (true) {
@@ -755,9 +756,24 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
int ret_unique_id = read_node_properties(node_id, "unique_id", &unique_id);
int ret_loc_id =
read_node_properties(node_id, "location_id", &location_id);
read_node_properties(node_id, "domain", &domain);
if (ret_gpu_id == 0 &&
!(ret_unique_id != 0 || ret_loc_id != 0 || ret_unique_id != 0)) {
int ret_domain = read_node_properties(node_id, "domain", &domain);
bool isANode = (ret_gpu_id == 0 &&
(ret_domain == 0 && ret_loc_id == 0));
ss << __PRETTY_FUNCTION__ << " | isAGpuNode: "
<< (isANode ? "TRUE" : "FALSE") << "; is_vm_guest(): "
<< (is_vm_guest() ? "TRUE" : "FALSE")
<< "\nret_gpu_id: " << ret_gpu_id
<< "; ret_domain: " << ret_domain
<< "; ret_loc_id: " << ret_loc_id
<< "; ret_unique_id: " << ret_unique_id
<< "\n[node_id = " << print_unsigned_hex_and_int(node_id) << "\n"
<< "; gpu_id = " << print_unsigned_hex_and_int(gpu_id) << "\n"
<< "; unique_id = " << print_unsigned_hex_and_int(unique_id) << "\n"
<< "; location_id = " << print_unsigned_hex_and_int(location_id) << "\n"
<< "; domain = " << print_unsigned_hex_and_int(domain)
<< "]\n";
LOG_DEBUG(ss);
if (isANode || (is_vm_guest() && ret_gpu_id == 0)) {
// Do not try to build a node if one of these fields
// do not exist in KFD (0 as values okay)
systemNode myNode;
@@ -776,6 +792,24 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
myNode.s_function = myNode.s_location_id & 0x7;
myNode.s_partition_id = ((myNode.s_location_id >> 28) & 0xF);
if (gpu_id != 0) { // only add gpu nodes, 0 = CPU
auto ret = gpuNodeIdsFound.insert(node_id);
if (ret.second != false) {
// only print out nodes which do not already exist
ss << __PRETTY_FUNCTION__ << " | isAGpuNode: "
<< (isANode ? "TRUE" : "FALSE") << "; is_vm_guest(): "
<< (is_vm_guest() ? "TRUE" : "FALSE")
<< "\nret_gpu_id: " << ret_gpu_id
<< "; ret_domain: " << ret_domain
<< "; ret_loc_id: " << ret_loc_id
<< "; ret_unique_id: " << ret_unique_id
<< "\n[node_id = " << print_unsigned_hex_and_int(node_id) << "\n"
<< "; gpu_id = " << print_unsigned_hex_and_int(gpu_id) << "\n"
<< "; unique_id = " << print_unsigned_hex_and_int(unique_id) << "\n"
<< "; location_id = " << print_unsigned_hex_and_int(location_id) << "\n"
<< "; domain = " << print_unsigned_hex_and_int(domain) << "\n"
<< "]\n";
LOG_DEBUG(ss);
}
allSystemNodes.emplace(unique_id, myNode);
}
} else {
@@ -866,7 +900,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
<< "; partition_id = " << std::to_string(i->second.s_partition_id)
<< "], ";
LOG_DEBUG(ss);
AddToDeviceList(d_name, primaryBdfId);
ss << __PRETTY_FUNCTION__ << " | AddToDeviceList #1 (secondary node) \n"
<< "; bdf: " << print_unsigned_hex_and_int(primaryBdfId) << "\n";
LOG_DEBUG(ss);
} else {
ss << __PRETTY_FUNCTION__ << " | primary node add ; "
<< " BDF = " << std::to_string(UINT64_MAX);
@@ -894,6 +930,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
<< "; partition_id = " << std::to_string(i->second.s_partition_id)
<< "], ";
LOG_DEBUG(ss);
ss << __PRETTY_FUNCTION__ << " | AddToDeviceList #2 (primary node) \n"
<< "; bdf: " << print_unsigned_hex_and_int(UINT64_MAX) << "\n";
LOG_DEBUG(ss);
AddToDeviceList(d_name, UINT64_MAX);
}
@@ -1029,6 +1068,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
<< "; partition_id = " << std::to_string(it->second.s_partition_id)
<< "], ";
LOG_DEBUG(ss);
ss << __PRETTY_FUNCTION__ << " | AddToDeviceList #3 (secondary node add #2) \n"
<< "; bdf: " << print_unsigned_hex_and_int(myBdfId) << "\n";
LOG_DEBUG(ss);
AddToDeviceList(secNode, myBdfId);
allSystemNodes.erase(it++);
numb_nodes--;
+21 -3
Просмотреть файл
@@ -383,6 +383,7 @@ std::string removeNewLines(const std::string &s) {
return s;
}
// Trims white space from both ends of string
std::string trim(const std::string &s) {
if (!s.empty()) {
// remove new lines -> trim white space at ends
@@ -392,6 +393,23 @@ std::string trim(const std::string &s) {
return s;
}
// Returns a copy of `s` with its ends trimmed and every remaining white
// space character removed. An empty input is returned unchanged.
std::string trimAllWhiteSpace(const std::string &s) {
  if (s.empty()) {
    return s;
  }
  // First pass: trim() handles new lines and white space at both ends.
  const std::string trimmed = trim(s);
  // Second pass: drop whatever white space is left in between.
  return removeWhitespace(trimmed);
}
// Returns a copy of `s` with every run of white space characters (as
// matched by the regex class \s) removed. An empty input is returned as is.
std::string removeWhitespace(const std::string &s) {
  if (s.empty()) {
    return s;
  }
  // Compile the pattern once: constructing a std::regex is expensive, and a
  // const regex can safely be reused across calls.
  static const std::regex kWhitespaceRun("\\s+");
  return std::regex_replace(s, kWhitespaceRun, "");
}
// Given original string and string to remove (removeMe)
// Return will provide the resulting modified string with the removed string(s)
std::string removeString(const std::string origStr,
@@ -908,18 +926,18 @@ std::string getBuildType() {
}
// Returns the path of the object file that contains this function, as
// reported by dladdr(). Returns an empty string (never nullptr) when the
// lookup is unavailable or fails.
const char *my_fname(void) {
#ifdef _GNU_SOURCE
  Dl_info dl_info;
  // dladdr() returns 0 on failure, in which case dl_info must not be read.
  if (dladdr(reinterpret_cast<void *>(my_fname), &dl_info) != 0 &&
      dl_info.dli_fname != nullptr) {
    return dl_info.dli_fname;
  }
#endif
  // A string literal has static storage duration, so the returned pointer
  // stays valid (unlike c_str() of a function-local std::string).
  return "";
}
std::string getMyLibPath(void) {
std::string libName = "rocm-smi-lib";
std::string libName = "amd-smi-lib";
std::string path = std::string(my_fname());
if (path.empty()) {
path = "Could not find library path for " + libName;
Разница между файлами не показана из-за своего большого размера Загрузить разницу
+1 -1
Просмотреть файл
@@ -128,7 +128,7 @@ amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const {
amdsmi_status_t ret;
uint32_t fd = 0;
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;;
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
return drm_.amdgpu_query_vbios(fd, info);
}