[SWDEV-488276] Add partition 2.0 functionality (#44)
Changes:
* CLI:
- Updated amd-smi partition
- Updated amd-smi partition -c
- Updated amd-smi partition -m
- Updated amd-smi partition -a
- Updated amd-smi set -M <NPS1/NPS2/NPS4/NPS8>
- Updated amd-smi set -C <SPX/DPX/QPX/TPX/CPX>
- Updated amd-smi set -C <ACCELERATOR_TYPE> or <PROFILE_INDEX>
Where PROFILE_INDEX = available ACCELERATOR_TYPES
- Updated amd-smi set --help, now includes more detail for
amd-smi set -C <ACCELERATOR_TYPE> or <PROFILE_INDEX>
* API:
- Added amdsmi_get_gpu_memory_partition_config
- Added amdsmi_set_gpu_memory_partition_mode
- Added amdsmi_get_gpu_accelerator_partition_profile_config
- Updated amdsmi_get_gpu_accelerator_partition_profile_config
- Added amdsmi_set_gpu_accelerator_partition_profile
Signed-off-by: Charis Poag <Charis.Poag@amd.com>
[ROCm/amdsmi commit: c1cd2b46ef]
Этот коммит содержится в:
коммит произвёл
Maisam Arif
родитель
8f203f8bca
Коммит
fa81bcb513
@@ -4156,14 +4156,35 @@ class AMDSMICommands():
|
||||
|
||||
self.logger.store_output(args.gpu, 'perfdeterminism', f"Successfully enabled performance determinism and set GFX clock frequency to {args.perf_determinism}")
|
||||
if args.compute_partition:
|
||||
compute_partition = amdsmi_interface.AmdSmiComputePartitionType[args.compute_partition]
|
||||
try:
|
||||
amdsmi_interface.amdsmi_set_gpu_compute_partition(args.gpu, compute_partition)
|
||||
(accelerator_set_choices, accelerator_profiles) = self.helpers.get_accelerator_choices_types_indices()
|
||||
logging.debug("args.compute_partition: %s; Accelerator_set_choices: %s", str(args.compute_partition), str(json.dumps(accelerator_set_choices, indent=4)))
|
||||
if args.compute_partition in accelerator_profiles['profile_types']:
|
||||
compute_partition = amdsmi_interface.AmdSmiComputePartitionType[args.compute_partition]
|
||||
index = accelerator_profiles['profile_types'].index(args.compute_partition)
|
||||
attempted_to_set = f"Attempted to set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]} on {gpu_string}"
|
||||
amdsmi_interface.amdsmi_set_gpu_compute_partition(args.gpu, compute_partition)
|
||||
self.logger.store_output(args.gpu, 'accelerator_partition', f"Successfully set accelerator partition to {args.compute_partition} (profile #{accelerator_profiles['profile_indices'][int(index)]})")
|
||||
elif args.compute_partition in accelerator_profiles['profile_indices']:
|
||||
compute_partition = int(args.compute_partition)
|
||||
index = accelerator_profiles['profile_indices'].index(args.compute_partition)
|
||||
attempted_to_set = f"Attempted to set accelerator partition to {accelerator_profiles['profile_types'][int(index)]} (profile #{args.compute_partition}) on {gpu_string}"
|
||||
amdsmi_interface.amdsmi_set_gpu_accelerator_partition_profile(args.gpu, compute_partition)
|
||||
self.logger.store_output(args.gpu, 'accelerator_partition', f"Successfully set accelerator partition to {accelerator_profiles['profile_types'][int(index)]} (profile #{args.compute_partition})")
|
||||
else:
|
||||
raise ValueError(f"Invalid accelerator configuration {args.compute_partition} on {gpu_string}")
|
||||
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_NO_PERM:
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
raise ValueError(f"Unable to set compute partition to {args.compute_partition} on {gpu_string}") from e
|
||||
self.logger.store_output(args.gpu, 'computepartition', f"Successfully set compute partition to {args.compute_partition}")
|
||||
elif e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE:
|
||||
print(f"\n{attempted_to_set}\n"
|
||||
f"\n[AMDSMI_STATUS_SETTING_UNAVAILABLE] Please check amd-smi partition --memory --accelerator for available profiles.\n"
|
||||
"Users may need to switch memory partition to another mode in order to enable the desired accelerator partition.\n")
|
||||
raise ValueError(f"[AMDSMI_STATUS_SETTING_UNAVAILABLE] Unable to set accelerator partition to {args.compute_partition} on {gpu_string}") from e
|
||||
else:
|
||||
raise ValueError(f"Unable to set accelerator partition to {args.compute_partition} on {gpu_string}") from e
|
||||
|
||||
if args.memory_partition:
|
||||
lock = multiprocessing.Lock()
|
||||
lock.acquire()
|
||||
@@ -4172,49 +4193,18 @@ class AMDSMICommands():
|
||||
# Info used if AMDSMI_STATUS_INVAL is caught & to set progress bar #
|
||||
####################################################################
|
||||
try:
|
||||
memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(args.gpu) # this info likely actually comes from different apis than used here
|
||||
memory_dict = {'caps': "N/A", 'current': "N/A"}
|
||||
memory_partition_config = amdsmi_interface.amdsmi_get_gpu_memory_partition_config(args.gpu)
|
||||
memory_dict['caps'] = str(memory_partition_config['partition_caps']).replace("]", "").replace("[", "").replace("\'", "").replace(" ", "")
|
||||
memory_dict['current'] = memory_partition_config['mp_mode']
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_partition = "N/A"
|
||||
logging.debug("Failed to get current memory partition for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
try:
|
||||
mem_caps_str = "N/A"
|
||||
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(args.gpu)
|
||||
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
|
||||
mem_caps = temp_mem_caps.nps_cap_mask
|
||||
if temp_mem_caps.amdsmi_nps_flags_t == None:
|
||||
mem_caps_list = []
|
||||
if mem_caps & 1 == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if mem_caps & 2 == 2:
|
||||
mem_caps_list.append("NPS2")
|
||||
if mem_caps & 4 == 4:
|
||||
mem_caps_list.append("NPS4")
|
||||
if mem_caps & 8 == 8:
|
||||
mem_caps_list.append("NPS8")
|
||||
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "")
|
||||
else:
|
||||
mem_caps = temp_mem_caps.amdsmi_nps_flags_t
|
||||
mem_caps_list = []
|
||||
if mem_caps.nps1_cap == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if mem_caps.nps2_cap == 1:
|
||||
mem_caps_list.append("NPS2")
|
||||
if mem_caps.nps4_cap == 1:
|
||||
mem_caps_list.append("NPS4")
|
||||
if mem_caps.nps8_cap == 1:
|
||||
mem_caps_list.append("NPS8")
|
||||
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
|
||||
if mem_caps_str == "":
|
||||
mem_caps_str = "N/A"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
memory_dict = {'caps': mem_caps_str, 'current': memory_partition}
|
||||
|
||||
###############################################################
|
||||
# memory partition set starts here #
|
||||
###############################################################
|
||||
showProgressBar = False
|
||||
if ((str(memory_dict['current']) != "N/A") and (str(args.memory_partition) in mem_caps_str)
|
||||
if ((str(memory_dict['current']) != "N/A") and (str(args.memory_partition) in memory_dict['caps'])
|
||||
and ((str(memory_dict['current']) != str(args.memory_partition)))):
|
||||
showProgressBar = True # Only show progress bar if
|
||||
# 1) Device can set memory partition modes
|
||||
@@ -4259,7 +4249,7 @@ class AMDSMICommands():
|
||||
raise PermissionError('Command requires elevation') from e
|
||||
if e.get_error_code() == amdsmi_interface.amdsmi_wrapper.AMDSMI_STATUS_INVAL:
|
||||
out = f"[AMDSMI_STATUS_INVAL] Unable to set memory partition to {args.memory_partition} on {gpu_string}"
|
||||
print(f"Valid Memory partition Modes: {mem_caps_str}\n")
|
||||
print(f"Valid Memory partition Modes: {memory_dict['caps']}\n")
|
||||
self.logger.store_output(args.gpu, 'memory_partition', out)
|
||||
self.logger.print_output()
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
@@ -5711,15 +5701,21 @@ class AMDSMICommands():
|
||||
if accelerator:
|
||||
args.accelerator = accelerator
|
||||
|
||||
###########################################
|
||||
# amd-smi partition (no args) #
|
||||
###########################################
|
||||
# if no args are present, then everything should be displayed
|
||||
if not args.current and not args.memory and not args.accelerator:
|
||||
args.current = True
|
||||
args.memory = True
|
||||
args.accelerator = True
|
||||
|
||||
###########################################
|
||||
# amd-smi partition --current #
|
||||
###########################################
|
||||
if args.current:
|
||||
self.logger.table_header = ''.rjust(7)
|
||||
current_header = "GPU_ID".ljust(13) + \
|
||||
current_header = "GPU_ID".ljust(8) + \
|
||||
"MEMORY".ljust(8) + \
|
||||
"ACCELERATOR_TYPE".ljust(18) + \
|
||||
"ACCELERATOR_PROFILE_INDEX".ljust(27) + \
|
||||
@@ -5733,11 +5729,11 @@ class AMDSMICommands():
|
||||
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
|
||||
profile_type = partition_dict['partition_profile']['profile_type']
|
||||
profile_index = partition_dict['partition_profile']['profile_index']
|
||||
partition_id = partition_dict['partition_id']
|
||||
partition_id = str(partition_dict['partition_id']).replace("[", "").replace("]", "").replace(" ", "")
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
profile_type = "N/A"
|
||||
profile_index = "N/A"
|
||||
partition_id = "N/A"
|
||||
partition_id = "0"
|
||||
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
try:
|
||||
current_mem_cap = amdsmi_interface.amdsmi_get_gpu_memory_partition(gpu)
|
||||
@@ -5756,65 +5752,52 @@ class AMDSMICommands():
|
||||
tabular_output.append(tabular_output_dict)
|
||||
|
||||
self.logger.multiple_device_output = tabular_output
|
||||
self.logger.table_title = "CURRENT_PARTITION"
|
||||
self.logger.print_output(multiple_device_enabled=True, tabular=True)
|
||||
self.logger.table_title = "\nCURRENT_PARTITION"
|
||||
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
|
||||
###########################################
|
||||
# amd-smi partition --memory #
|
||||
###########################################
|
||||
if args.memory:
|
||||
tabular_output = []
|
||||
self.logger.table_header = ''.rjust(7)
|
||||
current_header = "GPU_ID".ljust(8) + \
|
||||
"MEMORY_PARTITION_CAPS".ljust(23) + \
|
||||
"CURRENT_MEMORY_PARTITION".ljust(26)
|
||||
self.logger.table_header = current_header + self.logger.table_header.strip()
|
||||
|
||||
for gpu in args.gpu:
|
||||
gpu_id = self.helpers.get_gpu_id_from_device_handle(gpu)
|
||||
mem_caps_str = "N/A"
|
||||
current_memory_partition = "N/A"
|
||||
try:
|
||||
memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(gpu) # this info likely actually comes from different apis than used here
|
||||
memory_partition_config = amdsmi_interface.amdsmi_get_gpu_memory_partition_config(gpu)
|
||||
mem_caps_str = str(memory_partition_config['partition_caps']).replace("]", "").replace("[", "").replace("\'", "").replace(" ", "")
|
||||
current_memory_partition = memory_partition_config['mp_mode']
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
memory_partition = "N/A"
|
||||
logging.debug("Failed to get current memory partition for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
try:
|
||||
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
|
||||
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
|
||||
|
||||
if temp_mem_caps.amdsmi_nps_flags_t == None:
|
||||
mem_caps = temp_mem_caps.nps_cap_mask
|
||||
mem_caps_list = []
|
||||
if mem_caps & 1 == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if mem_caps & 2 == 2:
|
||||
mem_caps_list.append("NPS2")
|
||||
if mem_caps & 4 == 4:
|
||||
mem_caps_list.append("NPS4")
|
||||
if mem_caps & 8 == 8:
|
||||
mem_caps_list.append("NPS8")
|
||||
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "")
|
||||
else:
|
||||
mem_caps = temp_mem_caps.amdsmi_nps_flags_t
|
||||
mem_caps_list = []
|
||||
if mem_caps.nps1_cap == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if mem_caps.nps2_cap == 1:
|
||||
mem_caps_list.append("NPS2")
|
||||
if mem_caps.nps4_cap == 1:
|
||||
mem_caps_list.append("NPS4")
|
||||
if mem_caps.nps8_cap == 1:
|
||||
mem_caps_list.append("NPS8")
|
||||
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
|
||||
if mem_caps_str == "":
|
||||
mem_caps_str = "N/A"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
mem_caps_str = "N/A"
|
||||
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
tabular_output_dict = {"gpu_id": gpu_id,
|
||||
"memory_partition_caps": mem_caps_str,
|
||||
"current_memory_partition": current_memory_partition}
|
||||
tabular_output.append(tabular_output_dict)
|
||||
|
||||
memory_dict = {'caps': mem_caps_str, 'current': memory_partition}
|
||||
self.logger.store_output(gpu, 'memory_partition', memory_dict)
|
||||
self.logger.store_multiple_device_output()
|
||||
self.logger.print_output(multiple_device_enabled=True)
|
||||
self.logger.multiple_device_output = tabular_output
|
||||
self.logger.table_title = "\nMEMORY_PARTITION"
|
||||
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
|
||||
###########################################
|
||||
# amd-smi partition --accelerator #
|
||||
###########################################
|
||||
if args.accelerator:
|
||||
self.logger.table_header = ''.rjust(7)
|
||||
current_header = "GPU_ID".ljust(13) + \
|
||||
current_header = "GPU_ID".ljust(8) + \
|
||||
"PROFILE_INDEX".ljust(15) + \
|
||||
"MEMORY_PARTITION_CAPS".ljust(23) + \
|
||||
"ACCELERATOR_TYPE".ljust(18) + \
|
||||
"PARTITION_ID".ljust(14) + \
|
||||
"PARTITION_ID".ljust(17) + \
|
||||
"NUM_PARTITIONS".ljust(16) + \
|
||||
"NUM_RESOURCES".ljust(15) + \
|
||||
"RESOURCE_INDEX".ljust(16) + \
|
||||
@@ -5824,74 +5807,184 @@ class AMDSMICommands():
|
||||
self.logger.table_header = current_header + self.logger.table_header.strip()
|
||||
|
||||
tabular_output = []
|
||||
prev_gpu_id = "N/A"
|
||||
for gpu in args.gpu:
|
||||
gpu_id = self.helpers.get_gpu_id_from_device_handle(gpu)
|
||||
tabular_output_dict = {"gpu_id": "N/A",
|
||||
"profile_index": "N/A",
|
||||
"memory_partition_caps": "N/A",
|
||||
"accelerator_type": "N/A",
|
||||
"partition_id": "0",
|
||||
"num_partitions": "N/A",
|
||||
"num_resources": "N/A",
|
||||
"resource_index": "N/A",
|
||||
"resource_type": "N/A",
|
||||
"resource_instances": "N/A",
|
||||
"resources_shared": "N/A"}
|
||||
try:
|
||||
partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(gpu)
|
||||
profile_type = partition_dict['partition_profile']['profile_type']
|
||||
profile_index = partition_dict['partition_profile']['profile_index']
|
||||
temp_mem_caps = partition_dict['partition_profile']['memory_caps']
|
||||
parition_id = partition_dict['partition_id']
|
||||
num_resources = partition_dict['partition_profile']['num_resources']
|
||||
resources = partition_dict['partition_profile']['resources']
|
||||
partition_id = str(partition_dict['partition_id']).replace("[", "").replace("]", "").replace(" ", "")
|
||||
current_accelerator_type = partition_dict['partition_profile']['profile_type']
|
||||
|
||||
# save only the primary GPU node's partition_id (the 1st listed device; non N/A one)
|
||||
# else keep current_partition_id unchanged for displaying in accelerator resource's output
|
||||
if partition_id != "N/A":
|
||||
current_partition_id = partition_id
|
||||
|
||||
if temp_mem_caps.amdsmi_nps_flags_t == None:
|
||||
mem_caps = temp_mem_caps.nps_cap_mask
|
||||
mem_caps_list = []
|
||||
if mem_caps & 1 == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if mem_caps & 2 == 2:
|
||||
mem_caps_list.append("NPS2")
|
||||
if mem_caps & 4 == 4:
|
||||
mem_caps_list.append("NPS4")
|
||||
if mem_caps & 8 == 8:
|
||||
mem_caps_list.append("NPS8")
|
||||
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
|
||||
else:
|
||||
mem_caps = temp_mem_caps.amdsmi_nps_flags_t
|
||||
mem_caps_list = []
|
||||
if mem_caps.nps1_cap == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if mem_caps.nps2_cap == 1:
|
||||
mem_caps_list.append("NPS2")
|
||||
if mem_caps.nps4_cap == 1:
|
||||
mem_caps_list.append("NPS4")
|
||||
if mem_caps.nps8_cap == 1:
|
||||
mem_caps_list.append("NPS8")
|
||||
mem_caps_str = str(mem_caps_list).replace("]", "").replace("[", "").replace("\'", "")
|
||||
if mem_caps_str == "":
|
||||
mem_caps_str = "N/A"
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
profile_type = "N/A"
|
||||
profile_index = "N/A"
|
||||
temp_mem_caps = "N/A"
|
||||
parition_id = "N/A"
|
||||
num_resources = "N/A"
|
||||
resources = "N/A"
|
||||
partition_id = "0"
|
||||
mem_caps_str = "N/A"
|
||||
num_partitions = 0
|
||||
current_accelerator_type = "N/A"
|
||||
logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
|
||||
|
||||
if profile_type == 0:
|
||||
profile_type = "N/A"
|
||||
try:
|
||||
partition_config_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile_config(gpu)
|
||||
logging.debug("amdsmi_commands.py | partition_config_dict: " + str(json.dumps(partition_config_dict, indent=4)))
|
||||
num_profiles = partition_config_dict['num_profiles']
|
||||
num_resource_profiles = partition_config_dict['num_resource_profiles']
|
||||
|
||||
tabular_output_dict = {"gpu_id": gpu_id,
|
||||
resource_index = 0
|
||||
prev_accelerator_type = "N/A"
|
||||
for p in range(0, num_profiles):
|
||||
accelerator_type = partition_config_dict['profiles'][p]['profile_type']
|
||||
profile_index = partition_config_dict['profiles'][p]['profile_index']
|
||||
num_partitions = partition_config_dict['profiles'][p]['num_partitions']
|
||||
mem_caps_str = str(partition_config_dict['profiles'][p]['memory_caps']).replace("]", "").replace("[", "").replace("\'", "").replace(" ", "")
|
||||
# 2 modifications based on the current accelerator type:
|
||||
# 1) display a * for the current accelerator type, otherwise display as normal
|
||||
# 2) display partition id only for the current accelerator profile (the *'d one)
|
||||
if current_accelerator_type == accelerator_type:
|
||||
accelerator_type = accelerator_type + "*"
|
||||
partition_id = current_partition_id
|
||||
else:
|
||||
partition_id = "N/A"
|
||||
# only display the first instance of the gpu_id, rest are empty strings
|
||||
if prev_gpu_id != gpu_id:
|
||||
tabular_gpu_id = gpu_id
|
||||
prev_gpu_id = gpu_id
|
||||
else:
|
||||
tabular_gpu_id = ""
|
||||
logging.debug("amdsmi_commands.py | tabular_gpu_id: " + str(tabular_gpu_id))
|
||||
|
||||
if num_resource_profiles == 0:
|
||||
if prev_accelerator_type != accelerator_type: # only print the first instance of the resources
|
||||
tabular_output_dict = {"gpu_id": tabular_gpu_id,
|
||||
"profile_index": profile_index,
|
||||
"memory_partition_caps": mem_caps_str,
|
||||
"accelerator_type": profile_type,
|
||||
"partition_id": parition_id,
|
||||
"num_partitions": 0,
|
||||
"num_resources": num_resources,
|
||||
"resource_index": resources,
|
||||
"resource_type": resources,
|
||||
"resource_instances": resources,
|
||||
"resources_shared": resources}
|
||||
tabular_output.append(tabular_output_dict)
|
||||
"accelerator_type": accelerator_type,
|
||||
"partition_id": partition_id,
|
||||
"num_partitions": num_partitions,
|
||||
"num_resources": num_resource_profiles,
|
||||
"resource_index": "N/A",
|
||||
"resource_type": "N/A",
|
||||
"resource_instances": "N/A",
|
||||
"resources_shared": "N/A"}
|
||||
prev_accelerator_type = accelerator_type
|
||||
tabular_output.append(tabular_output_dict)
|
||||
continue
|
||||
|
||||
for r in range(0, num_resource_profiles):
|
||||
logging.debug("amdsmi_commands.py | p: " + str(p) + "; r: " + str(r)
|
||||
+ "; accelerator_type: " + str(accelerator_type))
|
||||
resource_type = partition_config_dict['profiles'][p]['resources'][r]['resource_type']
|
||||
resource_instances = partition_config_dict['profiles'][p]['resources'][r]['partition_resource']
|
||||
resources_shared = partition_config_dict['profiles'][p]['resources'][r]['num_partitions_share_resource']
|
||||
if prev_accelerator_type != accelerator_type: # only print the first instance of the resources
|
||||
tabular_output_dict = {"gpu_id": tabular_gpu_id,
|
||||
"profile_index": profile_index,
|
||||
"memory_partition_caps": mem_caps_str,
|
||||
"accelerator_type": accelerator_type,
|
||||
"partition_id": partition_id,
|
||||
"num_partitions": num_partitions,
|
||||
"num_resources": num_resource_profiles,
|
||||
"resource_index": resource_index,
|
||||
"resource_type": resource_type,
|
||||
"resource_instances": resource_instances,
|
||||
"resources_shared": resources_shared}
|
||||
prev_accelerator_type = accelerator_type
|
||||
else:
|
||||
tabular_output_dict = {"gpu_id": "",
|
||||
"profile_index": "",
|
||||
"memory_partition_caps": "",
|
||||
"accelerator_type": "",
|
||||
"partition_id": "",
|
||||
"num_partitions": "",
|
||||
"num_resources": "",
|
||||
"resource_index": resource_index,
|
||||
"resource_type": resource_type,
|
||||
"resource_instances": resource_instances,
|
||||
"resources_shared": resources_shared}
|
||||
resource_index += 1
|
||||
tabular_output.append(tabular_output_dict)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
tabular_output.append(tabular_output_dict)
|
||||
|
||||
self.logger.multiple_device_output = tabular_output
|
||||
self.logger.table_title = "ACCELERATOR_PARTITION_PROFILES"
|
||||
self.logger.print_output(multiple_device_enabled=True, tabular=True)
|
||||
self.logger.table_title = "\nACCELERATOR_PARTITION_PROFILES"
|
||||
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
|
||||
#########################################
|
||||
# print accelerator partition resources #
|
||||
#########################################
|
||||
self.logger.table_header = ''.rjust(7)
|
||||
current_header = "RESOURCE_INDEX".ljust(16) + \
|
||||
"RESOURCE_TYPE".ljust(15) + \
|
||||
"RESOURCE_INSTANCES".ljust(20) + \
|
||||
"RESOURCES_SHARED".ljust(18)
|
||||
self.logger.table_header = current_header + self.logger.table_header.strip()
|
||||
|
||||
tabular_output = []
|
||||
for gpu in args.gpu:
|
||||
gpu_id = self.helpers.get_gpu_id_from_device_handle(gpu)
|
||||
tabular_output_dict = {"resource_index": "N/A",
|
||||
"resource_type": "N/A",
|
||||
"resource_instances": "N/A",
|
||||
"resources_shared": "N/A"}
|
||||
try:
|
||||
partition_config_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile_config(gpu)
|
||||
logging.debug("amdsmi_commands.py | partition_config_dict: " + str(json.dumps(partition_config_dict, indent=4)))
|
||||
num_profiles = partition_config_dict['num_profiles']
|
||||
num_resource_profiles = partition_config_dict['num_resource_profiles']
|
||||
|
||||
if num_resource_profiles == 0:
|
||||
tabular_output.append(tabular_output_dict)
|
||||
continue
|
||||
|
||||
resource_index = 0
|
||||
for p in range(0, num_profiles):
|
||||
for r in range(0, num_resource_profiles):
|
||||
resource_type = partition_config_dict['profiles'][p]['resources'][r]['resource_type']
|
||||
resource_instances = partition_config_dict['profiles'][p]['resources'][r]['partition_resource']
|
||||
resources_shared = partition_config_dict['profiles'][p]['resources'][r]['num_partitions_share_resource']
|
||||
tabular_output_dict = {
|
||||
"resource_index": resource_index,
|
||||
"resource_type": resource_type,
|
||||
"resource_instances": resource_instances,
|
||||
"resources_shared": resources_shared}
|
||||
resource_index += 1
|
||||
tabular_output.append(tabular_output_dict)
|
||||
except amdsmi_exception.AmdSmiLibraryException as e:
|
||||
tabular_output.append(tabular_output_dict)
|
||||
|
||||
self.logger.multiple_device_output = tabular_output
|
||||
self.logger.table_title = "\nACCELERATOR_PARTITION_RESOURCES"
|
||||
self.logger.print_output(multiple_device_enabled=True, tabular=True, dynamic=True)
|
||||
self.logger.clear_multiple_devices_ouput()
|
||||
|
||||
# print legend
|
||||
legend_parts = [
|
||||
"\n\nLegend:",
|
||||
" * = Current mode"]
|
||||
legend_output = "\n".join(legend_parts)
|
||||
if self.logger.destination == 'stdout':
|
||||
print(legend_output)
|
||||
else:
|
||||
with self.logger.destination.open('a', encoding="utf-8") as output_file:
|
||||
output_file.write(legend_output + '\n')
|
||||
|
||||
def _event_thread(self, commands, i):
|
||||
devices = commands.device_handles
|
||||
|
||||
@@ -27,6 +27,7 @@ import sys
|
||||
import time
|
||||
import re
|
||||
import multiprocessing
|
||||
import json
|
||||
|
||||
from typing import List, Union
|
||||
from enum import Enum
|
||||
@@ -681,12 +682,30 @@ class AMDSMIHelpers():
|
||||
perf_levels_int = list(set(clock.value for clock in amdsmi_interface.AmdSmiDevPerfLevel))
|
||||
return perf_levels_str, perf_levels_int
|
||||
|
||||
def get_accelerator_partition_profile_config(self):
    """Collect the accelerator partition profiles reported by the first device.

    Returns:
        dict with three parallel lists:
            'profile_indices' - each profile's 'profile_index', converted to str
            'profile_types'   - each profile's 'profile_type'
            'memory_caps'     - each profile's 'memory_caps'
        All three lists are empty when there are no device handles or when
        the profile-config query raises AmdSmiLibraryException.
    """
    handles = amdsmi_interface.amdsmi_get_processor_handles()
    gathered = {'profile_indices': [], 'profile_types': [], 'memory_caps': []}
    for handle in handles:
        try:
            config = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile_config(handle)
            for slot in range(config['num_profiles']):
                gathered['profile_indices'].append(str(config['profiles'][slot]['profile_index']))
                gathered['profile_types'].append(config['profiles'][slot]['profile_type'])
                gathered['memory_caps'].append(config['profiles'][slot]['memory_caps'])
        except amdsmi_interface.AmdSmiLibraryException:
            # A library failure leaves whatever was gathered so far untouched.
            pass
        # Only the first handle is ever consulted: the loop ends after one
        # iteration whether the query succeeded or raised a library error.
        break
    return gathered
|
||||
|
||||
def get_compute_partition_types(self):
    """Return the member names of AmdSmiComputePartitionType, minus 'INVALID'.

    Returns:
        list of str, in the iteration order of the enum, with the first
        occurrence of 'INVALID' removed when it is present.
    """
    names = []
    for member in amdsmi_interface.AmdSmiComputePartitionType:
        names.append(member.name)
    try:
        names.remove('INVALID')
    except ValueError:
        # 'INVALID' is not among the names; nothing to strip.
        pass
    return names
|
||||
def get_accelerator_choices_types_indices(self):
    """Build the accepted accelerator-partition choices and their profile data.

    Returns:
        A 2-tuple (choices, profiles):
            choices  - ", "-joined string of every profile type followed by
                       every profile index, or "N/A" when no profile types
                       were reported.
            profiles - the dict returned by
                       get_accelerator_partition_profile_config(), or a dict
                       with empty 'profile_indices' / 'profile_types' lists
                       in the "N/A" case.
    """
    profiles = self.get_accelerator_partition_profile_config()
    if len(profiles['profile_types']) == 0:
        # Nothing reported: signal "not supported" with the N/A sentinel.
        return ("N/A", {'profile_indices': [], 'profile_types': []})

    every_choice = profiles['profile_types'] + profiles['profile_indices']
    return (", ".join(every_choice), profiles)
|
||||
|
||||
def get_memory_partition_types(self):
|
||||
memory_partitions_str = [partition.name for partition in amdsmi_interface.AmdSmiMemoryPartitionType]
|
||||
|
||||
@@ -102,14 +102,24 @@ class AMDSMILogger():
|
||||
return output_dict
|
||||
|
||||
|
||||
def _convert_json_to_tabular(self, json_object: Dict[str, any]):
|
||||
# TODO make dynamic
|
||||
def _convert_json_to_tabular(self, json_object: Dict[str, any], dynamic=False):
|
||||
# TODO make dynamic - convert other python CLI outputs to use (as needed)
|
||||
# Update: using dynamic=true provides dynamic re-sizing based on key name length
|
||||
|
||||
table_values = ''
|
||||
stored_gpu = ''
|
||||
stored_timestamp = ''
|
||||
for key, value in json_object.items():
|
||||
string_value = str(value)
|
||||
if key == 'gpu':
|
||||
if key == 'partition_id':
|
||||
# Special case for partition_id: 8 partitions + 7 comma + 2 spaces = 17
|
||||
table_values += string_value.ljust(17)
|
||||
continue
|
||||
key_length = len(key) + 2
|
||||
if dynamic and len(key) > 0:
|
||||
stored_gpu = string_value
|
||||
table_values += string_value.ljust(key_length)
|
||||
elif key == 'gpu':
|
||||
stored_gpu = string_value
|
||||
table_values += string_value.rjust(3)
|
||||
elif key == 'timestamp':
|
||||
@@ -144,30 +154,6 @@ class AMDSMILogger():
|
||||
elif key == "link_status":
|
||||
for i in value:
|
||||
table_values += str(i).ljust(3)
|
||||
elif key == "memory":
|
||||
table_values += string_value.ljust(8)
|
||||
elif key == "accelerator_type":
|
||||
table_values += string_value.ljust(18)
|
||||
elif key == "partition_id":
|
||||
table_values += string_value.ljust(14)
|
||||
elif key == "accelerator_profile_index":
|
||||
table_values += string_value.ljust(27)
|
||||
elif key == "profile_index":
|
||||
table_values += string_value.ljust(15)
|
||||
elif key == "memory_partition_caps":
|
||||
table_values += string_value.ljust(23)
|
||||
elif key == "num_partitions":
|
||||
table_values += string_value.ljust(16)
|
||||
elif key == "num_resources":
|
||||
table_values += string_value.ljust(15)
|
||||
elif key == "resource_index":
|
||||
table_values += string_value.ljust(16)
|
||||
elif key == "resource_type":
|
||||
table_values += string_value.ljust(15)
|
||||
elif key == "resource_instances":
|
||||
table_values += string_value.ljust(20)
|
||||
elif key == "resources_shared":
|
||||
table_values += string_value.ljust(18)
|
||||
elif key == "RW":
|
||||
table_values += string_value.ljust(57)
|
||||
elif key in ('pviol', 'tviol'):
|
||||
@@ -494,12 +480,14 @@ class AMDSMILogger():
|
||||
self.output = {}
|
||||
|
||||
|
||||
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False):
|
||||
def print_output(self, multiple_device_enabled=False, watching_output=False, tabular=False, dual_csv_output=False, dynamic=False):
|
||||
""" Print current output acording to format and then destination
|
||||
params:
|
||||
multiple_device_enabled (bool) - True if printing output from
|
||||
multiple devices
|
||||
watching_output (bool) - True if printing watch output
|
||||
dynamic (bool) - Defaults to False. True turns on dynamic resizing for
|
||||
left justified table output
|
||||
return:
|
||||
Nothing
|
||||
"""
|
||||
@@ -516,7 +504,7 @@ class AMDSMILogger():
|
||||
elif self.is_human_readable_format():
|
||||
# If tabular output is enabled, redirect to _print_tabular_output
|
||||
if tabular:
|
||||
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output)
|
||||
self._print_tabular_output(multiple_device_enabled=multiple_device_enabled, watching_output=watching_output, dynamic=dynamic)
|
||||
else:
|
||||
self._print_human_readable_output(multiple_device_enabled=multiple_device_enabled,
|
||||
watching_output=watching_output)
|
||||
@@ -788,7 +776,7 @@ class AMDSMILogger():
|
||||
output_file.write(human_readable_output + '\n')
|
||||
|
||||
|
||||
def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False):
|
||||
def _print_tabular_output(self, multiple_device_enabled=False, watching_output=False, dynamic=False):
|
||||
primary_table = ''
|
||||
secondary_table = ''
|
||||
|
||||
@@ -808,7 +796,7 @@ class AMDSMILogger():
|
||||
for key, value in device_output.items():
|
||||
if key != 'process_list':
|
||||
primary_table_output[key] = value
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output, dynamic=dynamic) + '\n'
|
||||
else: # Single device output
|
||||
if 'process_list' in self.output:
|
||||
process_table_dict = {}
|
||||
@@ -822,7 +810,7 @@ class AMDSMILogger():
|
||||
for key, value in self.output.items():
|
||||
if key != 'process_list':
|
||||
primary_table_output[key] = value
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output, dynamic=dynamic) + '\n'
|
||||
primary_table = primary_table.rstrip()
|
||||
secondary_table = secondary_table.rstrip()
|
||||
|
||||
@@ -879,7 +867,7 @@ class AMDSMILogger():
|
||||
for key, value in device_output.items():
|
||||
if key != 'process_list':
|
||||
primary_table_output[key] = value
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output) + '\n'
|
||||
primary_table += self._convert_json_to_tabular(primary_table_output, dynamic=dynamic) + '\n'
|
||||
primary_table = primary_table.rstrip() # Remove trailing new line
|
||||
secondary_table = secondary_table.rstrip()
|
||||
|
||||
|
||||
@@ -173,6 +173,14 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
else:
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(string_value, outputformat)
|
||||
|
||||
def _is_command_supported(self, user_input, acceptable_values, command_name):
|
||||
if acceptable_values == "N/A":
|
||||
raise amdsmi_cli_exceptions.AmdSmiCommandNotSupportedException(command_name, self.helpers.get_output_format())
|
||||
elif str(user_input).upper() not in acceptable_values:
|
||||
print(f"Valid inputs are {acceptable_values}")
|
||||
raise amdsmi_cli_exceptions.AmdSmiInvalidParameterValueException(str(user_input).upper(), self.helpers.get_output_format())
|
||||
else:
|
||||
return str(user_input).upper()
|
||||
|
||||
def _limit_select(self):
|
||||
"""Custom action for setting clock limits"""
|
||||
@@ -401,7 +409,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
return _CoreSelectAction
|
||||
|
||||
|
||||
def _add_command_modifiers(self, subcommand_parser):
|
||||
def _add_command_modifiers(self, subcommand_parser: argparse.ArgumentParser):
|
||||
json_help = "Displays output in JSON format (human readable by default)."
|
||||
csv_help = "Displays output in CSV format (human readable by default)."
|
||||
file_help = "Saves output into a file on the provided path (stdout by default)."
|
||||
@@ -460,7 +468,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
return value
|
||||
|
||||
|
||||
def _add_device_arguments(self, subcommand_parser, required=False):
|
||||
def _add_device_arguments(self, subcommand_parser: argparse.ArgumentParser, required=False):
|
||||
# Device arguments help text
|
||||
gpu_help = f"Select a GPU ID, BDF, or UUID from the possible choices:\n{self.gpu_choices_str}"
|
||||
vf_help = "Gets general information about the specified VF (timeslice, fb info, …).\
|
||||
@@ -583,7 +591,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
return _ValidateOverdrivePercent
|
||||
|
||||
|
||||
def _add_version_parser(self, subparsers, func):
|
||||
def _add_version_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
# Subparser help text
|
||||
version_help = "Display version information"
|
||||
|
||||
@@ -597,7 +605,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_command_modifiers(version_parser)
|
||||
|
||||
|
||||
def _add_list_parser(self, subparsers, func):
|
||||
def _add_list_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_amdgpu_initialized():
|
||||
# The list subcommand is only applicable to systems with amdgpu initialized
|
||||
return
|
||||
@@ -619,7 +627,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_device_arguments(list_parser, required=False)
|
||||
|
||||
|
||||
def _add_static_parser(self, subparsers, func):
|
||||
def _add_static_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
# Subparser help text
|
||||
static_help = "Gets static information about the specified GPU"
|
||||
static_subcommand_help = "If no GPU is specified, returns static information for all GPUs on the system.\
|
||||
@@ -925,7 +933,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_command_modifiers(metric_parser)
|
||||
|
||||
|
||||
def _add_process_parser(self, subparsers, func):
|
||||
def _add_process_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if self.helpers.is_hypervisor():
|
||||
# Don't add this subparser on Hypervisors
|
||||
# This subparser is only available to Guest and Baremetal systems
|
||||
@@ -969,7 +977,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
process_parser.add_argument('-n', '--name', action='store', type=lambda value: self._is_valid_string(value, '--name'), required=False, help=name_help)
|
||||
|
||||
|
||||
def _add_profile_parser(self, subparsers, func):
|
||||
def _add_profile_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not (self.helpers.is_windows() and self.helpers.is_hypervisor()):
|
||||
# This subparser only applies to Hypervisors
|
||||
return
|
||||
@@ -990,7 +998,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_device_arguments(profile_parser, required=False)
|
||||
|
||||
|
||||
def _add_event_parser(self, subparsers, func):
|
||||
def _add_event_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_amdgpu_initialized():
|
||||
# The event subcommand is only applicable to systems with amdgpu initialized
|
||||
return
|
||||
@@ -1011,7 +1019,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_device_arguments(event_parser, required=False)
|
||||
|
||||
|
||||
def _add_topology_parser(self, subparsers, func):
|
||||
def _add_topology_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not(self.helpers.is_baremetal() and self.helpers.is_linux()):
|
||||
# This subparser is only applicable to Baremetal Linux
|
||||
return
|
||||
@@ -1059,7 +1067,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
topology_parser.add_argument('-z', '--bi-dir', action='store_true', required=False, help=bi_dir_help)
|
||||
|
||||
|
||||
def _add_set_value_parser(self, subparsers, func):
|
||||
def _add_set_value_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_linux():
|
||||
# This subparser is only applicable to Linux
|
||||
return
|
||||
@@ -1078,9 +1086,9 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_profile_help = f"Set power profile level (#) or choose one of available profiles:\n\t{power_profile_choices_str}"
|
||||
perf_det_choices_str = ", ".join(self.helpers.get_perf_det_levels())
|
||||
set_perf_det_help = f"Set performance determinism and select one of the corresponding performance levels:\n\t{perf_det_choices_str}"
|
||||
compute_partition_choices_str = ", ".join(self.helpers.get_compute_partition_types())
|
||||
(accelerator_set_choices, _) = self.helpers.get_accelerator_choices_types_indices()
|
||||
memory_partition_choices_str = ", ".join(self.helpers.get_memory_partition_types())
|
||||
set_compute_partition_help = f"Set one of the following the compute partition modes:\n\t{compute_partition_choices_str}"
|
||||
set_compute_partition_help = f"Set one of the following the accelerator type or profile index:\n\t{accelerator_set_choices}.\n\tUse `sudo amd-smi partition --accelerator` to find acceptable values."
|
||||
set_memory_partition_help = f"Set one of the following the memory partition modes:\n\t{memory_partition_choices_str}"
|
||||
power_cap_min, power_cap_max = self.helpers.get_power_caps()
|
||||
power_cap_max = self.helpers.convert_SI_unit(power_cap_max, AMDSMIHelpers.SI_Unit.MICRO)
|
||||
@@ -1128,7 +1136,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
set_value_exclusive_group.add_argument('-l', '--perf-level', action='store', choices=self.helpers.get_perf_levels()[0], type=str.upper, required=False, help=set_perf_level_help, metavar='LEVEL')
|
||||
set_value_exclusive_group.add_argument('-P', '--profile', action='store', required=False, help=set_profile_help, metavar='SETPROFILE')
|
||||
set_value_exclusive_group.add_argument('-d', '--perf-determinism', action='store', type=lambda value: self._not_negative_int(value, '--perf-determinism'), required=False, help=set_perf_det_help, metavar='SCLKMAX')
|
||||
set_value_exclusive_group.add_argument('-C', '--compute-partition', action='store', choices=self.helpers.get_compute_partition_types(), type=str.upper, required=False, help=set_compute_partition_help, metavar='PARTITION')
|
||||
set_value_exclusive_group.add_argument('-C', '--compute-partition', action='store', choices=accelerator_set_choices, type=lambda value: self._is_command_supported(value, accelerator_set_choices, '--compute-partition'), required=False, help=set_compute_partition_help, metavar='<ACCELERATOR_TYPE> or <PROFILE_INDEX>')
|
||||
set_value_exclusive_group.add_argument('-M', '--memory-partition', action='store', choices=self.helpers.get_memory_partition_types(), type=str.upper, required=False, help=set_memory_partition_help, metavar='PARTITION')
|
||||
set_value_exclusive_group.add_argument('-o', '--power-cap', action='store', type=lambda value: self._positive_int(value, '--power-cap'), required=False, help=set_power_cap_help, metavar='WATTS')
|
||||
set_value_exclusive_group.add_argument('-p', '--soc-pstate', action='store', required=False, type=lambda value: self._not_negative_int(value, '--soc-pstate'), help=set_soc_pstate_help, metavar='POLICY_ID')
|
||||
@@ -1162,7 +1170,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
self._add_command_modifiers(set_value_parser)
|
||||
|
||||
|
||||
def _add_reset_parser(self, subparsers, func):
|
||||
def _add_reset_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_linux():
|
||||
# This subparser is only applicable to Linux
|
||||
return
|
||||
@@ -1215,7 +1223,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
reset_exclusive_group.add_argument('-l', '--clean-local-data', action='store_true', required=False, help=reset_gpu_clean_local_data_help)
|
||||
|
||||
|
||||
def _add_monitor_parser(self, subparsers, func):
|
||||
def _add_monitor_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_linux():
|
||||
# This subparser is only applicable to Linux
|
||||
return
|
||||
@@ -1314,7 +1322,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
rocm_smi_parser.add_argument('-f', '--showclkfrq', action='store_true', required=False, help=showclkfrq_help)
|
||||
|
||||
|
||||
def _add_xgmi_parser(self, subparsers, func):
|
||||
def _add_xgmi_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_amdgpu_initialized():
|
||||
# The xgmi subcommand is only applicable to systems with amdgpu initialized
|
||||
return
|
||||
@@ -1344,7 +1352,7 @@ class AMDSMIParser(argparse.ArgumentParser):
|
||||
xgmi_parser.add_argument('-l', '--link-status', action='store_true', required=False, help=xgmi_link_status_help)
|
||||
|
||||
|
||||
def _add_partition_parser(self, subparsers, func):
|
||||
def _add_partition_parser(self, subparsers: argparse._SubParsersAction, func):
|
||||
if not self.helpers.is_amdgpu_initialized():
|
||||
# The partition subcommand is only applicable to systems with amdgpu initialized
|
||||
return
|
||||
|
||||
@@ -70,6 +70,7 @@ typedef enum {
|
||||
#define AMDSMI_MAX_ACCELERATOR_PROFILE 32
|
||||
#define AMDSMI_MAX_CP_PROFILE_RESOURCES 32
|
||||
#define AMDSMI_MAX_ACCELERATOR_PARTITIONS 8
|
||||
#define AMDSMI_MAX_NUM_NUMA_NODES 32
|
||||
|
||||
#define AMDSMI_GPU_UUID_SIZE 38
|
||||
|
||||
@@ -259,8 +260,8 @@ typedef enum {
|
||||
AMDSMI_STATUS_FILE_NOT_FOUND = 52, //!< file or directory not found
|
||||
AMDSMI_STATUS_ARG_PTR_NULL = 53, //!< Parsed argument is invalid
|
||||
AMDSMI_STATUS_AMDGPU_RESTART_ERR = 54, //!< AMDGPU restart failed
|
||||
AMDSMI_STATUS_SETTING_UNAVAILABLE = 55, //!< Setting is not available
|
||||
AMDSMI_STATUS_CORRUPTED_EEPROM = 56, //!< EEPROM is corrupted
|
||||
AMDSMI_STATUS_SETTING_UNAVAILABLE = 55, //!< Setting is not available
|
||||
AMDSMI_STATUS_CORRUPTED_EEPROM = 56, //!< EEPROM is corrupted
|
||||
|
||||
// General errors
|
||||
AMDSMI_STATUS_MAP_ERROR = 0xFFFFFFFE, //!< The internal library error did not map to a status code
|
||||
@@ -292,19 +293,35 @@ typedef enum {
|
||||
* various accelerator partitioning settings.
|
||||
*/
|
||||
typedef enum {
|
||||
AMDSMI_ACCELERATOR_PARTITION_INVALID = 0,
|
||||
AMDSMI_ACCELERATOR_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
//!< together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
//!< together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
//!< work together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_INVALID = 0,
|
||||
AMDSMI_ACCELERATOR_PARTITION_SPX, //!< Single GPU mode (SPX)- All XCCs work
|
||||
//!< together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_DPX, //!< Dual GPU mode (DPX)- Half XCCs work
|
||||
//!< together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_TPX, //!< Triple GPU mode (TPX)- One-third XCCs
|
||||
//!< work together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_QPX, //!< Quad GPU mode (QPX)- Quarter XCCs
|
||||
//!< work together with shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_CPX, //!< Core mode (CPX)- Per-chip XCC with
|
||||
//!< shared memory
|
||||
AMDSMI_ACCELERATOR_PARTITION_MAX
|
||||
} amdsmi_accelerator_partition_type_t;
|
||||
|
||||
/**
|
||||
* @brief Accelerator Partition Resource Type.
|
||||
* This enum is used to identify
|
||||
* various accelerator resource types.
|
||||
*/
|
||||
typedef enum {
|
||||
AMDSMI_ACCELERATOR_XCC,
|
||||
AMDSMI_ACCELERATOR_ENCODER,
|
||||
AMDSMI_ACCELERATOR_DECODER,
|
||||
AMDSMI_ACCELERATOR_DMA,
|
||||
AMDSMI_ACCELERATOR_JPEG,
|
||||
AMDSMI_ACCELERATOR_MAX
|
||||
} amdsmi_accelerator_partition_resource_type_t;
|
||||
|
||||
|
||||
/**
|
||||
* @brief Compute Partition. This enum is used to identify
|
||||
* various compute partitioning settings.
|
||||
@@ -329,19 +346,19 @@ typedef enum {
|
||||
*/
|
||||
typedef enum {
|
||||
AMDSMI_MEMORY_PARTITION_UNKNOWN = 0,
|
||||
AMDSMI_MEMORY_PARTITION_NPS1, //!< NPS1 - All CCD & XCD data is interleaved
|
||||
//!< accross all 8 HBM stacks (all stacks/1).
|
||||
AMDSMI_MEMORY_PARTITION_NPS2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
|
||||
//!< accross the 4 HBM stacks per AID pair
|
||||
//!< (8 stacks/2).
|
||||
AMDSMI_MEMORY_PARTITION_NPS4, //!< NPS4 - Each XCD data is interleaved accross
|
||||
//!< accross 2 (or single) HBM stacks
|
||||
//!< (8 stacks/8 or 8 stacks/4).
|
||||
AMDSMI_MEMORY_PARTITION_NPS8, //!< NPS8 - Each XCD uses a single HBM stack
|
||||
//!< (8 stacks/8). Or each XCD uses a single
|
||||
//!< HBM stack & CCDs share 2 non-interleaved
|
||||
//!< HBM stacks on its AID
|
||||
//!< (AID[1,2,3] = 6 stacks/6).
|
||||
AMDSMI_MEMORY_PARTITION_NPS1 = 1, //!< NPS1 - All CCD & XCD data is interleaved
|
||||
//!< across all 8 HBM stacks (all stacks/1).
|
||||
AMDSMI_MEMORY_PARTITION_NPS2 = 2, //!< NPS2 - 2 sets of CCDs or 4 XCD interleaved
|
||||
//!< across the 4 HBM stacks per AID pair
|
||||
//!< (8 stacks/2).
|
||||
AMDSMI_MEMORY_PARTITION_NPS4 = 4, //!< NPS4 - Each XCD data is interleaved
|
||||
//!< across 2 (or single) HBM stacks
|
||||
//!< (8 stacks/8 or 8 stacks/4).
|
||||
AMDSMI_MEMORY_PARTITION_NPS8 = 8, //!< NPS8 - Each XCD uses a single HBM stack
|
||||
//!< (8 stacks/8). Or each XCD uses a single
|
||||
//!< HBM stack & CCDs share 2 non-interleaved
|
||||
//!< HBM stacks on its AID
|
||||
//!< (AID[1,2,3] = 6 stacks/6).
|
||||
} amdsmi_memory_partition_type_t;
|
||||
|
||||
/**
|
||||
@@ -661,34 +678,77 @@ typedef struct {
|
||||
} amdsmi_kfd_info_t;
|
||||
|
||||
/**
|
||||
* @brief Possible Memory Partition Modes.
|
||||
* This union is used to identify various memory partitioning settings.
|
||||
* @brief Possible Memory Partition Capabilities.
|
||||
* This union is used to identify various memory partition capabilities.
|
||||
*/
|
||||
typedef union {
|
||||
struct nps_flags_ {
|
||||
uint32_t nps1_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
|
||||
uint32_t nps2_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
|
||||
uint32_t nps4_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
|
||||
uint32_t nps8_cap :1; // bool 1 = true; 0 = false; Max uint32 means unsupported
|
||||
uint32_t reserved :28;
|
||||
} amdsmi_nps_flags_t;
|
||||
uint32_t nps_cap_mask;
|
||||
struct nps_flags_ {
|
||||
uint32_t nps1_cap :1; //!< bool 1 = true; 0 = false
|
||||
uint32_t nps2_cap :1; //!< bool 1 = true; 0 = false
|
||||
uint32_t nps4_cap :1; //!< bool 1 = true; 0 = false
|
||||
uint32_t nps8_cap :1; //!< bool 1 = true; 0 = false
|
||||
uint32_t reserved :28;
|
||||
} amdsmi_nps_flags_t;
|
||||
|
||||
uint32_t nps_cap_mask;
|
||||
} amdsmi_nps_caps_t;
|
||||
|
||||
/**
|
||||
* @brief Possible Memory Partition Modes.
|
||||
* This union is used to identify various memory partitioning settings.
|
||||
* @brief Memory Partition Configuration.
|
||||
* This structure is used to identify various memory partition configurations.
|
||||
*/
|
||||
typedef struct {
|
||||
amdsmi_accelerator_partition_type_t profile_type; // SPX, DPX, QPX, CPX and so on
|
||||
uint32_t num_partitions; // On MI300X, SPX: 1, DPX: 2, QPX: 4, CPX: 8, length of resources array
|
||||
amdsmi_nps_caps_t memory_caps; // Possible memory partition capabilities
|
||||
uint32_t profile_index;
|
||||
uint32_t num_resources; // length of index_of_resources_profile
|
||||
uint32_t resources[AMDSMI_MAX_ACCELERATOR_PARTITIONS][AMDSMI_MAX_CP_PROFILE_RESOURCES];
|
||||
uint64_t reserved[13];
|
||||
amdsmi_nps_caps_t partition_caps;
|
||||
amdsmi_memory_partition_type_t mp_mode;
|
||||
uint32_t num_numa_ranges;
|
||||
struct numa_range_ {
|
||||
amdsmi_vram_type_t memory_type;
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
} numa_range[AMDSMI_MAX_NUM_NUMA_NODES];
|
||||
|
||||
uint64_t reserved[11];
|
||||
} amdsmi_memory_partition_config_t;
|
||||
|
||||
/**
|
||||
* @brief Accelerator Partition Profile.
|
||||
* This structure is used to identify the current accelerator partition profile.
|
||||
*/
|
||||
typedef struct {
|
||||
amdsmi_accelerator_partition_type_t profile_type; //!< SPX, DPX, QPX, CPX and so on
|
||||
uint32_t num_partitions; //!< On MI300X: SPX=>1, DPX=>2, QPX=>4, CPX=>8; length of resources
|
||||
amdsmi_nps_caps_t memory_caps; //!< Possible memory partition capabilities
|
||||
uint32_t profile_index; //!< Index in the profiles array in amdsmi_accelerator_partition_profile_t
|
||||
uint32_t num_resources; //!< length of index_of_resources_profile
|
||||
uint32_t resources[AMDSMI_MAX_ACCELERATOR_PARTITIONS][AMDSMI_MAX_CP_PROFILE_RESOURCES];
|
||||
uint64_t reserved[13];
|
||||
} amdsmi_accelerator_partition_profile_t;
|
||||
|
||||
/**
|
||||
* @brief Accelerator Partition Resources.
|
||||
* This struct is used to identify various partition resource profiles.
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t profile_index;
|
||||
amdsmi_accelerator_partition_resource_type_t resource_type;
|
||||
uint32_t partition_resource; //!< Resources a partition can use, which may be shared
|
||||
uint32_t num_partitions_share_resource; //!< If it is greater than 1, then resource is shared.
|
||||
uint64_t reserved[6];
|
||||
} amdsmi_accelerator_partition_resource_profile_t;
|
||||
|
||||
/**
|
||||
* @brief Accelerator Partition Profile Configurations.
|
||||
* This struct is used to identify various partition profiles.
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t num_profiles; //!< The length of profiles array
|
||||
uint32_t num_resource_profiles;
|
||||
amdsmi_accelerator_partition_resource_profile_t resource_profiles[AMDSMI_MAX_CP_PROFILE_RESOURCES];
|
||||
uint32_t default_profile_index; //!< The index of the default profile in the profiles array
|
||||
amdsmi_accelerator_partition_profile_t profiles[AMDSMI_MAX_ACCELERATOR_PROFILE];
|
||||
uint64_t reserved[30];
|
||||
} amdsmi_accelerator_partition_profile_config_t;
|
||||
|
||||
typedef enum {
|
||||
AMDSMI_LINK_TYPE_INTERNAL,
|
||||
AMDSMI_LINK_TYPE_XGMI,
|
||||
@@ -4583,26 +4643,103 @@ amdsmi_get_gpu_memory_partition(amdsmi_processor_handle processor_handle, char *
|
||||
*
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_set_gpu_memory_partition(amdsmi_processor_handle processor_handle, amdsmi_memory_partition_type_t memory_partition);
|
||||
amdsmi_set_gpu_memory_partition(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_memory_partition_type_t memory_partition);
|
||||
/**
|
||||
* @brief Version 2.0: Returns current gpu memory partition capabilities
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf}
|
||||
*
|
||||
* @param[in] processor_handle a processor handle
|
||||
*
|
||||
* @param[out] config reference to the memory partition config.
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_get_gpu_memory_partition_config(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_memory_partition_config_t *config);
|
||||
|
||||
/**
|
||||
* @brief Version 2.0: Set the gpu memory partition mode
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{host}
|
||||
*
|
||||
* @param[in] processor_handle a processor handle
|
||||
*
|
||||
* @param[in] mode Enum representing memory partition to set to
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_set_gpu_memory_partition_mode(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_memory_partition_type_t mode);
|
||||
|
||||
/** @} */ // end of memory_partition
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup accelerator_partition_profile Accelerator Partition Profile Functions
|
||||
/** @defgroup accelerator_partition Accelerator Partition Profile Functions
|
||||
* These functions are used to configure and query the device's
|
||||
* accelerator partition profile setting.
|
||||
* @{
|
||||
*/
|
||||
// TODO: declare rest of partition profile functions and complete doc commentary.
|
||||
/*
|
||||
Get the current accelerator partition profile. The function will return current profile.
|
||||
*/
|
||||
/**
|
||||
* @brief Version 2.0: Returns gpu accelerator partition caps as currently configured in the system
|
||||
* User must use admin/sudo privileges to run this API, or API will not be able to
|
||||
* read resources. Otherwise, API will fill in the structure with as much information as
|
||||
* it can.
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf}
|
||||
*
|
||||
* @param[in] processor_handle Device which to query
|
||||
*
|
||||
* @param[out] profile_config reference to the accelerator partition config.
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_get_gpu_accelerator_partition_profile_config(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_accelerator_partition_profile_config_t *profile_config);
|
||||
|
||||
/**
|
||||
* @brief Version 2.0: Returns current gpu accelerator partition capabilities
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{host} @platform{guest_1vf} @platform{guest_mvf}
|
||||
*
|
||||
* @param[in] processor_handle Device which to query
|
||||
*
|
||||
* @param[out] profile reference to the accelerator partition profile.
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* @param[inout] partition_id array of ids for current accelerator profile.
|
||||
* Must be allocated by user.
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_get_gpu_accelerator_partition_profile(amdsmi_processor_handle processor_handle,
|
||||
amdsmi_accelerator_partition_profile_t *profile,
|
||||
uint32_t *partition_id);
|
||||
|
||||
/** @} */ // end of accelerator_partition_profile
|
||||
/**
|
||||
* @brief Version 2.0: Set accelerator partition setting based on profile_index
|
||||
* from amdsmi_get_gpu_accelerator_partition_profile_config
|
||||
*
|
||||
* @platform{gpu_bm_linux} @platform{host}
|
||||
*
|
||||
* @param[in] processor_handle Device which to query
|
||||
*
|
||||
* @param[in] profile_index Represents index of a partition user wants to set
|
||||
*
|
||||
* @return ::amdsmi_status_t | ::AMDSMI_STATUS_SUCCESS on success, non-zero on fail
|
||||
*/
|
||||
amdsmi_status_t
|
||||
amdsmi_set_gpu_accelerator_partition_profile(amdsmi_processor_handle processor_handle,
|
||||
uint32_t profile_index);
|
||||
|
||||
/** @} End accelerator_partition */
|
||||
|
||||
/*****************************************************************************/
|
||||
/** @defgroup EvntNotif Event Notification Functions
|
||||
|
||||
@@ -224,6 +224,10 @@ from .amdsmi_interface import amdsmi_set_gpu_compute_partition
|
||||
from .amdsmi_interface import amdsmi_get_gpu_memory_partition
|
||||
from .amdsmi_interface import amdsmi_set_gpu_memory_partition
|
||||
from .amdsmi_interface import amdsmi_get_gpu_accelerator_partition_profile
|
||||
from .amdsmi_interface import amdsmi_get_gpu_accelerator_partition_profile_config
|
||||
from .amdsmi_interface import amdsmi_get_gpu_memory_partition_config
|
||||
from .amdsmi_interface import amdsmi_set_gpu_accelerator_partition_profile
|
||||
from .amdsmi_interface import amdsmi_set_gpu_memory_partition_mode
|
||||
|
||||
# # Individual GPU Metrics Functions
|
||||
from .amdsmi_interface import amdsmi_get_gpu_metrics_header_info
|
||||
|
||||
@@ -87,6 +87,8 @@ class AmdSmiLibraryException(AmdSmiException):
|
||||
amdsmi_wrapper.AMDSMI_STATUS_ARG_PTR_NULL : "AMDSMI_STATUS_ARG_PTR_NULL - Parsed argument is invalid",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_MAP_ERROR : "AMDSMI_STATUS_MAP_ERROR - The internal library error did not map to a status code",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_AMDGPU_RESTART_ERR: "AMDSMI_STATUS_AMDGPU_RESTART_ERR - AMDGPU restart failed, please check dmsg for errors",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_SETTING_UNAVAILABLE: "AMDSMI_STATUS_SETTING_UNAVAILABLE - Setting is not available",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_CORRUPTED_EEPROM: "AMDSMI_STATUS_CORRUPTED_EEPROM - Setting is not available",
|
||||
amdsmi_wrapper.AMDSMI_STATUS_UNKNOWN_ERROR : "AMDSMI_STATUS_UNKNOWN_ERROR - An unknown error occurred"
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,8 @@
|
||||
|
||||
import ctypes
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
from typing import Union, Any, Dict, List
|
||||
from enum import IntEnum
|
||||
from collections.abc import Iterable
|
||||
@@ -288,13 +290,30 @@ class AmdSmiVoltageType(IntEnum):
|
||||
VDDGFX = amdsmi_wrapper.AMDSMI_VOLT_TYPE_VDDGFX
|
||||
INVALID = amdsmi_wrapper.AMDSMI_VOLT_TYPE_INVALID
|
||||
|
||||
class AmdSmiAcceleratorPartitionResourceType(IntEnum):
|
||||
XCC = amdsmi_wrapper.AMDSMI_ACCELERATOR_XCC
|
||||
ENCODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_ENCODER
|
||||
DECODER = amdsmi_wrapper.AMDSMI_ACCELERATOR_DECODER
|
||||
DMA = amdsmi_wrapper.AMDSMI_ACCELERATOR_DMA
|
||||
JPEG = amdsmi_wrapper.AMDSMI_ACCELERATOR_JPEG
|
||||
MAX = amdsmi_wrapper.AMDSMI_ACCELERATOR_MAX
|
||||
|
||||
|
||||
class AmdSmiAcceleratorPartitionType(IntEnum):
|
||||
SPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_SPX
|
||||
DPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_DPX
|
||||
TPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_TPX
|
||||
QPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_QPX
|
||||
CPX = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_CPX
|
||||
INVALID = amdsmi_wrapper.AMDSMI_ACCELERATOR_PARTITION_INVALID
|
||||
|
||||
|
||||
class AmdSmiComputePartitionType(IntEnum):
|
||||
CPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_CPX
|
||||
SPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_SPX
|
||||
DPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_DPX
|
||||
TPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_TPX
|
||||
QPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_QPX
|
||||
CPX = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_CPX
|
||||
INVALID = amdsmi_wrapper.AMDSMI_COMPUTE_PARTITION_INVALID
|
||||
|
||||
|
||||
@@ -2729,6 +2748,7 @@ def amdsmi_get_gpu_compute_partition(processor_handle: amdsmi_wrapper.amdsmi_pro
|
||||
|
||||
def amdsmi_set_gpu_compute_partition(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
compute_partition: AmdSmiComputePartitionType):
|
||||
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
@@ -2743,6 +2763,21 @@ def amdsmi_set_gpu_compute_partition(processor_handle: amdsmi_wrapper.amdsmi_pro
|
||||
)
|
||||
)
|
||||
|
||||
def amdsmi_set_gpu_accelerator_partition_profile(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
                                                 profile_index: int):
    """Request the accelerator partition profile identified by ``profile_index``.

    Parameters:
        processor_handle: handle of the device to configure; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.
        profile_index: integer index of the profile to apply.

    Raises:
        AmdSmiParameterException: an argument has the wrong type. The handle
            is checked first, then the index.

    The status returned by the library call is handed to ``_check_res``
    (presumably raising on a non-success status; confirm against its definition).
    """
    # Validate in the same order as the parameters appear in the signature
    for argument, expected_type in (
        (processor_handle, amdsmi_wrapper.amdsmi_processor_handle),
        (profile_index, int),
    ):
        if not isinstance(argument, expected_type):
            raise AmdSmiParameterException(argument, expected_type)

    status = amdsmi_wrapper.amdsmi_set_gpu_accelerator_partition_profile(
        processor_handle, profile_index
    )
    _check_res(status)
|
||||
|
||||
def amdsmi_get_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_processor_handle):
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
@@ -2763,6 +2798,39 @@ def amdsmi_get_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_proc
|
||||
|
||||
return memory_partition.value.decode("utf-8")
|
||||
|
||||
def amdsmi_get_gpu_memory_partition_config(processor_handle: amdsmi_wrapper.amdsmi_processor_handle):
    """Query the memory partition configuration of a device.

    Parameters:
        processor_handle: handle of the device to query; must be an
            ``amdsmi_wrapper.amdsmi_processor_handle``.

    Returns:
        dict with the keys:
            ``partition_caps``  - list of the NPS mode names ("NPS1", "NPS2",
                                  "NPS4", "NPS8") whose capability bit is set
            ``mp_mode``         - name of the current mode with the
                                  "AMDSMI_MEMORY_PARTITION_" prefix removed;
                                  "UNKNOWN" is reported as "N/A"
            ``num_numa_ranges`` - always "N/A" (not populated here)
            ``numa_range``      - always "N/A" (not populated here)

    Raises:
        AmdSmiParameterException: ``processor_handle`` has the wrong type.

    The status returned by the library call is handed to ``_check_res``
    (presumably raising on a non-success status; confirm against its definition).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    config = amdsmi_wrapper.amdsmi_memory_partition_config_t()
    status = amdsmi_wrapper.amdsmi_get_gpu_memory_partition_config(processor_handle, config)
    _check_res(status)

    # Translate the capability bit-field into the list of supported mode names
    nps_flags = config.partition_caps.amdsmi_nps_flags_t
    supported_modes = [
        mode_name
        for mode_name, cap_bit in (
            ("NPS1", nps_flags.nps1_cap),
            ("NPS2", nps_flags.nps2_cap),
            ("NPS4", nps_flags.nps4_cap),
            ("NPS8", nps_flags.nps8_cap),
        )
        if cap_bit == 1
    ]

    mode_enum_name = amdsmi_wrapper.amdsmi_memory_partition_type_t__enumvalues[config.mp_mode]
    current_mode = mode_enum_name.replace("AMDSMI_MEMORY_PARTITION_", "").replace("UNKNOWN", "N/A")

    return_dict = {
        "partition_caps": supported_modes,
        "mp_mode": current_mode,
        "num_numa_ranges": "N/A",
        "numa_range": "N/A",
    }
    logging.debug("amdsmi_interface.py | amdsmi_get_gpu_memory_partition_config | return_dictionary = \n" + str(json.dumps(return_dict, indent=4)))
    return return_dict
|
||||
|
||||
|
||||
def amdsmi_set_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
|
||||
memory_partition: AmdSmiMemoryPartitionType):
|
||||
@@ -2780,6 +2848,21 @@ def amdsmi_set_gpu_memory_partition(processor_handle: amdsmi_wrapper.amdsmi_proc
|
||||
)
|
||||
)
|
||||
|
||||
def amdsmi_set_gpu_memory_partition_mode(processor_handle: amdsmi_wrapper.amdsmi_processor_handle,
                                         memory_partition: AmdSmiMemoryPartitionType):
    """Set the memory partition (NPS) mode of a processor.

    Parameters:
        processor_handle: the ``amdsmi_wrapper.amdsmi_processor_handle`` to modify.
        memory_partition: the requested mode, as an ``AmdSmiMemoryPartitionType``.

    Raises:
        AmdSmiParameterException: if either argument has the wrong type.
        The status returned by the library call is handed to ``_check_res``
        (presumably raising on a non-success status; see its definition).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    if not isinstance(memory_partition, AmdSmiMemoryPartitionType):
        raise AmdSmiParameterException(memory_partition, AmdSmiMemoryPartitionType)

    # Dispatch to the "_mode" entry point this function is named after; the
    # previous body called the legacy amdsmi_set_gpu_memory_partition binding.
    _check_res(
        amdsmi_wrapper.amdsmi_set_gpu_memory_partition_mode(
            processor_handle, memory_partition
        )
    )
|
||||
|
||||
def amdsmi_get_gpu_accelerator_partition_profile(
|
||||
processor_handle: amdsmi_wrapper.amdsmi_processor_handle
|
||||
@@ -2788,29 +2871,129 @@ def amdsmi_get_gpu_accelerator_partition_profile(
|
||||
raise AmdSmiParameterException(
|
||||
processor_handle, amdsmi_wrapper.amdsmi_processor_handle
|
||||
)
|
||||
partition_id = ctypes.c_uint32()
|
||||
length = 8
|
||||
partition_id = [0, 0, 0, 0, 0, 0, 0, 0]
|
||||
partition_id_list = (ctypes.c_uint32 * length)(*partition_id)
|
||||
profile = amdsmi_wrapper.amdsmi_accelerator_partition_profile_t()
|
||||
|
||||
_check_res(
|
||||
amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile(processor_handle,
|
||||
ctypes.byref(profile),
|
||||
ctypes.byref(partition_id))
|
||||
ctypes.byref(profile), partition_id_list)
|
||||
)
|
||||
profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[profile.profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "")
|
||||
profile_type_ret = profile_type_ret.replace("INVALID", "N/A")
|
||||
|
||||
length = profile.num_partitions
|
||||
partition_ids = []
|
||||
for i in range(profile.num_partitions):
|
||||
partition_ids.append(partition_id_list[i])
|
||||
|
||||
last_element = 0
|
||||
if length > 0:
|
||||
last_element = length - 1
|
||||
if ((partition_ids[last_element] == 0)
|
||||
and not((profile_type_ret == str("SPX")) or (profile_type_ret == str("N/A")))):
|
||||
partition_ids = "N/A"
|
||||
|
||||
mem_caps_list = []
|
||||
if profile.memory_caps.amdsmi_nps_flags_t.nps1_cap == 1:
|
||||
mem_caps_list.append("NPS1")
|
||||
if profile.memory_caps.amdsmi_nps_flags_t.nps2_cap == 1:
|
||||
mem_caps_list.append("NPS2")
|
||||
if profile.memory_caps.amdsmi_nps_flags_t.nps4_cap == 1:
|
||||
mem_caps_list.append("NPS4")
|
||||
if profile.memory_caps.amdsmi_nps_flags_t.nps8_cap == 1:
|
||||
mem_caps_list.append("NPS8")
|
||||
|
||||
partition_profile_dict = {
|
||||
"profile_type" : profile.profile_type,
|
||||
"profile_type" : profile_type_ret,
|
||||
"num_partitions" : profile.num_partitions,
|
||||
"profile_index" : profile.profile_index,
|
||||
"memory_caps" : profile.memory_caps,
|
||||
"memory_caps": mem_caps_list,
|
||||
"num_resources" : profile.num_resources,
|
||||
"resources" : "N/A"
|
||||
}
|
||||
|
||||
return {
|
||||
"partition_id" : partition_id.value,
|
||||
return_dictionary = {
|
||||
"partition_id" : partition_ids,
|
||||
"partition_profile" : partition_profile_dict
|
||||
}
|
||||
|
||||
logging.debug("amdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile | return_dictionary = \n" + str(json.dumps(return_dictionary, indent=4)))
|
||||
return return_dictionary
|
||||
|
||||
def amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle: amdsmi_wrapper.amdsmi_processor_handle) -> Dict:
    """Query the accelerator partition profiles a processor reports.

    Parameters:
        processor_handle: the ``amdsmi_wrapper.amdsmi_processor_handle`` to query.

    Returns:
        dict with the keys:
            "num_profiles": number of entries in "profiles".
            "num_resource_profiles": value reported by the library.
            "resource_profiles": the resource list built for the last profile
                iterated (an empty list when no profiles are reported).
            "default_profile_index": value reported by the library.
            "profiles": one dict per profile with "profile_type" (name with
                the "AMDSMI_ACCELERATOR_PARTITION_" prefix removed, "INVALID"
                shown as "N/A"), "num_partitions", "profile_index",
                "memory_caps" (list of NPS names whose flag equals 1),
                "num_resources" and "resources" (list of resource dicts).

    Raises:
        AmdSmiParameterException: if ``processor_handle`` has the wrong type.
        The status returned by the library call is handed to ``_check_res``
        (presumably raising on a non-success status; see its definition).
    """
    if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
        raise AmdSmiParameterException(
            processor_handle, amdsmi_wrapper.amdsmi_processor_handle
        )

    config = amdsmi_wrapper.amdsmi_accelerator_partition_profile_config_t()

    _check_res(amdsmi_wrapper.amdsmi_get_gpu_accelerator_partition_profile_config(processor_handle,
                                                                                  ctypes.byref(config)))
    logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | START - "
                  + "config.num_profiles = " + str(config.num_profiles)
                  + "\n; config.num_resource_profiles = " + str(config.num_resource_profiles)
                  + "\n; config.resource_profiles = " + str(config.resource_profiles)
                  + "\n; config.default_profile_index = " + str(config.default_profile_index)
                  + "\n; config.profiles = " + str(config.profiles))

    profiles = []
    # Bound before the loop so that a device reporting zero profiles yields an
    # empty "resource_profiles" entry instead of raising NameError below.
    resources = []
    # Running index into config.resource_profiles; it advances once per inner
    # iteration and is never reset between profiles.
    # NOTE(review): nothing here bounds resource_idx against the length of
    # config.resource_profiles - confirm the library guarantees
    # num_profiles * num_resource_profiles fits in that array.
    resource_idx = 0
    for i in range(config.num_profiles):
        profile = config.profiles[i]
        logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | profile = " + str(profile))
        profile_type_ret = amdsmi_wrapper.amdsmi_accelerator_partition_type_t__enumvalues[
            config.profiles[i].profile_type].replace("AMDSMI_ACCELERATOR_PARTITION_", "")
        profile_type_ret = profile_type_ret.replace("INVALID", "N/A")
        resources = []

        # Translate the capability bit-field into a list of NPS mode names.
        mem_caps_list = []
        if profile.memory_caps.amdsmi_nps_flags_t.nps1_cap == 1:
            mem_caps_list.append("NPS1")
        if profile.memory_caps.amdsmi_nps_flags_t.nps2_cap == 1:
            mem_caps_list.append("NPS2")
        if profile.memory_caps.amdsmi_nps_flags_t.nps4_cap == 1:
            mem_caps_list.append("NPS4")
        if profile.memory_caps.amdsmi_nps_flags_t.nps8_cap == 1:
            mem_caps_list.append("NPS8")

        for r in range(config.num_resource_profiles):
            logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | i = " + str(i) + "; r = " + str(r) + "; resource_idx = " + str(resource_idx))
            res_profile = config.resource_profiles[resource_idx]
            resource_profiles_ret = amdsmi_wrapper.amdsmi_accelerator_partition_resource_type_t__enumvalues[
                res_profile.resource_type].replace("AMDSMI_ACCELERATOR_", "")
            resource_profile_dict = {
                "profile_index": res_profile.profile_index,
                "resource_type": resource_profiles_ret,
                "partition_resource": res_profile.partition_resource,
                "num_partitions_share_resource": res_profile.num_partitions_share_resource,
            }
            logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | resource_profile_dict = " + str(resource_profile_dict))
            resources.append(resource_profile_dict)
            resource_idx += 1

        profile_dict = {
            "profile_type": profile_type_ret,
            "num_partitions": profile.num_partitions,
            "profile_index": profile.profile_index,
            "memory_caps": mem_caps_list,
            "num_resources": profile.num_resources,
            "resources": resources
        }
        profiles.append(profile_dict)

    config_dict = {
        "num_profiles": config.num_profiles,
        "num_resource_profiles": config.num_resource_profiles,
        "resource_profiles": resources,
        "default_profile_index": config.default_profile_index,
        "profiles": profiles,
    }
    logging.debug("\namdsmi_interface.py | amdsmi_get_gpu_accelerator_partition_profile_config | END - config_dict = \n" + str(json.dumps(config_dict, indent=4)))

    return config_dict
|
||||
|
||||
def amdsmi_get_xgmi_info(processor_handle: amdsmi_wrapper.amdsmi_processor_handle):
|
||||
if not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle):
|
||||
|
||||
@@ -285,6 +285,7 @@ amdsmi_status_t__enumvalues = {
|
||||
53: 'AMDSMI_STATUS_ARG_PTR_NULL',
|
||||
54: 'AMDSMI_STATUS_AMDGPU_RESTART_ERR',
|
||||
55: 'AMDSMI_STATUS_SETTING_UNAVAILABLE',
|
||||
56: 'AMDSMI_STATUS_CORRUPTED_EEPROM',
|
||||
4294967294: 'AMDSMI_STATUS_MAP_ERROR',
|
||||
4294967295: 'AMDSMI_STATUS_UNKNOWN_ERROR',
|
||||
}
|
||||
@@ -329,6 +330,7 @@ AMDSMI_STATUS_FILE_NOT_FOUND = 52
|
||||
AMDSMI_STATUS_ARG_PTR_NULL = 53
|
||||
AMDSMI_STATUS_AMDGPU_RESTART_ERR = 54
|
||||
AMDSMI_STATUS_SETTING_UNAVAILABLE = 55
|
||||
AMDSMI_STATUS_CORRUPTED_EEPROM = 56
|
||||
AMDSMI_STATUS_MAP_ERROR = 4294967294
|
||||
AMDSMI_STATUS_UNKNOWN_ERROR = 4294967295
|
||||
amdsmi_status_t = ctypes.c_uint32 # enum
|
||||
@@ -372,6 +374,7 @@ amdsmi_accelerator_partition_type_t__enumvalues = {
|
||||
3: 'AMDSMI_ACCELERATOR_PARTITION_TPX',
|
||||
4: 'AMDSMI_ACCELERATOR_PARTITION_QPX',
|
||||
5: 'AMDSMI_ACCELERATOR_PARTITION_CPX',
|
||||
6: 'AMDSMI_ACCELERATOR_PARTITION_MAX',
|
||||
}
|
||||
AMDSMI_ACCELERATOR_PARTITION_INVALID = 0
|
||||
AMDSMI_ACCELERATOR_PARTITION_SPX = 1
|
||||
@@ -379,8 +382,26 @@ AMDSMI_ACCELERATOR_PARTITION_DPX = 2
|
||||
AMDSMI_ACCELERATOR_PARTITION_TPX = 3
|
||||
AMDSMI_ACCELERATOR_PARTITION_QPX = 4
|
||||
AMDSMI_ACCELERATOR_PARTITION_CPX = 5
|
||||
AMDSMI_ACCELERATOR_PARTITION_MAX = 6
|
||||
amdsmi_accelerator_partition_type_t = ctypes.c_uint32 # enum
|
||||
|
||||
# values for enumeration 'amdsmi_accelerator_partition_resource_type_t'
|
||||
amdsmi_accelerator_partition_resource_type_t__enumvalues = {
|
||||
0: 'AMDSMI_ACCELERATOR_XCC',
|
||||
1: 'AMDSMI_ACCELERATOR_ENCODER',
|
||||
2: 'AMDSMI_ACCELERATOR_DECODER',
|
||||
3: 'AMDSMI_ACCELERATOR_DMA',
|
||||
4: 'AMDSMI_ACCELERATOR_JPEG',
|
||||
5: 'AMDSMI_ACCELERATOR_MAX',
|
||||
}
|
||||
AMDSMI_ACCELERATOR_XCC = 0
|
||||
AMDSMI_ACCELERATOR_ENCODER = 1
|
||||
AMDSMI_ACCELERATOR_DECODER = 2
|
||||
AMDSMI_ACCELERATOR_DMA = 3
|
||||
AMDSMI_ACCELERATOR_JPEG = 4
|
||||
AMDSMI_ACCELERATOR_MAX = 5
|
||||
amdsmi_accelerator_partition_resource_type_t = ctypes.c_uint32 # enum
|
||||
|
||||
# values for enumeration 'amdsmi_compute_partition_type_t'
|
||||
amdsmi_compute_partition_type_t__enumvalues = {
|
||||
0: 'AMDSMI_COMPUTE_PARTITION_INVALID',
|
||||
@@ -403,14 +424,14 @@ amdsmi_memory_partition_type_t__enumvalues = {
|
||||
0: 'AMDSMI_MEMORY_PARTITION_UNKNOWN',
|
||||
1: 'AMDSMI_MEMORY_PARTITION_NPS1',
|
||||
2: 'AMDSMI_MEMORY_PARTITION_NPS2',
|
||||
3: 'AMDSMI_MEMORY_PARTITION_NPS4',
|
||||
4: 'AMDSMI_MEMORY_PARTITION_NPS8',
|
||||
4: 'AMDSMI_MEMORY_PARTITION_NPS4',
|
||||
8: 'AMDSMI_MEMORY_PARTITION_NPS8',
|
||||
}
|
||||
AMDSMI_MEMORY_PARTITION_UNKNOWN = 0
|
||||
AMDSMI_MEMORY_PARTITION_NPS1 = 1
|
||||
AMDSMI_MEMORY_PARTITION_NPS2 = 2
|
||||
AMDSMI_MEMORY_PARTITION_NPS4 = 3
|
||||
AMDSMI_MEMORY_PARTITION_NPS8 = 4
|
||||
AMDSMI_MEMORY_PARTITION_NPS4 = 4
|
||||
AMDSMI_MEMORY_PARTITION_NPS8 = 8
|
||||
amdsmi_memory_partition_type_t = ctypes.c_uint32 # enum
|
||||
|
||||
# values for enumeration 'amdsmi_temperature_type_t'
|
||||
@@ -979,6 +1000,31 @@ union_amdsmi_nps_caps_t._fields_ = [
|
||||
]
|
||||
|
||||
amdsmi_nps_caps_t = union_amdsmi_nps_caps_t
|
||||
class struct_amdsmi_memory_partition_config_t(Structure):
|
||||
pass
|
||||
|
||||
class struct_numa_range_(Structure):
|
||||
pass
|
||||
|
||||
struct_numa_range_._pack_ = 1 # source:False
|
||||
struct_numa_range_._fields_ = [
|
||||
('memory_type', amdsmi_vram_type_t),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
('start', ctypes.c_uint64),
|
||||
('end', ctypes.c_uint64),
|
||||
]
|
||||
|
||||
struct_amdsmi_memory_partition_config_t._pack_ = 1 # source:False
|
||||
struct_amdsmi_memory_partition_config_t._fields_ = [
|
||||
('partition_caps', amdsmi_nps_caps_t),
|
||||
('mp_mode', amdsmi_memory_partition_type_t),
|
||||
('num_numa_ranges', ctypes.c_uint32),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
('numa_range', struct_numa_range_ * 32),
|
||||
('reserved', ctypes.c_uint64 * 11),
|
||||
]
|
||||
|
||||
amdsmi_memory_partition_config_t = struct_amdsmi_memory_partition_config_t
|
||||
class struct_amdsmi_accelerator_partition_profile_t(Structure):
|
||||
pass
|
||||
|
||||
@@ -995,6 +1041,34 @@ struct_amdsmi_accelerator_partition_profile_t._fields_ = [
|
||||
]
|
||||
|
||||
amdsmi_accelerator_partition_profile_t = struct_amdsmi_accelerator_partition_profile_t
|
||||
class struct_amdsmi_accelerator_partition_resource_profile_t(Structure):
|
||||
pass
|
||||
|
||||
struct_amdsmi_accelerator_partition_resource_profile_t._pack_ = 1 # source:False
|
||||
struct_amdsmi_accelerator_partition_resource_profile_t._fields_ = [
|
||||
('profile_index', ctypes.c_uint32),
|
||||
('resource_type', amdsmi_accelerator_partition_resource_type_t),
|
||||
('partition_resource', ctypes.c_uint32),
|
||||
('num_partitions_share_resource', ctypes.c_uint32),
|
||||
('reserved', ctypes.c_uint64 * 6),
|
||||
]
|
||||
|
||||
amdsmi_accelerator_partition_resource_profile_t = struct_amdsmi_accelerator_partition_resource_profile_t
|
||||
class struct_amdsmi_accelerator_partition_profile_config_t(Structure):
|
||||
pass
|
||||
|
||||
struct_amdsmi_accelerator_partition_profile_config_t._pack_ = 1 # source:False
|
||||
struct_amdsmi_accelerator_partition_profile_config_t._fields_ = [
|
||||
('num_profiles', ctypes.c_uint32),
|
||||
('num_resource_profiles', ctypes.c_uint32),
|
||||
('resource_profiles', struct_amdsmi_accelerator_partition_resource_profile_t * 32),
|
||||
('default_profile_index', ctypes.c_uint32),
|
||||
('PADDING_0', ctypes.c_ubyte * 4),
|
||||
('profiles', struct_amdsmi_accelerator_partition_profile_t * 32),
|
||||
('reserved', ctypes.c_uint64 * 30),
|
||||
]
|
||||
|
||||
amdsmi_accelerator_partition_profile_config_t = struct_amdsmi_accelerator_partition_profile_config_t
|
||||
|
||||
# values for enumeration 'amdsmi_link_type_t'
|
||||
amdsmi_link_type_t__enumvalues = {
|
||||
@@ -2181,6 +2255,12 @@ amdsmi_get_gpu_memory_usage.argtypes = [amdsmi_processor_handle, amdsmi_memory_t
|
||||
amdsmi_get_gpu_bad_page_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_bad_page_info
|
||||
amdsmi_get_gpu_bad_page_info.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_bad_page_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32), ctypes.POINTER(struct_amdsmi_retired_page_record_t)]
|
||||
amdsmi_get_gpu_bad_page_threshold = _libraries['libamd_smi.so'].amdsmi_get_gpu_bad_page_threshold
|
||||
amdsmi_get_gpu_bad_page_threshold.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_bad_page_threshold.argtypes = [amdsmi_processor_handle, ctypes.POINTER(ctypes.c_uint32)]
|
||||
amdsmi_gpu_validate_ras_eeprom = _libraries['libamd_smi.so'].amdsmi_gpu_validate_ras_eeprom
|
||||
amdsmi_gpu_validate_ras_eeprom.restype = amdsmi_status_t
|
||||
amdsmi_gpu_validate_ras_eeprom.argtypes = [amdsmi_processor_handle]
|
||||
amdsmi_get_gpu_ras_feature_info = _libraries['libamd_smi.so'].amdsmi_get_gpu_ras_feature_info
|
||||
amdsmi_get_gpu_ras_feature_info.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_ras_feature_info.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_ras_feature_t)]
|
||||
@@ -2382,9 +2462,21 @@ amdsmi_get_gpu_memory_partition.argtypes = [amdsmi_processor_handle, ctypes.POIN
|
||||
amdsmi_set_gpu_memory_partition = _libraries['libamd_smi.so'].amdsmi_set_gpu_memory_partition
|
||||
amdsmi_set_gpu_memory_partition.restype = amdsmi_status_t
|
||||
amdsmi_set_gpu_memory_partition.argtypes = [amdsmi_processor_handle, amdsmi_memory_partition_type_t]
|
||||
amdsmi_get_gpu_memory_partition_config = _libraries['libamd_smi.so'].amdsmi_get_gpu_memory_partition_config
|
||||
amdsmi_get_gpu_memory_partition_config.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_memory_partition_config.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_memory_partition_config_t)]
|
||||
amdsmi_set_gpu_memory_partition_mode = _libraries['libamd_smi.so'].amdsmi_set_gpu_memory_partition_mode
|
||||
amdsmi_set_gpu_memory_partition_mode.restype = amdsmi_status_t
|
||||
amdsmi_set_gpu_memory_partition_mode.argtypes = [amdsmi_processor_handle, amdsmi_memory_partition_type_t]
|
||||
amdsmi_get_gpu_accelerator_partition_profile_config = _libraries['libamd_smi.so'].amdsmi_get_gpu_accelerator_partition_profile_config
|
||||
amdsmi_get_gpu_accelerator_partition_profile_config.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_accelerator_partition_profile_config.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_accelerator_partition_profile_config_t)]
|
||||
amdsmi_get_gpu_accelerator_partition_profile = _libraries['libamd_smi.so'].amdsmi_get_gpu_accelerator_partition_profile
|
||||
amdsmi_get_gpu_accelerator_partition_profile.restype = amdsmi_status_t
|
||||
amdsmi_get_gpu_accelerator_partition_profile.argtypes = [amdsmi_processor_handle, ctypes.POINTER(struct_amdsmi_accelerator_partition_profile_t), ctypes.POINTER(ctypes.c_uint32)]
|
||||
amdsmi_set_gpu_accelerator_partition_profile = _libraries['libamd_smi.so'].amdsmi_set_gpu_accelerator_partition_profile
|
||||
amdsmi_set_gpu_accelerator_partition_profile.restype = amdsmi_status_t
|
||||
amdsmi_set_gpu_accelerator_partition_profile.argtypes = [amdsmi_processor_handle, uint32_t]
|
||||
amdsmi_init_gpu_event_notification = _libraries['libamd_smi.so'].amdsmi_init_gpu_event_notification
|
||||
amdsmi_init_gpu_event_notification.restype = amdsmi_status_t
|
||||
amdsmi_init_gpu_event_notification.argtypes = [amdsmi_processor_handle]
|
||||
@@ -2591,13 +2683,17 @@ amdsmi_get_esmi_err_msg = _libraries['libamd_smi.so'].amdsmi_get_esmi_err_msg
|
||||
amdsmi_get_esmi_err_msg.restype = amdsmi_status_t
|
||||
amdsmi_get_esmi_err_msg.argtypes = [amdsmi_status_t, ctypes.POINTER(ctypes.POINTER(ctypes.c_char))]
|
||||
__all__ = \
|
||||
['AGG_BW0', 'AMDSMI_ACCELERATOR_PARTITION_CPX',
|
||||
['AGG_BW0', 'AMDSMI_ACCELERATOR_DECODER',
|
||||
'AMDSMI_ACCELERATOR_DMA', 'AMDSMI_ACCELERATOR_ENCODER',
|
||||
'AMDSMI_ACCELERATOR_JPEG', 'AMDSMI_ACCELERATOR_MAX',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_CPX',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_DPX',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_INVALID',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_MAX',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_QPX',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_SPX',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_TPX', 'AMDSMI_AVERAGE_POWER',
|
||||
'AMDSMI_CACHE_PROPERTY_CPU_CACHE',
|
||||
'AMDSMI_ACCELERATOR_PARTITION_TPX', 'AMDSMI_ACCELERATOR_XCC',
|
||||
'AMDSMI_AVERAGE_POWER', 'AMDSMI_CACHE_PROPERTY_CPU_CACHE',
|
||||
'AMDSMI_CACHE_PROPERTY_DATA_CACHE',
|
||||
'AMDSMI_CACHE_PROPERTY_ENABLED',
|
||||
'AMDSMI_CACHE_PROPERTY_INST_CACHE',
|
||||
@@ -2737,6 +2833,7 @@ __all__ = \
|
||||
'AMDSMI_REG_XGMI', 'AMDSMI_STATUS_ADDRESS_FAULT',
|
||||
'AMDSMI_STATUS_AMDGPU_RESTART_ERR', 'AMDSMI_STATUS_API_FAILED',
|
||||
'AMDSMI_STATUS_ARG_PTR_NULL', 'AMDSMI_STATUS_BUSY',
|
||||
'AMDSMI_STATUS_CORRUPTED_EEPROM',
|
||||
'AMDSMI_STATUS_DRIVER_NOT_LOADED', 'AMDSMI_STATUS_DRM_ERROR',
|
||||
'AMDSMI_STATUS_FAIL_LOAD_MODULE',
|
||||
'AMDSMI_STATUS_FAIL_LOAD_SYMBOL', 'AMDSMI_STATUS_FILE_ERROR',
|
||||
@@ -2801,7 +2898,10 @@ __all__ = \
|
||||
'AMDSMI_XGMI_STATUS_ERROR', 'AMDSMI_XGMI_STATUS_MULTIPLE_ERRORS',
|
||||
'AMDSMI_XGMI_STATUS_NO_ERRORS', 'CLK_LIMIT_MAX', 'CLK_LIMIT_MIN',
|
||||
'RD_BW0', 'WR_BW0', 'amd_metrics_table_header_t',
|
||||
'amdsmi_accelerator_partition_profile_config_t',
|
||||
'amdsmi_accelerator_partition_profile_t',
|
||||
'amdsmi_accelerator_partition_resource_profile_t',
|
||||
'amdsmi_accelerator_partition_resource_type_t',
|
||||
'amdsmi_accelerator_partition_type_t', 'amdsmi_asic_info_t',
|
||||
'amdsmi_bdf_t', 'amdsmi_bit_field_t', 'amdsmi_board_info_t',
|
||||
'amdsmi_cache_property_type_t', 'amdsmi_card_form_factor_t',
|
||||
@@ -2849,9 +2949,11 @@ __all__ = \
|
||||
'amdsmi_get_energy_count', 'amdsmi_get_esmi_err_msg',
|
||||
'amdsmi_get_fw_info',
|
||||
'amdsmi_get_gpu_accelerator_partition_profile',
|
||||
'amdsmi_get_gpu_accelerator_partition_profile_config',
|
||||
'amdsmi_get_gpu_activity', 'amdsmi_get_gpu_asic_info',
|
||||
'amdsmi_get_gpu_available_counters',
|
||||
'amdsmi_get_gpu_bad_page_info', 'amdsmi_get_gpu_bdf_id',
|
||||
'amdsmi_get_gpu_bad_page_info',
|
||||
'amdsmi_get_gpu_bad_page_threshold', 'amdsmi_get_gpu_bdf_id',
|
||||
'amdsmi_get_gpu_board_info', 'amdsmi_get_gpu_cache_info',
|
||||
'amdsmi_get_gpu_compute_partition',
|
||||
'amdsmi_get_gpu_compute_process_gpus',
|
||||
@@ -2865,6 +2967,7 @@ __all__ = \
|
||||
'amdsmi_get_gpu_id', 'amdsmi_get_gpu_kfd_info',
|
||||
'amdsmi_get_gpu_mem_overdrive_level',
|
||||
'amdsmi_get_gpu_memory_partition',
|
||||
'amdsmi_get_gpu_memory_partition_config',
|
||||
'amdsmi_get_gpu_memory_reserved_pages',
|
||||
'amdsmi_get_gpu_memory_total', 'amdsmi_get_gpu_memory_usage',
|
||||
'amdsmi_get_gpu_metrics_header_info',
|
||||
@@ -2905,15 +3008,16 @@ __all__ = \
|
||||
'amdsmi_gpu_cache_info_t', 'amdsmi_gpu_control_counter',
|
||||
'amdsmi_gpu_counter_group_supported', 'amdsmi_gpu_create_counter',
|
||||
'amdsmi_gpu_destroy_counter', 'amdsmi_gpu_metrics_t',
|
||||
'amdsmi_gpu_read_counter', 'amdsmi_gpu_xcp_metrics_t',
|
||||
'amdsmi_gpu_xgmi_error_status', 'amdsmi_hsmp_freqlimit_src_names',
|
||||
'amdsmi_hsmp_metrics_table_t', 'amdsmi_init',
|
||||
'amdsmi_init_flags_t', 'amdsmi_init_gpu_event_notification',
|
||||
'amdsmi_io_bw_encoding_t', 'amdsmi_io_link_type_t',
|
||||
'amdsmi_is_P2P_accessible',
|
||||
'amdsmi_gpu_read_counter', 'amdsmi_gpu_validate_ras_eeprom',
|
||||
'amdsmi_gpu_xcp_metrics_t', 'amdsmi_gpu_xgmi_error_status',
|
||||
'amdsmi_hsmp_freqlimit_src_names', 'amdsmi_hsmp_metrics_table_t',
|
||||
'amdsmi_init', 'amdsmi_init_flags_t',
|
||||
'amdsmi_init_gpu_event_notification', 'amdsmi_io_bw_encoding_t',
|
||||
'amdsmi_io_link_type_t', 'amdsmi_is_P2P_accessible',
|
||||
'amdsmi_is_gpu_power_management_enabled', 'amdsmi_kfd_info_t',
|
||||
'amdsmi_link_id_bw_type_t', 'amdsmi_link_metrics_t',
|
||||
'amdsmi_link_type_t', 'amdsmi_memory_page_status_t',
|
||||
'amdsmi_memory_partition_config_t',
|
||||
'amdsmi_memory_partition_type_t', 'amdsmi_memory_type_t',
|
||||
'amdsmi_mm_ip_t', 'amdsmi_name_value_t', 'amdsmi_nps_caps_t',
|
||||
'amdsmi_od_vddc_point_t', 'amdsmi_od_volt_curve_t',
|
||||
@@ -2936,10 +3040,12 @@ __all__ = \
|
||||
'amdsmi_set_cpu_socket_boostlimit',
|
||||
'amdsmi_set_cpu_socket_lclk_dpm_level',
|
||||
'amdsmi_set_cpu_socket_power_cap', 'amdsmi_set_cpu_xgmi_width',
|
||||
'amdsmi_set_gpu_accelerator_partition_profile',
|
||||
'amdsmi_set_gpu_clk_limit', 'amdsmi_set_gpu_clk_range',
|
||||
'amdsmi_set_gpu_compute_partition',
|
||||
'amdsmi_set_gpu_event_notification_mask',
|
||||
'amdsmi_set_gpu_fan_speed', 'amdsmi_set_gpu_memory_partition',
|
||||
'amdsmi_set_gpu_memory_partition_mode',
|
||||
'amdsmi_set_gpu_od_clk_info', 'amdsmi_set_gpu_od_volt_info',
|
||||
'amdsmi_set_gpu_overdrive_level', 'amdsmi_set_gpu_pci_bandwidth',
|
||||
'amdsmi_set_gpu_perf_determinism_mode',
|
||||
@@ -2962,7 +3068,9 @@ __all__ = \
|
||||
'amdsmi_xgmi_link_status_t', 'amdsmi_xgmi_link_status_type_t',
|
||||
'amdsmi_xgmi_status_t', 'processor_type_t', 'size_t',
|
||||
'struct__links', 'struct_amd_metrics_table_header_t',
|
||||
'struct_amdsmi_accelerator_partition_profile_config_t',
|
||||
'struct_amdsmi_accelerator_partition_profile_t',
|
||||
'struct_amdsmi_accelerator_partition_resource_profile_t',
|
||||
'struct_amdsmi_asic_info_t', 'struct_amdsmi_board_info_t',
|
||||
'struct_amdsmi_clk_info_t', 'struct_amdsmi_counter_value_t',
|
||||
'struct_amdsmi_ddr_bw_metrics_t', 'struct_amdsmi_dimm_power_t',
|
||||
@@ -2977,6 +3085,7 @@ __all__ = \
|
||||
'struct_amdsmi_gpu_xcp_metrics_t',
|
||||
'struct_amdsmi_hsmp_metrics_table_t', 'struct_amdsmi_kfd_info_t',
|
||||
'struct_amdsmi_link_id_bw_type_t', 'struct_amdsmi_link_metrics_t',
|
||||
'struct_amdsmi_memory_partition_config_t',
|
||||
'struct_amdsmi_name_value_t', 'struct_amdsmi_od_vddc_point_t',
|
||||
'struct_amdsmi_od_volt_curve_t',
|
||||
'struct_amdsmi_od_volt_freq_data_t',
|
||||
@@ -2996,7 +3105,7 @@ __all__ = \
|
||||
'struct_amdsmi_vram_usage_t', 'struct_amdsmi_xgmi_info_t',
|
||||
'struct_amdsmi_xgmi_link_status_t', 'struct_cache_',
|
||||
'struct_engine_usage_', 'struct_fw_info_list_',
|
||||
'struct_memory_usage_', 'struct_nps_flags_',
|
||||
'struct_memory_usage_', 'struct_nps_flags_', 'struct_numa_range_',
|
||||
'struct_pcie_metric_', 'struct_pcie_static_',
|
||||
'struct_amdsmi_bdf_t', 'uint32_t', 'uint64_t', 'uint8_t',
|
||||
'union_amdsmi_bdf_t', 'union_amdsmi_nps_caps_t']
|
||||
|
||||
@@ -440,6 +440,31 @@ typedef enum {
|
||||
typedef rsmi_memory_partition_type_t rsmi_memory_partition_type;
|
||||
/// \endcond
|
||||
|
||||
/**
|
||||
* @brief XCP resources.
|
||||
* This enum is used to identify
|
||||
* various accelerator resource types.
|
||||
*/
|
||||
typedef enum {
|
||||
RSMI_ACCELERATOR_XCC,
|
||||
RSMI_ACCELERATOR_ENCODER,
|
||||
RSMI_ACCELERATOR_DECODER,
|
||||
RSMI_ACCELERATOR_DMA,
|
||||
RSMI_ACCELERATOR_JPEG,
|
||||
RSMI_ACCELERATOR_MAX
|
||||
} rsmi_accelerator_partition_resource_type_t;
|
||||
|
||||
/**
|
||||
* @brief Accelerator Partition Resources.
|
||||
* This struct is used to identify various partition resource profiles.
|
||||
*/
|
||||
typedef struct {
|
||||
rsmi_accelerator_partition_resource_type_t resource_type;
|
||||
uint32_t partition_resource; //!< Resources a partition can use, which may be shared
|
||||
uint32_t num_partitions_share_resource; //!< If it is greater than 1, then resource is shared.
|
||||
uint64_t reserved[6];
|
||||
} rsmi_accelerator_partition_resource_profile_t;
|
||||
|
||||
/**
|
||||
* @brief Temperature Metrics. This enum is used to identify various
|
||||
* temperature metrics. Corresponding values will be in millidegrees
|
||||
@@ -4625,6 +4650,192 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_partition_id_get(uint32_t dv_ind, uint32_t *partition_id);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the available compute partition capabilities
|
||||
* for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a string @p compute_partition_caps ,
|
||||
* and uint32 @p len , this function will attempt to obtain the device's
|
||||
* available compute partition capabilities string. Upon successful
|
||||
* retrieval, the obtained device's available compute partition capabilities
|
||||
* string shall be stored in the passed @p compute_partition_caps
|
||||
* char string variable.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] compute_partition_caps a pointer to a char string variable,
|
||||
* which the device's available compute partition capabilities will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p len ,
|
||||
* suggested length is 30 or greater.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
|
||||
* large enough to hold the entire compute partition capabilities string. In this case,
|
||||
* only @p len bytes will be written.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_capabilities_get(uint32_t dv_ind, char *compute_partition_caps,
|
||||
uint32_t len);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the compute partition supported xcp configs
|
||||
* for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a string @p supported_configs ,
|
||||
* and uint32 @p len , this function will attempt to obtain the device's
|
||||
* compute partition supported xcp configs string. Upon successful
|
||||
* retrieval, the obtained device's available compute partition supported xcp configs
|
||||
* string shall be stored in the passed @p supported_configs
|
||||
* char string variable.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] supported_configs a pointer to a char string variable,
|
||||
* which the device's compute partition supported xcp configs will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p len ,
|
||||
* suggested length is 30 or greater.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
|
||||
* large enough to hold the entire supported xcp configs string. In this case,
|
||||
* only @p len bytes will be written.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_supported_xcp_configs_get(uint32_t dv_ind, char *supported_configs,
|
||||
uint32_t len);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the compute partition supported NPS configs
|
||||
* for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a string @p supported_configs ,
|
||||
* and uint32 @p len , this function will attempt to obtain the device's
|
||||
* compute partition supported NPS configs string. Upon successful
|
||||
* retrieval, the obtained device's available compute partition supported NPS configs
|
||||
* string shall be stored in the passed @p supported_configs
|
||||
* char string variable.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] supported_configs a pointer to a char string variable,
|
||||
* which the device's compute partition supported NPS configs will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p len ,
|
||||
* suggested length is 30 or greater.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
|
||||
* large enough to hold the entire supported NPS configs string. In this case,
|
||||
* only @p len bytes will be written.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_supported_nps_configs_get(uint32_t dv_ind, char *supported_configs,
|
||||
uint32_t len);
|
||||
|
||||
/**
|
||||
* @brief Retrieves the current compute partition xcp config
|
||||
* for a desired device
|
||||
*
|
||||
* @details
|
||||
* Given a device index @p dv_ind and a string @p current_xcp_config ,
|
||||
* and uint32 @p len , this function will attempt to obtain the device's
|
||||
* current compute partition xcp config string. Upon successful
|
||||
* retrieval, the obtained device's current compute partition xcp config
|
||||
* string shall be stored in the passed @p current_xcp_config
|
||||
* char string variable.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[inout] current_xcp_config a pointer to a char string variable,
|
||||
* which the device's current compute partition xcp config will be written to.
|
||||
*
|
||||
* @param[in] len the length of the caller provided buffer @p len ,
|
||||
* suggested length is 30 or greater.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_UNEXPECTED_DATA data provided to function is not valid
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
* @retval ::RSMI_STATUS_INSUFFICIENT_SIZE is returned if @p len bytes is not
|
||||
* large enough to hold the entire current xcp config string. In this case,
|
||||
* only @p len bytes will be written.
|
||||
*
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_current_compute_xcp_config_get(uint32_t dv_ind, char *current_xcp_config,
|
||||
uint32_t len);
|
||||
|
||||
/**
|
||||
* @brief Modifies a selected device's compute partition XCP config setting.
|
||||
*
|
||||
* @details Given a device index @p dv_ind, a type of compute partition
|
||||
* @p xcp_config, this function will attempt to update the selected
|
||||
* device's compute partition XCP config.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] xcp_config using enum ::rsmi_compute_partition_type_t,
|
||||
* define what the selected device's compute partition XCP config should be
|
||||
* updated to.
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_PERMISSION function requires root access
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
* @retval ::RSMI_STATUS_SETTING_UNAVAILABLE the provided setting is
|
||||
* unavailable for current device
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function
|
||||
* @retval ::RSMI_STATUS_BUSY A resource or mutex could not be acquired
|
||||
* because it is already being used - device is busy
|
||||
*
|
||||
*/
|
||||
rsmi_status_t
|
||||
rsmi_dev_compute_partition_xcp_config_set(uint32_t dv_ind,
|
||||
rsmi_compute_partition_type_t xcp_config);
|
||||
|
||||
/**
|
||||
* @brief Retrieves a selected device's compute partition resource profile.
|
||||
*
|
||||
* @details Given a device index @p dv_ind, a pointer to a requested resource of
|
||||
* rsmi_accelerator_partition_resource_type_t @p type, and a rsmi_accelerator_partition_resource_profile_t
|
||||
* @p profile this function will write the current XCP config's
|
||||
* resource profile to its @p profile.
|
||||
*
|
||||
* @param[in] dv_ind a device index
|
||||
*
|
||||
* @param[in] type a pointer to a requested resource using enum ::rsmi_accelerator_partition_resource_type_t
|
||||
*
|
||||
* @param[inout] profile a pointer to the requested rsmi_accelerator_partition_resource_profile_t details
|
||||
*
|
||||
* @retval ::RSMI_STATUS_SUCCESS call was successful
|
||||
* @retval ::RSMI_STATUS_NOT_SUPPORTED installed software or hardware does not
|
||||
* support this function with the given arguments
|
||||
* @retval ::RSMI_STATUS_INVALID_ARGS the provided arguments are not valid
|
||||
*/
|
||||
rsmi_status_t rsmi_dev_compute_partition_resource_profile_get(uint32_t dv_ind,
|
||||
rsmi_accelerator_partition_resource_type_t *type,
|
||||
rsmi_accelerator_partition_resource_profile_t *profile);
|
||||
|
||||
/** @} */ // end of ComputePartition
|
||||
|
||||
/*****************************************************************************/
|
||||
|
||||
@@ -163,6 +163,26 @@ enum DevInfoTypes {
|
||||
kDevComputePartition,
|
||||
kDevMemoryPartition,
|
||||
kDevAvailableMemoryPartition,
|
||||
kDevSupportedXcpConfigs,
|
||||
kDevSupportedNpsConfigs,
|
||||
kDevXcpConfig,
|
||||
|
||||
/**
|
||||
* Possible xcp config resources start
|
||||
*/
|
||||
kDevDecoderInst,
|
||||
kDevDecoderShared,
|
||||
kDevEncoderInst,
|
||||
kDevEncoderShared,
|
||||
kDevDmaInst,
|
||||
kDevDmaShared,
|
||||
kDevJpegInst,
|
||||
kDevJpegShared,
|
||||
kDevXccInst,
|
||||
kDevXccShared,
|
||||
/**
|
||||
* Possible xcp config resources end
|
||||
*/
|
||||
|
||||
// The information read from pci core sysfs
|
||||
kDevPCieTypeStart = 1000,
|
||||
|
||||
@@ -118,6 +118,8 @@ rsmi_status_t rsmi_dev_number_of_computes_get(uint32_t dv_ind, uint32_t* num_com
|
||||
std::string leftTrim(const std::string &s);
|
||||
std::string rightTrim(const std::string &s);
|
||||
std::string trim(const std::string &s);
|
||||
std::string trimAllWhiteSpace(const std::string &s);
|
||||
std::string removeWhitespace(const std::string &s);
|
||||
std::string removeNewLines(const std::string &s);
|
||||
|
||||
std::string removeString(const std::string origStr,
|
||||
|
||||
@@ -5618,6 +5618,524 @@ rsmi_dev_compute_partition_set(uint32_t dv_ind,
|
||||
CATCH
|
||||
}
|
||||
|
||||
/**
 * @brief Copies the device's available compute partition list into a
 * caller-provided character buffer.
 *
 * @param[in] dv_ind a device index
 * @param[inout] compute_partition_caps destination buffer; on return it holds
 * a NUL-terminated (possibly truncated) copy of the value read for
 * kDevAvailableComputePartition
 * @param[in] len size of @p compute_partition_caps in bytes, including room
 * for the terminating NUL
 *
 * @retval ::RSMI_STATUS_SUCCESS the whole value fit in the buffer
 * @retval ::RSMI_STATUS_INVALID_ARGS @p compute_partition_caps was null or
 * @p len was 0
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE the value was truncated to
 * @p len - 1 characters
 * @retval other any failure status reported by get_dev_value_line()
 */
rsmi_status_t rsmi_dev_compute_partition_capabilities_get(
    uint32_t dv_ind, char *compute_partition_caps, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Without this guard `len - 1` below wraps to UINT32_MAX when len == 0,
  // which turns the bounded copy into an unbounded write.
  if (compute_partition_caps == nullptr || len == 0) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string availableComputePartitions;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevAvailableComputePartition,
                         dv_ind, &availableComputePartitions);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits and always terminate, so the caller gets a usable
  // (truncated) string even when INSUFFICIENT_SIZE is returned below.
  std::size_t length = availableComputePartitions.copy(compute_partition_caps, len-1);
  compute_partition_caps[length]='\0';

  if (len < (availableComputePartitions.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevAvailableComputePartition)
     << " | Data: " << compute_partition_caps
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
|
||||
|
||||
/**
 * @brief Copies the device's supported XCP configuration list into a
 * caller-provided character buffer.
 *
 * @param[in] dv_ind a device index
 * @param[inout] supported_configs destination buffer; on return it holds a
 * NUL-terminated (possibly truncated) copy of the value read for
 * kDevSupportedXcpConfigs
 * @param[in] len size of @p supported_configs in bytes, including room for
 * the terminating NUL
 *
 * @retval ::RSMI_STATUS_SUCCESS the whole value fit in the buffer
 * @retval ::RSMI_STATUS_INVALID_ARGS @p supported_configs was null or
 * @p len was 0
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE the value was truncated to
 * @p len - 1 characters
 * @retval other any failure status reported by get_dev_value_line()
 */
rsmi_status_t rsmi_dev_compute_partition_supported_xcp_configs_get(uint32_t dv_ind,
                                    char *supported_configs, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Without this guard `len - 1` below wraps to UINT32_MAX when len == 0,
  // which turns the bounded copy into an unbounded write.
  if (supported_configs == nullptr || len == 0) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string supported_xcp_configs;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevSupportedXcpConfigs,
                         dv_ind, &supported_xcp_configs);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits and always terminate, so the caller gets a usable
  // (truncated) string even when INSUFFICIENT_SIZE is returned below.
  std::size_t length = supported_xcp_configs.copy(supported_configs, len-1);
  supported_configs[length]='\0';

  if (len < (supported_xcp_configs.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
     << " | Data: " << supported_configs
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
|
||||
|
||||
/**
 * @brief Copies the device's supported NPS configuration list into a
 * caller-provided character buffer.
 *
 * @param[in] dv_ind a device index
 * @param[inout] supported_configs destination buffer; on return it holds a
 * NUL-terminated (possibly truncated) copy of the value read for
 * kDevSupportedNpsConfigs
 * @param[in] len size of @p supported_configs in bytes, including room for
 * the terminating NUL
 *
 * @retval ::RSMI_STATUS_SUCCESS the whole value fit in the buffer
 * @retval ::RSMI_STATUS_INVALID_ARGS @p supported_configs was null or
 * @p len was 0
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE the value was truncated to
 * @p len - 1 characters
 * @retval other any failure status reported by get_dev_value_line()
 */
rsmi_status_t rsmi_dev_compute_partition_supported_nps_configs_get(uint32_t dv_ind,
                                    char *supported_configs, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Without this guard `len - 1` below wraps to UINT32_MAX when len == 0,
  // which turns the bounded copy into an unbounded write.
  if (supported_configs == nullptr || len == 0) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string supported_nps_configs;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevSupportedNpsConfigs,
                         dv_ind, &supported_nps_configs);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits and always terminate, so the caller gets a usable
  // (truncated) string even when INSUFFICIENT_SIZE is returned below.
  std::size_t length = supported_nps_configs.copy(supported_configs, len-1);
  supported_configs[length]='\0';

  if (len < (supported_nps_configs.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevSupportedNpsConfigs)
     << " | Data: " << supported_configs
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
|
||||
|
||||
/**
 * @brief Copies the device's current XCP configuration into a
 * caller-provided character buffer.
 *
 * @param[in] dv_ind a device index
 * @param[inout] current_xcp_config destination buffer; on return it holds a
 * NUL-terminated (possibly truncated) copy of the value read for
 * kDevXcpConfig
 * @param[in] len size of @p current_xcp_config in bytes, including room for
 * the terminating NUL
 *
 * @retval ::RSMI_STATUS_SUCCESS the whole value fit in the buffer
 * @retval ::RSMI_STATUS_INVALID_ARGS @p current_xcp_config was null or
 * @p len was 0
 * @retval ::RSMI_STATUS_INSUFFICIENT_SIZE the value was truncated to
 * @p len - 1 characters
 * @retval other any failure status reported by get_dev_value_line()
 */
rsmi_status_t rsmi_dev_current_compute_xcp_config_get(
    uint32_t dv_ind, char *current_xcp_config, uint32_t len) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);

  // Without this guard `len - 1` below wraps to UINT32_MAX when len == 0,
  // which turns the bounded copy into an unbounded write.
  if (current_xcp_config == nullptr || len == 0) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, buffer was a null ptr or len was 0"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  DEVICE_MUTEX
  std::string currentXcpConfigStr;
  rsmi_status_t ret =
      get_dev_value_line(amd::smi::kDevXcpConfig,
                         dv_ind, &currentXcpConfigStr);
  if (ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | FAIL "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Data: could not retrieve requested data"
       << " | Returning = "
       << getRSMIStatusString(ret) << " |";
    LOG_ERROR(ss);
    return ret;
  }

  // Copy as much as fits and always terminate, so the caller gets a usable
  // (truncated) string even when INSUFFICIENT_SIZE is returned below.
  std::size_t length = currentXcpConfigStr.copy(current_xcp_config, len-1);
  current_xcp_config[length]='\0';

  if (len < (currentXcpConfigStr.size() + 1)) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: requested size was insufficient"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INSUFFICIENT_SIZE) << " |";
    LOG_ERROR(ss);
    return RSMI_STATUS_INSUFFICIENT_SIZE;
  }
  // The success trace logs the full string that was read, not the buffer.
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
     << " | Data: " << currentXcpConfigStr
     << " | Returning = "
     << getRSMIStatusString(ret) << " |";
  LOG_TRACE(ss);
  return ret;
  CATCH
}
|
||||
|
||||
/**
 * @brief Writes a new XCP configuration for the selected device.
 *
 * @details The requested @p xcp_config is translated to its string form via
 * mapRSMIToStringComputePartitionTypes, compared against the list returned by
 * rsmi_dev_compute_partition_supported_xcp_configs_get() (a mismatch is only
 * logged, not rejected), and then written through
 * Device::writeDevInfo(kDevXcpConfig, ...).
 *
 * @param[in] dv_ind a device index
 * @param[in] xcp_config the requested configuration
 *
 * @retval ::RSMI_STATUS_PERMISSION the caller failed the root / sudo checks
 * @retval ::RSMI_STATUS_INVALID_ARGS @p xcp_config is not one of
 * CPX/SPX/DPX/TPX/QPX
 * @retval other the status of the supported-configs query if it failed,
 * otherwise the write's errno translated by ErrnoToRsmiStatus()
 */
rsmi_status_t
rsmi_dev_compute_partition_xcp_config_set(uint32_t dv_ind,
                                rsmi_compute_partition_type_t xcp_config) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);
  REQUIRE_ROOT_ACCESS
  if (!amd::smi::is_sudo_user()) {
    return RSMI_STATUS_PERMISSION;
  }
  std::string newXcpConfigStr = "";
  std::string availableXcpConfigsStr = "";
  const int kLen30 = 30;
  char available_xcp_configs[kLen30];
  available_xcp_configs[0] = '\0';

  switch (xcp_config) {
    case RSMI_COMPUTE_PARTITION_CPX:
    case RSMI_COMPUTE_PARTITION_SPX:
    case RSMI_COMPUTE_PARTITION_DPX:
    case RSMI_COMPUTE_PARTITION_TPX:
    case RSMI_COMPUTE_PARTITION_QPX:
      newXcpConfigStr =
          mapRSMIToStringComputePartitionTypes.at(xcp_config);
      break;
    case RSMI_COMPUTE_PARTITION_INVALID:
    default:
      newXcpConfigStr =
          mapRSMIToStringComputePartitionTypes.at(RSMI_COMPUTE_PARTITION_INVALID);
      ss << __PRETTY_FUNCTION__
         << " | ======= end ======= "
         << " | Fail "
         << " | Device #: " << dv_ind
         << " | Type: "
         << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
         << " | Data: " << newXcpConfigStr
         << " | Cause: requested setting was invalid"
         << " | Returning = "
         << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS) << " |";
      LOG_ERROR(ss);
      return RSMI_STATUS_INVALID_ARGS;
  }

  // Look up what the device reports as supported. A failed lookup aborts the
  // call; a successful lookup that lacks the requested value is only logged.
  rsmi_status_t available_ret =
      rsmi_dev_compute_partition_supported_xcp_configs_get(dv_ind, available_xcp_configs, kLen30);
  if (available_ret != RSMI_STATUS_SUCCESS) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevSupportedXcpConfigs)
       << " | Data: " << newXcpConfigStr
       << " | Cause: could not find an available xcp configs file"
       << " | Returning = "
       << getRSMIStatusString(available_ret) << " |";
    LOG_ERROR(ss);
    return available_ret;
  }
  availableXcpConfigsStr = available_xcp_configs;

  bool isXcpConfigAvailable =
      amd::smi::containsString(availableXcpConfigsStr,
                               newXcpConfigStr);
  if (!isXcpConfigAvailable) {
    ss << __PRETTY_FUNCTION__
       << " | Fail - Detected that the requested xcp config is not available"
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Data (newXcpConfigStr): " << newXcpConfigStr
       << " | Data (availableXcpConfigsStr): " << availableXcpConfigsStr;
    LOG_ERROR(ss);
    // We do not return RSMI_STATUS_INVALID_ARGS
    // Instead we try setting anyways as requested
    // write will provide the correct error code
  }

  // "size of c-string" now reports the real character count of c_str();
  // the previous expression evaluated sizeof(const char *) instead.
  ss << __PRETTY_FUNCTION__ << " | about to try writing |"
     << newXcpConfigStr
     << "| size of string = " << newXcpConfigStr.size()
     << "| size of c-string = "<< std::dec
     << std::char_traits<char>::length(newXcpConfigStr.c_str())
     << "| sizeof string = " << std::dec
     << sizeof(newXcpConfigStr);
  LOG_DEBUG(ss);

  // The device lookup and mutex are taken only here, after the
  // supported-configs query above, which uses DEVICE_MUTEX itself.
  GET_DEV_FROM_INDX
  DEVICE_MUTEX
  int ret = dev->writeDevInfo(amd::smi::kDevXcpConfig,
                              newXcpConfigStr);
  rsmi_status_t returnResponse = amd::smi::ErrnoToRsmiStatus(ret);
  const bool writeSucceeded = (returnResponse == RSMI_STATUS_SUCCESS);
  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << (writeSucceeded ? " | Success " : " | Fail ")
     << " | Device #: " << dv_ind
     << " | Type: "
     << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
     << " | Data: " << newXcpConfigStr
     << " | Returning = "
     << getRSMIStatusString(returnResponse) << " |";
  if (writeSucceeded) {
    LOG_TRACE(ss);
  } else {
    LOG_ERROR(ss);
  }

  return returnResponse;
  CATCH
}
|
||||
|
||||
/**
 * @brief Fills @p profile with the instance count and share count of one
 * accelerator resource type.
 *
 * @details Both fields of @p profile are first set to
 * std::numeric_limits<uint32_t>::max(). Each is overwritten only when its
 * value is read successfully and fits in 32 bits, so the sentinel marks
 * "not available".
 *
 * @param[in] dv_ind a device index
 * @param[in] type pointer to the requested resource type
 * @param[inout] profile receives resource_type, partition_resource and
 * num_partitions_share_resource
 *
 * @retval ::RSMI_STATUS_INVALID_ARGS @p type or @p profile was null, or
 * @p *type was outside [0, RSMI_ACCELERATOR_MAX]
 * @retval other the status of the "shared" read (the last one performed);
 * the status of the "inst" read is not propagated
 */
rsmi_status_t rsmi_dev_compute_partition_resource_profile_get(uint32_t dv_ind,
                  rsmi_accelerator_partition_resource_type_t *type,
                  rsmi_accelerator_partition_resource_profile_t *profile) {
  TRY
  std::ostringstream ss;
  ss << __PRETTY_FUNCTION__ << " | ======= start =======, " << dv_ind;
  LOG_TRACE(ss);
  if (type == nullptr) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, type was a null ptr"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS, false);
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }
  // profile is written unconditionally below, so it must be validated too.
  if (profile == nullptr) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, profile was a null ptr"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS, false);
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  // initialize the profile with the "not available" sentinel
  profile->partition_resource = std::numeric_limits<uint32_t>::max();
  profile->num_partitions_share_resource = std::numeric_limits<uint32_t>::max();

  DEVICE_MUTEX
  rsmi_status_t ret = RSMI_STATUS_NOT_SUPPORTED;

  // check if user provided supported resource types
  // Note: RSMI_ACCELERATOR_MAX is == largest enum value
  const int requested_type = static_cast<int>(*type);
  const bool isAcceleratorTypeValid =
      (requested_type >= 0) &&
      (requested_type <= static_cast<int>(RSMI_ACCELERATOR_MAX));
  if (!isAcceleratorTypeValid) {
    ss << __PRETTY_FUNCTION__
       << " | ======= end ======= "
       << " | Fail "
       << " | Device #: " << dv_ind
       << " | Type: "
       << amd::smi::Device::get_type_string(amd::smi::kDevXcpConfig)
       << " | Cause: user sent invalid arguments, type was out of range"
       << " | Returning = "
       << getRSMIStatusString(RSMI_STATUS_INVALID_ARGS, false);
    LOG_ERROR(ss);
    return RSMI_STATUS_INVALID_ARGS;
  }

  // Select the pair of device-info entries backing the requested resource.
  amd::smi::DevInfoTypes dev_info_type_inst = amd::smi::kDevXccInst;
  amd::smi::DevInfoTypes dev_info_type_shared = amd::smi::kDevXccShared;
  switch (*type) {
    case RSMI_ACCELERATOR_XCC:
      dev_info_type_inst = amd::smi::kDevXccInst;
      dev_info_type_shared = amd::smi::kDevXccShared;
      break;
    case RSMI_ACCELERATOR_ENCODER:
      dev_info_type_inst = amd::smi::kDevEncoderInst;
      dev_info_type_shared = amd::smi::kDevEncoderShared;
      break;
    case RSMI_ACCELERATOR_DECODER:
      dev_info_type_inst = amd::smi::kDevDecoderInst;
      dev_info_type_shared = amd::smi::kDevDecoderShared;
      break;
    case RSMI_ACCELERATOR_DMA:
      dev_info_type_inst = amd::smi::kDevDmaInst;
      dev_info_type_shared = amd::smi::kDevDmaShared;
      break;
    // RSMI_ACCELERATOR_MAX == RSMI_ACCELERATOR_JPEG
    case RSMI_ACCELERATOR_JPEG:
      dev_info_type_inst = amd::smi::kDevJpegInst;
      dev_info_type_shared = amd::smi::kDevJpegShared;
      break;
    default:
      // In range but not one of the handled resource types: nothing to read.
      return ret;
  }
  profile->resource_type = *type;

  // Reads one device value as a base-10 number. *out is replaced only when
  // the read succeeds and the parsed value fits in 32 bits.
  const auto read_u32 = [dv_ind](amd::smi::DevInfoTypes info_type,
                                 uint32_t *out) -> rsmi_status_t {
    std::string val_str;
    const rsmi_status_t status = get_dev_value_str(info_type, dv_ind, &val_str);
    if (status == RSMI_STATUS_SUCCESS) {
      const uint64_t val_ul = strtoul(val_str.c_str(), nullptr, 10);
      if (val_ul <= std::numeric_limits<uint32_t>::max()) {
        *out = static_cast<uint32_t>(val_ul);
      }
    }
    return status;
  };

  uint32_t inst_count = std::numeric_limits<uint32_t>::max();
  uint32_t share_count = std::numeric_limits<uint32_t>::max();
  // NOTE(review): as before, only the second read's status is returned; a
  // failed "inst" read leaves the sentinel in place without failing the call.
  ret = read_u32(dev_info_type_inst, &inst_count);
  profile->partition_resource = inst_count;
  ret = read_u32(dev_info_type_shared, &share_count);
  profile->num_partitions_share_resource = share_count;

  ss << __PRETTY_FUNCTION__
     << " | ======= end ======= "
     << " | Success "
     << " | Device #: " << dv_ind
     << " | Type (partition_resource): "
     << amd::smi::Device::get_type_string(dev_info_type_inst)
     << " | Data: " << profile->partition_resource
     << " | Type (num_partitions_share_resource): "
     << amd::smi::Device::get_type_string(dev_info_type_shared)
     << " | Data: " << profile->num_partitions_share_resource
     << " | Returning = "
     << getRSMIStatusString(ret, false) << " |";
  LOG_TRACE(ss);

  return ret;
  CATCH
}
|
||||
|
||||
static rsmi_status_t get_memory_partition(uint32_t dv_ind,
|
||||
std::string &memory_partition) {
|
||||
TRY
|
||||
|
||||
@@ -120,6 +120,21 @@ static const char *kDevAvailableComputePartitionFName =
|
||||
static const char *kDevComputePartitionFName = "current_compute_partition";
|
||||
static const char *kDevMemoryPartitionFName = "current_memory_partition";
|
||||
static const char *kDevAvailableMemoryPartitionFName = "available_memory_partition";
|
||||
static const char *kDevSupportedXcpConfigsFName = "compute_partition_config/supported_xcp_configs";
|
||||
static const char *kDevSupportedNpsConfigsFName = "compute_partition_config/supported_nps_configs";
|
||||
static const char *kDevXcpConfigFName = "compute_partition_config/xcp_config";
|
||||
|
||||
// XCP config resource files - not every file will exist in all ASICs (ex. Decoders vs Encoders)
|
||||
static const char *kDevDecoderInstFName = "compute_partition_config/dec/num_inst";
|
||||
static const char *kDevDecoderSharedFName = "compute_partition_config/dec/num_shared";
|
||||
static const char *kDevEncoderInstFName = "compute_partition_config/enc/num_inst";
|
||||
static const char *kDevEncoderSharedFName = "compute_partition_config/enc/num_shared";
|
||||
static const char *kDevDmaInstFName = "compute_partition_config/dma/num_inst";
|
||||
static const char *kDevDmaSharedFName = "compute_partition_config/dma/num_shared";
|
||||
static const char *kDevJpegInstFName = "compute_partition_config/jpeg/num_inst";
|
||||
static const char *kDevJpegSharedFName = "compute_partition_config/jpeg/num_shared";
|
||||
static const char *kDevXccInstFName = "compute_partition_config/xcc/num_inst";
|
||||
static const char *kDevXccSharedFName = "compute_partition_config/xcc/num_shared";
|
||||
|
||||
// Firmware version files
|
||||
static const char *kDevFwVersionAsdFName = "fw_version/asd_fw_version";
|
||||
@@ -309,6 +324,21 @@ static const std::map<DevInfoTypes, const char *> kDevAttribNameMap = {
|
||||
{kDevComputePartition, kDevComputePartitionFName},
|
||||
{kDevMemoryPartition, kDevMemoryPartitionFName},
|
||||
{kDevAvailableMemoryPartition, kDevAvailableMemoryPartitionFName},
|
||||
{kDevSupportedXcpConfigs, kDevSupportedXcpConfigsFName},
|
||||
{kDevSupportedNpsConfigs, kDevSupportedNpsConfigsFName},
|
||||
{kDevXcpConfig, kDevXcpConfigFName},
|
||||
|
||||
// XCP config resource files
|
||||
{kDevDecoderInst, kDevDecoderInstFName},
|
||||
{kDevDecoderShared, kDevDecoderSharedFName},
|
||||
{kDevEncoderInst, kDevEncoderInstFName},
|
||||
{kDevEncoderShared, kDevEncoderSharedFName},
|
||||
{kDevDmaInst, kDevDmaInstFName},
|
||||
{kDevDmaShared, kDevDmaSharedFName},
|
||||
{kDevJpegInst, kDevJpegInstFName},
|
||||
{kDevJpegShared, kDevJpegSharedFName},
|
||||
{kDevXccInst, kDevXccInstFName},
|
||||
{kDevXccShared, kDevXccSharedFName},
|
||||
};
|
||||
|
||||
static const std::map<rsmi_dev_perf_level, const char *> kDevPerfLvlMap = {
|
||||
@@ -466,6 +496,20 @@ Device::devInfoTypesStrings = {
|
||||
{kDevXgmiPlpd, "kDevXgmiPlpd"},
|
||||
{kDevProcessIsolation, "kDevProcessIsolation"},
|
||||
{kDevShaderClean, "kDevShaderClean"},
|
||||
{kDevSupportedXcpConfigs, "kDevSupportedXcpConfigs"},
|
||||
{kDevSupportedNpsConfigs, "kDevSupportedNpsConfigs"},
|
||||
{kDevXcpConfig, "kDevXcpConfig"},
|
||||
|
||||
{kDevDecoderInst, "kDevDecoderInst"},
|
||||
{kDevDecoderShared, "kDevDecoderShared"},
|
||||
{kDevEncoderInst, "kDevEncoderInst"},
|
||||
{kDevEncoderShared, "kDevEncoderShared"},
|
||||
{kDevDmaInst, "kDevDmaInst"},
|
||||
{kDevDmaShared, "kDevDmaShared"},
|
||||
{kDevJpegInst, "kDevJpegInst"},
|
||||
{kDevJpegShared, "kDevJpegShared"},
|
||||
{kDevXccInst, "kDevXccInst"},
|
||||
{kDevXccShared, "kDevXccShared"},
|
||||
};
|
||||
|
||||
static const std::map<const char *, dev_depends_t> kDevFuncDependsMap = {
|
||||
@@ -946,6 +990,7 @@ int Device::writeDevInfo(DevInfoTypes type, std::string val) {
|
||||
return writeDevInfoStr(type, val);
|
||||
case kDevComputePartition:
|
||||
case kDevMemoryPartition:
|
||||
case kDevXcpConfig:
|
||||
return writeDevInfoStr(type, val, true);
|
||||
|
||||
default:
|
||||
@@ -1292,6 +1337,19 @@ int Device::readDevInfo(DevInfoTypes type, std::string *val) {
|
||||
case kDevXGMIPhysicalID:
|
||||
case kDevAvailableMemoryPartition:
|
||||
case kDevProcessIsolation:
|
||||
case kDevSupportedXcpConfigs:
|
||||
case kDevSupportedNpsConfigs:
|
||||
case kDevXcpConfig:
|
||||
case kDevDecoderInst:
|
||||
case kDevDecoderShared:
|
||||
case kDevEncoderInst:
|
||||
case kDevEncoderShared:
|
||||
case kDevDmaInst:
|
||||
case kDevDmaShared:
|
||||
case kDevJpegInst:
|
||||
case kDevJpegShared:
|
||||
case kDevXccInst:
|
||||
case kDevXccShared:
|
||||
return readDevInfoStr(type, val);
|
||||
break;
|
||||
|
||||
|
||||
@@ -747,6 +747,7 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
// location_id, bdf, domain, bus, device,
|
||||
// partition_id}
|
||||
std::multimap<uint64_t, systemNode> allSystemNodes;
|
||||
std::set<uint32_t> gpuNodeIdsFound;
|
||||
uint32_t node_id = 0;
|
||||
static const int BYTE = 8;
|
||||
while (true) {
|
||||
@@ -755,9 +756,24 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
int ret_unique_id = read_node_properties(node_id, "unique_id", &unique_id);
|
||||
int ret_loc_id =
|
||||
read_node_properties(node_id, "location_id", &location_id);
|
||||
read_node_properties(node_id, "domain", &domain);
|
||||
if (ret_gpu_id == 0 &&
|
||||
!(ret_unique_id != 0 || ret_loc_id != 0 || ret_unique_id != 0)) {
|
||||
int ret_domain = read_node_properties(node_id, "domain", &domain);
|
||||
bool isANode = (ret_gpu_id == 0 &&
|
||||
(ret_domain == 0 && ret_loc_id == 0));
|
||||
ss << __PRETTY_FUNCTION__ << " | isAGpuNode: "
|
||||
<< (isANode ? "TRUE" : "FALSE") << "; is_vm_guest(): "
|
||||
<< (is_vm_guest() ? "TRUE" : "FALSE")
|
||||
<< "\nret_gpu_id: " << ret_gpu_id
|
||||
<< "; ret_domain: " << ret_domain
|
||||
<< "; ret_loc_id: " << ret_loc_id
|
||||
<< "; ret_unique_id: " << ret_unique_id
|
||||
<< "\n[node_id = " << print_unsigned_hex_and_int(node_id) << "\n"
|
||||
<< "; gpu_id = " << print_unsigned_hex_and_int(gpu_id) << "\n"
|
||||
<< "; unique_id = " << print_unsigned_hex_and_int(unique_id) << "\n"
|
||||
<< "; location_id = " << print_unsigned_hex_and_int(location_id) << "\n"
|
||||
<< "; domain = " << print_unsigned_hex_and_int(domain)
|
||||
<< "]\n";
|
||||
LOG_DEBUG(ss);
|
||||
if (isANode || (is_vm_guest() && ret_gpu_id == 0)) {
|
||||
// Do not try to build a node if one of these fields
|
||||
// do not exist in KFD (0 as values okay)
|
||||
systemNode myNode;
|
||||
@@ -776,6 +792,24 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
myNode.s_function = myNode.s_location_id & 0x7;
|
||||
myNode.s_partition_id = ((myNode.s_location_id >> 28) & 0xF);
|
||||
if (gpu_id != 0) { // only add gpu nodes, 0 = CPU
|
||||
auto ret = gpuNodeIdsFound.insert(node_id);
|
||||
if (ret.second != false) {
|
||||
// only print out nodes which do not already exist
|
||||
ss << __PRETTY_FUNCTION__ << " | isAGpuNode: "
|
||||
<< (isANode ? "TRUE" : "FALSE") << "; is_vm_guest(): "
|
||||
<< (is_vm_guest() ? "TRUE" : "FALSE")
|
||||
<< "\nret_gpu_id: " << ret_gpu_id
|
||||
<< "; ret_domain: " << ret_domain
|
||||
<< "; ret_loc_id: " << ret_loc_id
|
||||
<< "; ret_unique_id: " << ret_unique_id
|
||||
<< "\n[node_id = " << print_unsigned_hex_and_int(node_id) << "\n"
|
||||
<< "; gpu_id = " << print_unsigned_hex_and_int(gpu_id) << "\n"
|
||||
<< "; unique_id = " << print_unsigned_hex_and_int(unique_id) << "\n"
|
||||
<< "; location_id = " << print_unsigned_hex_and_int(location_id) << "\n"
|
||||
<< "; domain = " << print_unsigned_hex_and_int(domain) << "\n"
|
||||
<< "]\n";
|
||||
LOG_DEBUG(ss);
|
||||
}
|
||||
allSystemNodes.emplace(unique_id, myNode);
|
||||
}
|
||||
} else {
|
||||
@@ -866,7 +900,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
<< "; partition_id = " << std::to_string(i->second.s_partition_id)
|
||||
<< "], ";
|
||||
LOG_DEBUG(ss);
|
||||
AddToDeviceList(d_name, primaryBdfId);
|
||||
ss << __PRETTY_FUNCTION__ << " | AddToDeviceList #1 (secondary node) \n"
|
||||
<< "; bdf: " << print_unsigned_hex_and_int(primaryBdfId) << "\n";
|
||||
LOG_DEBUG(ss);
|
||||
} else {
|
||||
ss << __PRETTY_FUNCTION__ << " | primary node add ; "
|
||||
<< " BDF = " << std::to_string(UINT64_MAX);
|
||||
@@ -894,6 +930,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
<< "; partition_id = " << std::to_string(i->second.s_partition_id)
|
||||
<< "], ";
|
||||
LOG_DEBUG(ss);
|
||||
ss << __PRETTY_FUNCTION__ << " | AddToDeviceList #2 (primary node) \n"
|
||||
<< "; bdf: " << print_unsigned_hex_and_int(UINT64_MAX) << "\n";
|
||||
LOG_DEBUG(ss);
|
||||
AddToDeviceList(d_name, UINT64_MAX);
|
||||
}
|
||||
|
||||
@@ -1029,6 +1068,9 @@ uint32_t RocmSMI::DiscoverAmdgpuDevices(void) {
|
||||
<< "; partition_id = " << std::to_string(it->second.s_partition_id)
|
||||
<< "], ";
|
||||
LOG_DEBUG(ss);
|
||||
ss << __PRETTY_FUNCTION__ << " | AddToDeviceList #3 (secondary node add #2) \n"
|
||||
<< "; bdf: " << print_unsigned_hex_and_int(myBdfId) << "\n";
|
||||
LOG_DEBUG(ss);
|
||||
AddToDeviceList(secNode, myBdfId);
|
||||
allSystemNodes.erase(it++);
|
||||
numb_nodes--;
|
||||
|
||||
@@ -383,6 +383,7 @@ std::string removeNewLines(const std::string &s) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// Trims white space from both ends of string
|
||||
std::string trim(const std::string &s) {
|
||||
if (!s.empty()) {
|
||||
// remove new lines -> trim white space at ends
|
||||
@@ -392,6 +393,23 @@ std::string trim(const std::string &s) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// Trims white space from both ends of string and removes all white space
|
||||
std::string trimAllWhiteSpace(const std::string &s) {
|
||||
if (!s.empty()) {
|
||||
// remove new lines -> trim white space at ends
|
||||
std::string noNewLines = trim(s);
|
||||
return removeWhitespace(noNewLines);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// Returns a copy of s with every run of whitespace (regex class \s) deleted.
// Empty input is returned unchanged.
std::string removeWhitespace(const std::string &s) {
  if (s.empty()) {
    return s;
  }
  // Compile the pattern once; rebuilding a std::regex on every call is
  // needlessly expensive for a fixed pattern.
  static const std::regex kWhitespaceRun("\\s+");
  return std::regex_replace(s, kWhitespaceRun, "");
}
|
||||
|
||||
// Given original string and string to remove (removeMe)
|
||||
// Return will provide the resulting modified string with the removed string(s)
|
||||
std::string removeString(const std::string origStr,
|
||||
@@ -908,18 +926,18 @@ std::string getBuildType() {
|
||||
}
|
||||
|
||||
// Returns the path of the shared object (or executable) that contains this
// function, as reported by dladdr(). Returns an empty string literal when
// dladdr() is unavailable (no _GNU_SOURCE), fails, or reports no file name.
// The result is never null and never points at a destroyed temporary.
const char *my_fname(void) {
#ifdef _GNU_SOURCE
  Dl_info dl_info;
  // dladdr() returns 0 on failure, in which case dl_info is not filled in.
  if (dladdr(reinterpret_cast<void *>(my_fname), &dl_info) == 0 ||
      dl_info.dli_fname == nullptr) {
    return "";
  }
  return (dl_info.dli_fname);
#else
  // A string literal has static storage duration, unlike the c_str() of a
  // local std::string, which would dangle once this function returns.
  return "";
#endif
}
|
||||
|
||||
std::string getMyLibPath(void) {
|
||||
std::string libName = "rocm-smi-lib";
|
||||
std::string libName = "amd-smi-lib";
|
||||
std::string path = std::string(my_fname());
|
||||
if (path.empty()) {
|
||||
path = "Could not find library path for " + libName;
|
||||
|
||||
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -128,7 +128,7 @@ amdsmi_status_t AMDSmiGPUDevice::amdgpu_query_vbios(void *info) const {
|
||||
amdsmi_status_t ret;
|
||||
uint32_t fd = 0;
|
||||
ret = drm_.get_drm_fd_by_index(gpu_id_, &fd);
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;;
|
||||
if (ret != AMDSMI_STATUS_SUCCESS) return AMDSMI_STATUS_NOT_SUPPORTED;
|
||||
|
||||
return drm_.amdgpu_query_vbios(fd, info);
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user