diff --git a/CHANGELOG.md b/CHANGELOG.md index e0c1c745ac..3caaedcd35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,14 +7,15 @@ Full documentation for amd_smi_lib is available at [https://rocm.docs.amd.com/pr ## amd_smi_lib for ROCm 6.3.0 ### Changes -- **Added support for GPU metrics 1.6 to `amdsmi_get_gpu_metrics_info()`** + +- **Added support for GPU metrics 1.6 to `amdsmi_get_gpu_metrics_info()`**. Updated `amdsmi_get_gpu_metrics_info()` and structure `amdsmi_gpu_metrics_t` to include new fields for PVIOL / TVIOL, XCP (Graphics Compute Partitions) stats, and pcie_lc_perf_other_end_recovery: - `uint64_t accumulation_counter` - used for all throttled calculations - `uint64_t prochot_residency_acc` - Processor hot accumulator - `uint64_t ppt_residency_acc` - Package Power Tracking (PPT) accumulator (used in PVIOL calculations) - `uint64_t socket_thm_residency_acc` - Socket thermal accumulator - (used in TVIOL calculations) - `uint64_t vr_thm_residency_acc` - Voltage Rail (VR) thermal accumulator - - `uint64_t hbm_thm_residency_acc` - High Bandwidth Memory (HBM) thermal accumulator + - `uint64_t hbm_thm_residency_acc` - High Bandwidth Memory (HBM) thermal accumulator - `uint16_t num_partition` - corresponds to the current total number of partitions - `struct amdgpu_xcp_metrics_t xcp_stats[MAX_NUM_XCP]` - for each partition associated with current GPU, provides gfx busy & accumulators, jpeg, and decoder (VCN) engine utilizations - `uint32_t gfx_busy_inst[MAX_NUM_XCC]` - graphic engine utilization (%) @@ -23,11 +24,12 @@ Updated `amdsmi_get_gpu_metrics_info()` and structure `amdsmi_gpu_metrics_t` to - `uint64_t gfx_busy_acc[MAX_NUM_XCC]` - graphic engine utilization accumulated (%) - `uint32_t pcie_lc_perf_other_end_recovery` - corresponds to the pcie other end recovery counter -- **Added new violation status outputs and APIs: `amdsmi_status_t amdsmi_get_violation_status()`, `amd-smi metric --throttle`, and `amd-smi monitor --violation`** +- **Added new 
violation status outputs and APIs: `amdsmi_status_t amdsmi_get_violation_status()`, `amd-smi metric --throttle`, and `amd-smi monitor --violation`**. ***Only available for MI300+ ASICs.*** Users can now retrieve violation status' through either our Python or C++ APIs. Additionally, we have added capability to view these outputs conviently through `amd-smi metric --throttle` and `amd-smi monitor --violation`. Example outputs are listed below (below is for reference, output is subject to change): + ```shell $ amd-smi metric --throttle GPU: 0 @@ -69,6 +71,7 @@ GPU: 1 HBM_THERMAL_VIOLATION_PERCENT: 0 % ... ``` + ```shell $ amd-smi monitor --violation GPU PVIOL TVIOL PHOT_TVIOL VR_TVIOL HBM_TVIOL @@ -91,12 +94,12 @@ GPU PVIOL TVIOL PHOT_TVIOL VR_TVIOL HBM_TVIOL ... ``` -- **Added ability to view XCP (Graphics Compute Partition) activity within `amd-smi metric --usage`** +- **Added ability to view XCP (Graphics Compute Partition) activity within `amd-smi metric --usage`**. ***Partition specific features are only available on MI300+ ASICs*** Users can now retrieve graphic utilization statistic on a per-XCP (per-partition) basis. Here all XCP activities will be listed, - but the current XCP is the partition id listed under both `amd-smi list` and `amd-smi static --partition`. - + but the current XCP is the partition id listed under both `amd-smi list` and `amd-smi static --partition`. Example outputs are listed below (below is for reference, output is subject to change): + ```shell $ amd-smi metric --usage GPU: 0 @@ -161,7 +164,6 @@ GPU: 0 XCP_6: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A] XCP_7: [N/A, N/A, N/A, N/A, N/A, N/A, N/A, N/A] - GPU: 1 USAGE: GFX_ACTIVITY: 0 % @@ -227,9 +229,10 @@ GPU: 1 ... 
``` -- **Added `LC_PERF_OTHER_END_RECOVERY` CLI output to `amd-smi metric --pcie` and updated `amdsmi_get_pcie_info()` to include this value** +- **Added `LC_PERF_OTHER_END_RECOVERY` CLI output to `amd-smi metric --pcie` and updated `amdsmi_get_pcie_info()` to include this value**. ***Feature is only available on MI300+ ASICs*** Users can now retrieve both through `amdsmi_get_pcie_info()` which has an updated structure: + ```C typedef struct { ... @@ -247,9 +250,10 @@ typedef struct { } pcie_metric; uint64_t reserved[32]; } amdsmi_pcie_info_t; -``` +``` + + - Example outputs are listed below (below is for reference, output is subject to change): - Example outputs are listed below (below is for reference, output is subject to change): ```shell $ amd-smi metric --pcie GPU: 0 @@ -284,7 +288,7 @@ GPU: 1 ... ``` -- **Updated BDF commands to look use KFD SYSFS for BDF: `amdsmi_get_gpu_device_bdf()`** +- **Updated BDF commands to look use KFD SYSFS for BDF: `amdsmi_get_gpu_device_bdf()`**. This aligns BDF output with ROCm SMI. See below for overview as seen from `rsmi_dev_pci_id_get()` now provides partition ID. See API for better detail. Previously these bits were reserved bits (right before domain) and partition id was within function. - bits [63:32] = domain @@ -292,7 +296,6 @@ See below for overview as seen from `rsmi_dev_pci_id_get()` now provides partiti - bits [27:16] = reserved - bits [15: 0] = pci bus/device/function - - **Moved python tests directory path install location**. - `/opt//share/amd_smi/pytest/..` to `/opt//share/amd_smi/tests/python_unittest/..` - On amd-smi-lib-tests uninstall, the amd_smi tests folder is removed. @@ -306,9 +309,7 @@ See below for overview as seen from `rsmi_dev_pci_id_get()` now provides partiti - **Added `amd-smi set -L/--clk-limit ...` command**. Equivalent to rocm-smi's '--extremum' command which sets sclk's or mclk's soft minimum or soft maximum clock frequency. 
- - -- **Added Pytest functionality to test amdsmi API calls in Python**. +- **Added unittest functionality to test amdsmi API calls in Python**. - **Changed the `power` parameter in `amdsmi_get_energy_count()` to `energy_accumulator`**. - Changes propagate forwards into the python interface as well, however we are maintaing backwards compatibility and keeping the `power` field in the python API until ROCm 6.4. @@ -341,7 +342,6 @@ Topology arguments: ID: 7 | BDF: 0000:df:00.0 | UUID: all | Selects all devices - -a, --access Displays link accessibility between GPUs -w, --weight Displays relative weight between GPUs -o, --hops Displays the number of hops between GPUs @@ -352,7 +352,6 @@ Topology arguments: -d, --dma Display P2P direct memory access (DMA) link capability between nodes -z, --bi-dir Display P2P bi-directional link capability between nodes - Command Modifiers: --json Displays output in JSON format (human readable by default). --csv Displays output in CSV format (human readable by default). @@ -407,7 +406,6 @@ BI-DIRECTIONAL TABLE: 0000:bf:00.0 F T T T F F SELF F 0000:df:00.0 T T T F F T F SELF - Legend: SELF = Current GPU ENABLED / DISABLED = Link is enabled or disabled @@ -504,10 +502,10 @@ GPU: 0 TARGET_GRAPHICS_VERSION: gfx942 ``` -- **Udpated Partition APIs and struct information and added and partition_id to `amd-smi static --partition` & `amd-smi list`**. +- **Updated Partition APIs and struct information and added partition_id to `amd-smi static --partition`**. - As part of an overhaul to partition information, some partition information will be made available in the `amdsmi_accelerator_partition_profile_t`. - This struct will be filled out by a new API, `amdsmi_get_gpu_accelerator_partition_profile()`. - - Future data from these APIs wil will eventually get added to `static --partition`. + - Future data from these APIs will eventually get added to `amd-smi partition`. 
```C #define AMDSMI_MAX_ACCELERATOR_PROFILE 32 @@ -548,7 +546,6 @@ typedef union { uint32_t nps_cap_mask; } amdsmi_nps_caps_t; - typedef struct { amdsmi_accelerator_partition_type_t profile_type; // SPX, DPX, QPX, CPX and so on uint32_t num_partitions; // On MI300X, SPX: 1, DPX: 2, QPX: 4, CPX: 8, length of resources array @@ -567,21 +564,6 @@ GPU: 0 COMPUTE_PARTITION: CPX MEMORY_PARTITION: NPS4 PARTITION_ID: 0 - -$ amd-smi list -GPU: 0 - BDF: 0000:23:00.0 - UUID: - KFD_ID: 45412 - NODE_ID: 1 - PARTITION_ID: 0 - -GPU: 1 - BDF: 0000:26:00.0 - UUID: - KFD_ID: 59881 - NODE_ID: 2 - PARTITION_ID: 0 ``` ### Removals @@ -610,7 +592,7 @@ plan to eventually remove partition ID from the function portion of the BDF (Bus - bits [7:3] = Device - bits [2:0] = Function (partition id maybe in bits [2:0]) <-- Fallback for non SPX modes -Previously in non-SPX modes (ex. CPX/TPX/DPX/etc) some MI3x ASICs would not report all logical GPU devices within AMD SMI. + - Previously in non-SPX modes (ex. CPX/TPX/DPX/etc) some MI3x ASICs would not report all logical GPU devices within AMD SMI. ```shell $ amd-smi monitor -p -t -v @@ -650,9 +632,8 @@ GPU POWER GPU_TEMP MEM_TEMP VRAM_USED VRAM_TOTAL ``` - **Fixed incorrect implementation of the Python API `amdsmi_get_gpu_metrics_header_info()`**. -- **`amdsmitst` TestGpuMetricsRead now prints metric in correct units** -- **`amd-smi static --partition` will have updates with additional partition information from `amdsmi_get_gpu_accelerator_partition_profile()`**. +- **`amdsmitst` TestGpuMetricsRead now prints metric in correct units**. ### Known issues @@ -662,6 +643,10 @@ GPU POWER GPU_TEMP MEM_TEMP VRAM_USED VRAM_TOTAL - **Python API for `amdsmi_get_energy_count()` will deprecate the `power` field in ROCm 6.4 and use `energy_accumulator` field instead**. +- **Added preliminary `amd-smi partition` command**. + - The new partition command can be used to display GPU information, including memory and accelerator partition information. 
def partition(self, args, multiple_devices=False, gpu=None, current=None, memory=None, accelerator=None):
    """ Display partition information for the target GPU(s)

    Up to three sections are printed through self.logger, in this order:
    the CURRENT_PARTITION table, the per-GPU memory partition output and the
    ACCELERATOR_PARTITION_PROFILES table. Any value the library fails to
    provide is reported as "N/A" and the failure is logged at debug level.

    param:
        args - argparser args to pass to subcommand
        multiple_devices (bool) - True if checking for multiple devices
        gpu (device_handle) - device_handle for target device
        current - boolean which dictates whether the current partition information is shown
        memory - boolean which dictates whether the memory partition information is shown
        accelerator - boolean which dictates whether the accelerator partition information is shown
    returns:
        nothing
    """

    def _nps_caps_to_str(memory_caps):
        """Render the NPS modes advertised by a memory_caps value as one string."""
        flags = memory_caps.amdsmi_nps_flags_t
        if flags is None:
            # No decoded flag view available: fall back to the raw capability bitmask
            mask = memory_caps.nps_cap_mask
            supported = [name for bit, name in ((1, "NPS1"), (2, "NPS2"), (4, "NPS4"), (8, "NPS8"))
                         if mask & bit == bit]
        else:
            supported = [name for name, cap in (("NPS1", flags.nps1_cap),
                                                ("NPS2", flags.nps2_cap),
                                                ("NPS4", flags.nps4_cap),
                                                ("NPS8", flags.nps8_cap))
                         if cap == 1]
        # Same rendering as before (list repr without brackets, so names stay quoted)
        caps_str = str(supported).replace("]", "").replace("[", "")
        return caps_str if caps_str != "" else "N/A"

    if gpu:
        args.gpu = gpu
    if args.gpu is None:
        args.gpu = self.device_handles
    if not isinstance(args.gpu, list):
        args.gpu = [args.gpu]
    if current:
        args.current = current
    if memory:
        args.memory = memory
    if accelerator:
        args.accelerator = accelerator

    # if no args are present, then everything should be displayed
    if not args.current and not args.memory and not args.accelerator:
        args.current = True
        args.memory = True
        args.accelerator = True

    if args.current:
        # Column widths must match the per-key padding applied by the logger
        self.logger.table_header = "GPU_ID".ljust(13) + \
                                   "MEMORY".ljust(8) + \
                                   "ACCELERATOR_TYPE".ljust(18) + \
                                   "ACCELERATOR_PROFILE_INDEX".ljust(27) + \
                                   "PARTITION_ID".ljust(14)

        tabular_output = []
        for device in args.gpu:
            gpu_id = self.helpers.get_gpu_id_from_device_handle(device)
            try:
                partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(device)
                profile_type = partition_dict['partition_profile']['profile_type']
                profile_index = partition_dict['partition_profile']['profile_index']
                partition_id = partition_dict['partition_id']
            except amdsmi_exception.AmdSmiLibraryException as e:
                profile_type = "N/A"
                profile_index = "N/A"
                partition_id = "N/A"
                logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())
            try:
                current_mem_cap = amdsmi_interface.amdsmi_get_gpu_memory_partition(device)
            except amdsmi_exception.AmdSmiLibraryException as e:
                current_mem_cap = "N/A"
                logging.debug("Failed to get current memory partition capabilties for GPU %s | %s", gpu_id, e.get_error_info())

            tabular_output.append({"gpu_id": gpu_id,
                                   "memory": current_mem_cap,
                                   "accelerator_type": profile_type,
                                   "accelerator_profile_index": profile_index,
                                   "partition_id": partition_id})

        self.logger.multiple_device_output = tabular_output
        self.logger.table_title = "CURRENT_PARTITION"
        self.logger.print_output(multiple_device_enabled=True, tabular=True)
        self.logger.clear_multiple_devices_ouput()

    if args.memory:
        for device in args.gpu:
            gpu_id = self.helpers.get_gpu_id_from_device_handle(device)
            try:
                # this info likely actually comes from different apis than used here
                memory_partition = amdsmi_interface.amdsmi_get_gpu_memory_partition(device)
            except amdsmi_exception.AmdSmiLibraryException as e:
                memory_partition = "N/A"
                logging.debug("Failed to get current memory partition for GPU %s | %s", gpu_id, e.get_error_info())
            try:
                partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(device)
                mem_caps_str = _nps_caps_to_str(partition_dict['partition_profile']['memory_caps'])
            except amdsmi_exception.AmdSmiLibraryException as e:
                mem_caps_str = "N/A"
                logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())

            memory_dict = {'caps': mem_caps_str, 'current': memory_partition}
            self.logger.store_output(device, 'memory_partition', memory_dict)
            self.logger.store_multiple_device_output()
        self.logger.print_output(multiple_device_enabled=True)
        self.logger.clear_multiple_devices_ouput()

    if args.accelerator:
        # Column widths must match the per-key padding applied by the logger
        self.logger.table_header = "GPU_ID".ljust(13) + \
                                   "PROFILE_INDEX".ljust(15) + \
                                   "MEMORY_PARTITION_CAPS".ljust(23) + \
                                   "ACCELERATOR_TYPE".ljust(18) + \
                                   "PARTITION_ID".ljust(14) + \
                                   "NUM_PARTITIONS".ljust(16) + \
                                   "NUM_RESOURCES".ljust(15) + \
                                   "RESOURCE_INDEX".ljust(16) + \
                                   "RESOURCE_TYPE".ljust(15) + \
                                   "RESOURCE_INSTANCES".ljust(20) + \
                                   "RESOURCES_SHARED".ljust(18)

        tabular_output = []
        for device in args.gpu:
            gpu_id = self.helpers.get_gpu_id_from_device_handle(device)
            try:
                partition_dict = amdsmi_interface.amdsmi_get_gpu_accelerator_partition_profile(device)
                profile = partition_dict['partition_profile']
                profile_type = profile['profile_type']
                profile_index = profile['profile_index']
                partition_id = partition_dict['partition_id']
                # Report the partition count returned by the library instead of a constant 0
                num_partitions = profile['num_partitions']
                num_resources = profile['num_resources']
                resources = profile['resources']
                mem_caps_str = _nps_caps_to_str(profile['memory_caps'])
            except amdsmi_exception.AmdSmiLibraryException as e:
                profile_type = "N/A"
                profile_index = "N/A"
                partition_id = "N/A"
                num_partitions = "N/A"
                num_resources = "N/A"
                resources = "N/A"
                mem_caps_str = "N/A"
                logging.debug("Failed to get accelerator partition profile for GPU %s | %s", gpu_id, e.get_error_info())

            # Per-resource details are not broken out yet; every resource column
            # shows the raw 'resources' value returned by the interface
            tabular_output.append({"gpu_id": gpu_id,
                                   "profile_index": profile_index,
                                   "memory_partition_caps": mem_caps_str,
                                   "accelerator_type": profile_type,
                                   "partition_id": partition_id,
                                   "num_partitions": num_partitions,
                                   "num_resources": num_resources,
                                   "resource_index": resources,
                                   "resource_type": resources,
                                   "resource_instances": resources,
                                   "resources_shared": resources})

        self.logger.multiple_device_output = tabular_output
        self.logger.table_title = "ACCELERATOR_PARTITION_PROFILES"
        self.logger.print_output(multiple_device_enabled=True, tabular=True)
        self.logger.clear_multiple_devices_ouput()
== "accelerator_type": + table_values += string_value.ljust(18) + elif key == "partition_id": + table_values += string_value.ljust(14) + elif key == "accelerator_profile_index": + table_values += string_value.ljust(27) + elif key == "profile_index": + table_values += string_value.ljust(15) + elif key == "memory_partition_caps": + table_values += string_value.ljust(23) + elif key == "num_partitions": + table_values += string_value.ljust(16) + elif key == "num_resources": + table_values += string_value.ljust(15) + elif key == "resource_index": + table_values += string_value.ljust(16) + elif key == "resource_type": + table_values += string_value.ljust(15) + elif key == "resource_instances": + table_values += string_value.ljust(20) + elif key == "resources_shared": + table_values += string_value.ljust(18) elif key == "RW": - table_values += " " + string_value.ljust(52) + table_values += string_value.ljust(52) elif key == "process_list": #Add an additional padding between the first instance of GPU and NAME table_values += ' ' diff --git a/amdsmi_cli/amdsmi_parser.py b/amdsmi_cli/amdsmi_parser.py index bc9c85149b..9965c486b5 100644 --- a/amdsmi_cli/amdsmi_parser.py +++ b/amdsmi_cli/amdsmi_parser.py @@ -71,7 +71,7 @@ class AMDSMIParser(argparse.ArgumentParser): """ def __init__(self, version, list, static, firmware, bad_pages, metric, process, profile, event, topology, set_value, reset, monitor, - rocmsmi, xgmi): + rocmsmi, xgmi, partition): # Helper variables self.helpers = AMDSMIHelpers() @@ -117,7 +117,7 @@ class AMDSMIParser(argparse.ArgumentParser): # Store possible subcommands & aliases for later errors self.possible_commands = ['version', 'list', 'static', 'firmware', 'ucode', 'bad-pages', 'metric', 'process', 'profile', 'event', 'topology', 'set', - 'reset', 'monitor', 'dmon', 'xgmi'] + 'reset', 'monitor', 'dmon', 'xgmi', 'partition'] # Add all subparsers self._add_version_parser(self.subparsers, version) @@ -135,6 +135,7 @@ class 
def _add_partition_parser(self, subparsers, func):
    """Register the 'partition' subcommand and its options on subparsers.

    Nothing is registered when amdgpu is not initialized.

    param:
        subparsers - subparsers action the 'partition' parser is added to
        func - callable stored as the parser's 'func' default
    returns:
        nothing
    """
    if not self.helpers.is_amdgpu_initialized():
        # The partition subcommand is only applicable to systems with amdgpu initialized
        return

    # Subparser help text
    partition_help = "Displays partition information of the devices"
    # Adjacent literals are concatenated; a backslash continuation inside the
    # literal would embed the source indentation in the displayed description
    partition_subcommand_help = ("If no GPU is specified, returns information for all GPUs on the system."
                                 "\nIf no partition argument is provided all partition information will be displayed.")
    partition_optionals_title = "partition arguments"

    # Options help text
    current_help = "display the current partition information"
    memory_help = "display the current memory partition mode and capabilities"
    accelerator_help = "display accelerator partition information"

    # Create partition subparser
    partition_parser = subparsers.add_parser('partition', help=partition_help, description=partition_subcommand_help)
    partition_parser._optionals.title = partition_optionals_title
    partition_parser.formatter_class = lambda prog: AMDSMISubparserHelpFormatter(prog)
    partition_parser.set_defaults(func=func)

    # Add Universal Arguments
    self._add_device_arguments(partition_parser, required=False)

    # Handle GPU Options
    partition_parser.add_argument('-c', '--current', action='store_true', required=False, help=current_help)
    partition_parser.add_argument('-m', '--memory', action='store_true', required=False, help=memory_help)
    partition_parser.add_argument('-a', '--accelerator', action='store_true', required=False, help=accelerator_help)

    # Add command modifiers to the bottom
    self._add_command_modifiers(partition_parser)