From 1dd2942136ca1234c5b00b226571401a04b56e05 Mon Sep 17 00:00:00 2001 From: Maisam Arif Date: Tue, 10 Oct 2023 20:42:52 -0500 Subject: [PATCH] Added static --cache to cli tool Change-Id: I494d29aba7915a0b8815036977b2636a2da5264e Signed-off-by: Maisam Arif [ROCm/amdsmi commit: 66eb3de5e42fa9176aaf05e7a288d5b243edc1bb] --- projects/amdsmi/amdsmi_cli/README.md | 1 + projects/amdsmi/amdsmi_cli/amdsmi_commands.py | 23 ++++-- projects/amdsmi/amdsmi_cli/amdsmi_parser.py | 6 +- projects/amdsmi/py-interface/README.md | 79 +++++++++++++++++++ projects/amdsmi/py-interface/__init__.py | 1 + .../amdsmi/py-interface/amdsmi_interface.py | 24 ++++++ 6 files changed, 127 insertions(+), 7 deletions(-) diff --git a/projects/amdsmi/amdsmi_cli/README.md b/projects/amdsmi/amdsmi_cli/README.md index ccc75b416a..7032b74d0c 100644 --- a/projects/amdsmi/amdsmi_cli/README.md +++ b/projects/amdsmi/amdsmi_cli/README.md @@ -176,6 +176,7 @@ Static Arguments: -d, --driver Displays driver version -r, --ras Displays RAS features information -v, --vram All vram information + -c, --cache All cache information -B, --board All board information -l, --limit All limit metric values (i.e. power and thermal limits) -u, --numa All numa node information diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py index b598a15d5c..3d1cb78bf7 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_commands.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_commands.py @@ -131,7 +131,7 @@ class AMDSMICommands(): def static(self, args, multiple_devices=False, gpu=None, asic=None, bus=None, vbios=None, limit=None, driver=None, - ras=None, board=None, numa=None, vram=None): + ras=None, board=None, numa=None, vram=None, cache=None): """Get Static information for target gpu Args: @@ -147,6 +147,7 @@ class AMDSMICommands(): board (bool, optional): Value override for args.board. Defaults to None. numa (bool, optional): Value override for args.numa. Defaults to None. 
vram (bool, optional): Value override for args.vram. Defaults to None. + cache (bool, optional): Value override for args.cache. Defaults to None. Raises: IndexError: Index error if gpu list is empty @@ -171,6 +172,8 @@ class AMDSMICommands(): args.driver = driver if vram: args.vram = vram + if cache: + args.cache = cache if self.helpers.is_linux() and self.helpers.is_baremetal(): if ras: args.ras = ras @@ -189,11 +192,11 @@ class AMDSMICommands(): # If all arguments are False, it means that no argument was passed and the entire static should be printed if self.helpers.is_linux() and self.helpers.is_baremetal(): - if not any([args.asic, args.bus, args.vbios, args.limit, args.board, args.ras, args.driver, args.numa, args.vram]): - args.asic = args.bus = args.vbios = args.limit = args.board = args.ras = args.driver = args.numa = args.vram = self.all_arguments = True + if not any([args.asic, args.bus, args.vbios, args.limit, args.board, args.ras, args.driver, args.numa, args.vram, args.cache]): + args.asic = args.bus = args.vbios = args.limit = args.board = args.ras = args.driver = args.numa = args.vram = args.cache = self.all_arguments = True if self.helpers.is_linux() and self.helpers.is_virtual_os(): - if not any([args.asic, args.bus, args.vbios, args.board, args.driver, args.vram]): - args.asic = args.bus = args.vbios = args.board = args.driver = args.vram = self.all_arguments = True + if not any([args.asic, args.bus, args.vbios, args.board, args.driver, args.vram, args.cache]): + args.asic = args.bus = args.vbios = args.board = args.driver = args.vram = args.cache = self.all_arguments = True static_dict = {} @@ -434,7 +437,17 @@ class AMDSMICommands(): logging.debug("Failed to get vram info for gpu %s | %s", gpu_id, e.get_error_info()) static_dict['vram'] = vram_info + if args.cache: + try: + cache_info = amdsmi_interface.amdsmi_get_gpu_cache_info(args.gpu) + if self.logger.is_human_readable_format(): + for _ , cache_values in cache_info.items(): + 
cache_values['cache_size'] = f"{cache_values['cache_size']} KB" + except amdsmi_exception.AmdSmiLibraryException as e: + cache_info = "N/A" + logging.debug("Failed to get cache info for gpu %s | %s", gpu_id, e.get_error_info()) + static_dict['cache'] = cache_info if self.helpers.is_hypervisor() or self.helpers.is_baremetal(): if args.ras: ras_dict = {"eeprom_version": "N/A", diff --git a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py index 85fda61d6f..1d737e0a5f 100644 --- a/projects/amdsmi/amdsmi_cli/amdsmi_parser.py +++ b/projects/amdsmi/amdsmi_cli/amdsmi_parser.py @@ -293,11 +293,12 @@ class AMDSMIParser(argparse.ArgumentParser): vbios_help = "All video bios information (if available)" limit_help = "All limit metric values (i.e. power and thermal limits)" driver_help = "Displays driver version" + vram_help = "All vram information" + cache_help = "All cache information" + board_help = "All board information" # Options arguments help text for Hypervisors and Baremetal ras_help = "Displays RAS features information" - vram_help = "All vram information" - board_help = "All board information" # Linux Baremetal only numa_help = "All numa node information" # Linux Baremetal only # Options arguments help text for Hypervisors @@ -321,6 +322,7 @@ class AMDSMIParser(argparse.ArgumentParser): static_parser.add_argument('-V', '--vbios', action='store_true', required=False, help=vbios_help) static_parser.add_argument('-d', '--driver', action='store_true', required=False, help=driver_help) static_parser.add_argument('-v', '--vram', action='store_true', required=False, help=vram_help) + static_parser.add_argument('-c', '--cache', action='store_true', required=False, help=cache_help) static_parser.add_argument('-B', '--board', action='store_true', required=False, help=board_help) # Options to display on Hypervisors and Baremetal diff --git a/projects/amdsmi/py-interface/README.md b/projects/amdsmi/py-interface/README.md index 
8147ba1b6d..c57b63074c 100644 --- a/projects/amdsmi/py-interface/README.md +++ b/projects/amdsmi/py-interface/README.md @@ -424,6 +424,85 @@ except AmdSmiException as e: print(e) ``` +### amdsmi_get_gpu_vram_info + +Description: Returns dictionary of vram information for the given GPU. + +Input parameters: + +* `processor_handle` device which to query + +Output: Dictionary with fields + +Field | Description +---|--- +`vram_type` | vram type +`vram_vendor` | vram vendor +`vram_size_mb` | vram size in mb + +Exceptions that can be thrown by `amdsmi_get_gpu_vram_info` function: + +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: + +```python +try: + devices = amdsmi_get_processor_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + vram_info = amdsmi_get_gpu_vram_info(device) + print(vram_info['vram_type']) + print(vram_info['vram_vendor']) + print(vram_info['vram_size_mb']) +except AmdSmiException as e: + print(e) +``` + +### amdsmi_get_gpu_cache_info + +Description: Returns dictionary of cache information for the given GPU. 
+ +Input parameters: + +* `processor_handle` device which to query + +Output: Dictionary of Dictionaries containing cache information + +Field | Description +---|--- +`cache #` | up to 10 caches will be available +`cache_size` | size of cache in KB +`cache_level` | level of cache + +Exceptions that can be thrown by `amdsmi_get_gpu_cache_info` function: + +* `AmdSmiLibraryException` +* `AmdSmiRetryException` +* `AmdSmiParameterException` + +Example: + +```python +try: + devices = amdsmi_get_processor_handles() + if len(devices) == 0: + print("No GPUs on machine") + else: + for device in devices: + cache_info = amdsmi_get_gpu_cache_info(device) + for cache_index, cache_values in cache_info.items(): + print(cache_index) + print(cache_values['cache_size']) + print(cache_values['cache_level']) +except AmdSmiException as e: + print(e) +``` + ### amdsmi_get_gpu_vbios_info Description: Returns the static information for the VBIOS on the device. diff --git a/projects/amdsmi/py-interface/__init__.py b/projects/amdsmi/py-interface/__init__.py index 22a13f7c95..d0b7bc417a 100644 --- a/projects/amdsmi/py-interface/__init__.py +++ b/projects/amdsmi/py-interface/__init__.py @@ -40,6 +40,7 @@ from .amdsmi_interface import amdsmi_get_gpu_driver_info from .amdsmi_interface import amdsmi_get_gpu_asic_info from .amdsmi_interface import amdsmi_get_power_cap_info from .amdsmi_interface import amdsmi_get_gpu_vram_info +from .amdsmi_interface import amdsmi_get_gpu_cache_info # # Microcode and VBIOS Information from .amdsmi_interface import amdsmi_get_gpu_vbios_info diff --git a/projects/amdsmi/py-interface/amdsmi_interface.py b/projects/amdsmi/py-interface/amdsmi_interface.py index 98907c2708..425ced72e7 100644 --- a/projects/amdsmi/py-interface/amdsmi_interface.py +++ b/projects/amdsmi/py-interface/amdsmi_interface.py @@ -691,6 +691,30 @@ def amdsmi_get_gpu_vram_info( } +def amdsmi_get_gpu_cache_info( + processor_handle: amdsmi_wrapper.amdsmi_processor_handle, +) -> Dict[str, Any]: + if
not isinstance(processor_handle, amdsmi_wrapper.amdsmi_processor_handle): + raise AmdSmiParameterException( + processor_handle, amdsmi_wrapper.amdsmi_processor_handle + ) + + cache_info = amdsmi_wrapper.amdsmi_gpu_cache_info_t() + _check_res( + amdsmi_wrapper.amdsmi_get_gpu_cache_info( + processor_handle, ctypes.byref(cache_info)) + ) + + cache_info_dict = {} + for cache_index in range(cache_info.num_cache_types): + cache_size = cache_info.cache[cache_index].cache_size_kb + cache_level = cache_info.cache[cache_index].cache_level + cache_info_dict[f"cache {cache_index}"] = {"cache_size": cache_size, + "cache_level": cache_level} + + return cache_info_dict + + def amdsmi_get_gpu_vbios_info( processor_handle: amdsmi_wrapper.amdsmi_processor_handle, ) -> Dict[str, Any]: