From 098aa488aa4fbc804ddc8d7f2e3e971b8d5cd76c Mon Sep 17 00:00:00 2001 From: "Narlo, Joseph" Date: Wed, 1 Oct 2025 17:39:26 -0500 Subject: [PATCH] Add ASIC and Board information (#721) Signed-off-by: josnarlo [ROCm/amdsmi commit: b1eeff99928e0e21bdf723b9333b2c11a210852a] --- .../tests/python_unittest/integration_test.py | 38 ++++++++++++++++++- .../tests/python_unittest/unit_tests.py | 37 +++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/projects/amdsmi/tests/python_unittest/integration_test.py b/projects/amdsmi/tests/python_unittest/integration_test.py index 89ae688319..6107906f33 100755 --- a/projects/amdsmi/tests/python_unittest/integration_test.py +++ b/projects/amdsmi/tests/python_unittest/integration_test.py @@ -19,12 +19,13 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +import json import multiprocessing +import os import sys import threading import unittest -import os # Default path for AMDSMI_CLI_PATH is "/opt/rocm/libexec/amdsmi_cli/" amdsmi_cli_path = os.environ.get("AMDSMI_CLI_PATH", "/opt/rocm/libexec/amdsmi_cli/") @@ -44,7 +45,39 @@ class TestAmdSmiInit(unittest.TestCase): class TestAmdSmiPythonInterface(unittest.TestCase): - max_num_physical_devices = amdsmi.amdsmi_interface.AMDSMI_MAX_NUM_XCP * amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.verbose = verbose + self.max_num_physical_devices = amdsmi.amdsmi_interface.AMDSMI_MAX_NUM_XCP * amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES + global has_info_printed + if self.verbose and has_info_printed is False: + # Execute the following to print the asic and board info once per test run + has_info_printed = True + self.setUp() + processors = amdsmi.amdsmi_get_processor_handles() + self.assertGreaterEqual(len(processors), 1) + self.assertLessEqual(len(processors), self.max_num_physical_devices) + for i in range(0, len(processors)): + try: + # Print asic info + msg = f'asic info(gpu={i})' + ret = amdsmi.amdsmi_get_gpu_asic_info(processors[i]) + print(msg) + print(json.dumps(ret, sort_keys=False, indent=4), flush=True) + except amdsmi.AmdSmiLibraryException as e: + raise e + for i in range(0, len(processors)): + try: + # Print board info + msg = f'board info(gpu={i})' + ret = amdsmi.amdsmi_get_gpu_board_info(processors[i]) + print(msg) + print(json.dumps(ret, sort_keys=False, indent=4), flush=True) + except amdsmi.AmdSmiLibraryException as e: + raise e + self.tearDown() + return def _check_exception(self, e): error_code = e.get_error_code() @@ -1335,6 +1368,7 @@ if __name__ == '__main__': verbose=0 elif '-v' in sys.argv or '--verbose' in sys.argv: verbose=2 + has_info_printed = False # If no -k or --keyword argument is given, print all available tests if not ('-k' in sys.argv or '--keyword' in sys.argv): diff --git a/projects/amdsmi/tests/python_unittest/unit_tests.py b/projects/amdsmi/tests/python_unittest/unit_tests.py index 793b91a4c4..887ed35310 100755 --- a/projects/amdsmi/tests/python_unittest/unit_tests.py +++ b/projects/amdsmi/tests/python_unittest/unit_tests.py @@ -31,6 +31,8 @@ try: except ImportError: raise ImportError("Could not import /opt/rocm/libexec/amdsmi_cli/amdsmi_cli.py") + +has_info_printed = False not_supported_error_codes = ['2', '3', '49'] not_supported_error_code_names = ['AMDSMI_STATUS_NOT_SUPPORTED', 'AMDSMI_STATUS_NOT_YET_IMPLEMENTED', 'AMDSMI_STATUS_NO_HSMP_MSG_SUP'] @@ -85,6 +87,39 @@ error_map = \ class TestAmdSmiPythonBDF(unittest.TestCase): + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.verbose = verbose + self.max_num_physical_devices = amdsmi.amdsmi_interface.AMDSMI_MAX_NUM_XCP * amdsmi.amdsmi_interface.AMDSMI_MAX_DEVICES + global has_info_printed + if self.verbose and has_info_printed is False: + # Execute the following to print the asic and board info once per test run + has_info_printed = True + self.setUp() + processors = amdsmi.amdsmi_get_processor_handles() + self.assertGreaterEqual(len(processors), 1) + self.assertLessEqual(len(processors), self.max_num_physical_devices) + for i in range(0, len(processors)): + try: + # Print asic info + msg = f'asic info(gpu={i})' + ret = amdsmi.amdsmi_get_gpu_asic_info(processors[i]) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + raise e + for i in range(0, len(processors)): + try: + # Print board info + msg = f'board info(gpu={i})' + ret = amdsmi.amdsmi_get_gpu_board_info(processors[i]) + self._print(msg, ret) + except amdsmi.AmdSmiLibraryException as e: + raise e + self.tearDown() + return + valid_bdfs = { "00:00.0": [0, 0, 0, 0], "01:01.1": [0, 1, 1, 1], @@ -452,7 +487,7 @@ class TestAmdSmiPythonBDF(unittest.TestCase): self.assertEqual(None, result) @classmethod - def _print(self, msg, data=None, cond=None): + def _print(self, msg, data=None): if verbose == 2: if not data: print(msg, flush=True)